From: Karl Williamson Date: Sat, 21 May 2016 17:35:10 +0000 (-0600) Subject: perllocale: Document NUL collation handling X-Git-Tag: v5.25.2~147 X-Git-Url: https://perl5.git.perl.org/perl5.git/commitdiff_plain/4e615abd31bcd0bb8f321ae4687e5aef1a8aa391 perllocale: Document NUL collation handling And add a TODO test, because this shortly will be improved upon --- diff --git a/lib/locale.t b/lib/locale.t index 1e99b6e..ddb5d79 100644 --- a/lib/locale.t +++ b/lib/locale.t @@ -1735,6 +1735,20 @@ foreach my $Locale (@Locale) { last; } } + + use locale; + + ++$locales_test_number; + $test_names{$locales_test_number} + = 'TODO Skip in locales where \001 has primary sorting weight; ' + . 'otherwise verify that \0 doesn\'t have primary sorting weight'; + if ("a\001c" lt "ab") { + report_result($Locale, $locales_test_number, 1); + } + else { + my $ok = "ab" lt "a\0c"; + report_result($Locale, $locales_test_number, $ok); + } } my $ok1; diff --git a/pod/perllocale.pod b/pod/perllocale.pod index 018f916..0c7e769 100644 --- a/pod/perllocale.pod +++ b/pod/perllocale.pod @@ -1565,6 +1565,16 @@ called, and whatever it does is what you get. =head1 BUGS +=head2 Collation of strings containing embedded C<NUL> characters + +Perl handles C<NUL> characters in the middle of strings. In many +locales, control characters are ignored unless the strings otherwise +compare equal. Unlike other control characters, C<NUL> characters are +never ignored. For example, if given that C<"b"> sorts after +C<"\001">, and C<"c"> sorts after C<"b">, C<"a\0c"> always sorts before +C<"ab">. This is true even in locales in which C<"ab"> sorts before +C<"a\001c">. + =head2 Broken systems In certain systems, the operating system's locale support