3 # This tests plain 'use locale' and adorned 'use locale ":not_characters"'
4 # Because these pragmas are compile time, and I (khw) am trying to test
5 # without using 'eval' as much as possible, which might cloud the issue, the
6 # crucial parts of the code are duplicated in a block for each pragma.
8 # To make a TODO test, add the string 'TODO' to its %test_names value
10 binmode STDOUT, ':utf8';
11 binmode STDERR, ':utf8';
17 require Config; import Config;
18 if (!$Config{d_setlocale} || $Config{ccflags} =~ /\bD?NO_LOCALE\b/) {
28 # =1 adds debugging output; =2 increases the verbosity somewhat
29 my $debug = $ENV{PERL_DEBUG_FULL_TEST} // 0;
31 # Certain tests have been shown to be problematical for a few locales. Don't
32 # fail them unless at least this percentage of the tested locales fail.
33 # Some Windows machines are defective in every locale but the C, calling \t
34 # printable; superscripts to be digits, etc. See
35 # http://markmail.org/message/5jwam4xsx4amsdnv. Also on AIX machines, many
36 # locales call a no-break space a graphic.
37 # (There aren't 1000 locales currently in existence, so 99.9 works)
38 my $acceptable_fold_failure_percentage = ($^O =~ / ^ ( MSWin32 | AIX ) $ /ix)
42 # The list of test numbers of the problematic tests.
43 my @problematical_tests;
48 my $dumper = Dumpvalue->new(
55 my($mess) = join "", @_;
57 print $dumper->stringify($mess,1), "\n";
61 return unless $debug > 1;
69 my $have_setlocale = 0;
72 import POSIX ':locale_h';
76 # Visual C's CRT goes silly on strings of the form "en_US.ISO8859-1"
77 # and mingw32 uses said silly CRT
78 # This doesn't seem to be an issue any more, at least on Windows XP,
79 # so re-enable the tests for Windows XP onwards.
80 my $winxp = ($^O eq 'MSWin32' && defined &Win32::GetOSVersion &&
81 join('.', (Win32::GetOSVersion())[1..2]) >= 5.1);
82 $have_setlocale = 0 if ((($^O eq 'MSWin32' && !$winxp) || $^O eq 'NetWare') &&
83 $Config{cc} =~ /^(cl|gcc)/i);
85 # UWIN seems to loop after taint tests, just skip for now
86 $have_setlocale = 0 if ($^O =~ /^uwin/);
93 my ($result, $message) = @_;
94 $message = "" unless defined $message;
96 print 'not ' unless ($result);
97 print "ok " . ++$test_num;
102 # First we'll do a lot of taint checking for locales.
103 # This is the easiest to test, actually, as any locale,
104 # even the default locale will taint under 'use locale'.
106 sub is_tainted { # hello, camel two.
107 no warnings 'uninitialized' ;
110 not eval { $dummy = join("", @_), kill 0; 1 }
113 sub check_taint ($;$) {
114 my $message_tail = $_[1] // "";
115 $message_tail = ": $message_tail" if $message_tail;
116 ok is_tainted($_[0]), "verify that is tainted$message_tail";
119 sub check_taint_not ($;$) {
120 my $message_tail = $_[1] // "";
121 $message_tail = ": $message_tail" if $message_tail;
122 ok((not is_tainted($_[0])), "verify that isn't tainted$message_tail");
125 "\tb\t" =~ /^m?(\s)(.*)\1$/;
126 check_taint_not $&, "not tainted outside 'use locale'";
129 use locale; # engage locale and therefore locale taint.
135 check_taint ucfirst($a);
141 check_taint lcfirst($a);
144 check_taint_not sprintf('%e', 123.456);
145 check_taint_not sprintf('%f', 123.456);
146 check_taint_not sprintf('%g', 123.456);
147 check_taint_not sprintf('%d', 123.456);
148 check_taint_not sprintf('%x', 123.456);
150 $_ = $a; # untaint $_
152 $_ = uc($a); # taint $_
156 /(\w)/; # taint $&, $`, $', $+, $1.
164 /(.)/; # untaint $&, $`, $', $+, $1.
172 /(\W)/; # taint $&, $`, $', $+, $1.
180 /(\s)/; # taint $&, $`, $', $+, $1.
188 /(\S)/; # taint $&, $`, $', $+, $1.
196 $_ = $a; # untaint $_
200 /(b)/; # this must not taint
208 $_ = $a; # untaint $_
212 $b = uc($a); # taint $b
213 s/(.+)/$b/; # this must taint only the $_
223 $_ = $a; # untaint $_
225 s/(.+)/b/; # this must not taint
234 $b = $a; # untaint $b
236 ($b = $a) =~ s/\w/$&/;
237 check_taint $b; # $b should be tainted.
238 check_taint_not $a; # $a should be not.
240 $_ = $a; # untaint $_
242 s/(\w)/\l$1/; # this must taint
251 $_ = $a; # untaint $_
253 s/(\w)/\L$1/; # this must taint
262 $_ = $a; # untaint $_
264 s/(\w)/\u$1/; # this must taint
273 $_ = $a; # untaint $_
275 s/(\w)/\U$1/; # this must taint
284 # After all this tainting $a should be cool.
289 check_taint_not $1, '"a" =~ /([a-z])/';
290 "foo.bar_baz" =~ /^(.*)[._](.*?)$/; # Bug 120675
291 check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/';
293 # BE SURE TO COPY ANYTHING YOU ADD to the block below
295 { # This is just the previous tests copied here with a different
296 # compile-time pragma.
298 use locale ':not_characters'; # engage restricted locale with different
303 check_taint_not uc($a);
304 check_taint_not "\U$a";
305 check_taint_not ucfirst($a);
306 check_taint_not "\u$a";
307 check_taint_not lc($a);
308 check_taint_not fc($a);
309 check_taint_not "\L$a";
310 check_taint_not "\F$a";
311 check_taint_not lcfirst($a);
312 check_taint_not "\l$a";
314 check_taint_not sprintf('%e', 123.456);
315 check_taint_not sprintf('%f', 123.456);
316 check_taint_not sprintf('%g', 123.456);
317 check_taint_not sprintf('%d', 123.456);
318 check_taint_not sprintf('%x', 123.456);
320 $_ = $a; # untaint $_
322 $_ = uc($a); # taint $_
326 /(\w)/; # taint $&, $`, $', $+, $1.
334 /(.)/; # untaint $&, $`, $', $+, $1.
342 /(\W)/; # taint $&, $`, $', $+, $1.
350 /(\s)/; # taint $&, $`, $', $+, $1.
358 /(\S)/; # taint $&, $`, $', $+, $1.
366 $_ = $a; # untaint $_
370 /(b)/; # this must not taint
378 $_ = $a; # untaint $_
382 $b = uc($a); # taint $b
383 s/(.+)/$b/; # this must taint only the $_
393 $_ = $a; # untaint $_
395 s/(.+)/b/; # this must not taint
404 $b = $a; # untaint $b
406 ($b = $a) =~ s/\w/$&/;
407 check_taint_not $b; # $b should be tainted.
408 check_taint_not $a; # $a should be not.
410 $_ = $a; # untaint $_
412 s/(\w)/\l$1/; # this must taint
421 $_ = $a; # untaint $_
423 s/(\w)/\L$1/; # this must taint
432 $_ = $a; # untaint $_
434 s/(\w)/\u$1/; # this must taint
443 $_ = $a; # untaint $_
445 s/(\w)/\U$1/; # this must taint
454 # After all this tainting $a should be cool.
459 check_taint_not $1, '"a" =~ /([a-z])/';
460 "foo.bar_baz" =~ /^(.*)[._](.*?)$/; # Bug 120675
461 check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/';
464 # Here are in scope of 'use locale'
466 # I think we've seen quite enough of taint.
467 # Let us do some *real* locale work now,
468 # unless setlocale() is missing (i.e. minitest).
470 unless ($have_setlocale) {
471 print "1..$test_num\n";
475 # The test number before our first setlocale()
476 my $final_without_setlocale = $test_num;
480 debug "# Scanning for locales...\n";
482 # Note that it's okay that some languages have their native names
483 # capitalized here even though that's not "right". They are lowercased
484 # anyway later during the scanning process (and besides, some clueless
485 # vendor might have them capitalized erroneously anyway).
489 Arabic:ar:dz eg sa:6 arabic8
490 Brezhoneg Breton:br:fr:1 15
491 Bulgarski Bulgarian:bg:bg:5
492 Chinese:zh:cn tw:cn.EUC eucCN eucTW euc.CN euc.TW Big5 GB2312 tw.EUC
493 Hrvatski Croatian:hr:hr:2
494 Cymraeg Welsh:cy:cy:1 14 15
496 Dansk Danish:da:dk:1 15
497 Nederlands Dutch:nl:be nl:1 15
498 English American British:en:au ca gb ie nz us uk zw:1 15 cp850
500 Eesti Estonian:et:ee:4 6 13
501 Suomi Finnish:fi:fi:1 15
503 Deutsch German:de:at be ch de lu:1 15
504 Euskaraz Basque:eu:es fr:1 15
505 Galego Galician:gl:es:1 15
506 Ellada Greek:el:gr:7 g8
508 Greenlandic:kl:gl:4 6
509 Hebrew:iw:il:8 hebrew8
511 Indonesian:id:id:1 15
512 Gaeilge Irish:ga:IE:1 14 15
513 Italiano Italian:it:ch it:1 15
514 Nihongo Japanese:ja:jp:euc eucJP jp.EUC sjis
516 Latine Latin:la:va:1 15
518 Lithuanian:lt:lt:4 6 13
519 Macedonian:mk:mk:1 15
522 Norsk Norwegian:no no\@nynorsk nb nn:no:1 15
524 Polski Polish:pl:pl:2
526 Russki Russian:ru:ru su ua:5 koi8 koi8r KOI8-R koi8u cp1251 cp866
527 Serbski Serbian:sr:yu:5
529 Slovene Slovenian:sl:si:2
530 Sqhip Albanian:sq:sq:1 15
531 Svenska Swedish:sv:fi se:1 15
533 Turkish:tr:tr:9 turkish8
537 if ($^O eq 'os390') {
538 # These cause heartburn. Broken locales?
539 $locales =~ s/Svenska Swedish:sv:fi se:1 15\n//;
540 $locales =~ s/Thai:th:th:11 tis620\n//;
543 sub in_utf8 () { $^H & 0x08 || (${^OPEN} || "") =~ /:utf8/ }
546 require "lib/locale/utf8";
548 require "lib/locale/latin1";
557 return if grep { $locale eq $_ } @Locale;
558 return unless setlocale(&POSIX::LC_ALL, $locale);
561 local $SIG{__WARN__} = sub {
562 $badutf8 = $_[0] =~ /Malformed UTF-8/;
564 $Locale =~ /UTF-?8/i;
568 ok(0, "Locale name contains malformed utf8");
571 push @Locale, $locale;
574 sub decode_encodings {
577 foreach (split(/ /, shift)) {
579 push @enc, "ISO8859-$1";
580 push @enc, "iso8859$1"; # HP
582 push @enc, "roman8"; # HP
586 push @enc, "$_.UTF-8";
589 if ($^O eq 'os390') {
590 push @enc, qw(IBM-037 IBM-819 IBM-1047);
599 trylocale("ISO8859-$_");
600 trylocale("iso8859$_");
601 trylocale("iso8859-$_");
602 trylocale("iso_8859_$_");
603 trylocale("isolatin$_");
604 trylocale("isolatin-$_");
605 trylocale("iso_latin_$_");
608 # Sanitize the environment so that we can run the external 'locale'
609 # program without the taint mode getting grumpy.
611 # $ENV{PATH} is special in VMS.
612 delete $ENV{PATH} if $^O ne 'VMS' or $Config{d_setenv};
614 # Other subversive stuff.
615 delete @ENV{qw(IFS CDPATH ENV BASH_ENV)};
617 if (-x "/usr/bin/locale" && open(LOCALES, "/usr/bin/locale -a 2>/dev/null|")) {
619 # It seems that /usr/bin/locale steadfastly outputs 8 bit data, which
620 # ain't great when we're running this testPERL_UNICODE= so that utf8
621 # locales will cause all IO hadles to default to (assume) utf8
622 next unless utf8::valid($_);
627 } elsif ($^O eq 'VMS' && defined($ENV{'SYS$I18N_LOCALE'}) && -d 'SYS$I18N_LOCALE') {
628 # The SYS$I18N_LOCALE logical name search list was not present on
629 # VAX VMS V5.5-12, but was on AXP && VAX VMS V6.2 as well as later versions.
630 opendir(LOCALES, "SYS\$I18N_LOCALE:");
631 while ($_ = readdir(LOCALES)) {
636 } elsif (($^O eq 'openbsd' || $^O eq 'bitrig' ) && -e '/usr/share/locale') {
638 # OpenBSD doesn't have a locale executable, so reading /usr/share/locale
639 # is much easier and faster than the last resort method.
641 opendir(LOCALES, '/usr/share/locale');
642 while ($_ = readdir(LOCALES)) {
649 # This is going to be slow.
651 foreach my $locale (split(/\n/, $locales)) {
652 my ($locale_name, $language_codes, $country_codes, $encodings) =
654 my @enc = decode_encodings($encodings);
655 foreach my $loc (split(/ /, $locale_name)) {
657 foreach my $enc (@enc) {
658 trylocale("$loc.$enc");
661 foreach my $enc (@enc) {
662 trylocale("$loc.$enc");
665 foreach my $lang (split(/ /, $language_codes)) {
667 foreach my $country (split(/ /, $country_codes)) {
668 my $lc = "${lang}_${country}";
670 foreach my $enc (@enc) {
671 trylocale("$lc.$enc");
673 my $lC = "${lang}_\U${country}";
675 foreach my $enc (@enc) {
676 trylocale("$lC.$enc");
683 setlocale(&POSIX::LC_ALL, "C");
685 if ($^O eq 'darwin') {
686 # Darwin 8/Mac OS X 10.4 and 10.5 have bad Basque locales: perl bug #35895,
687 # Apple bug ID# 4139653. It also has a problem in Byelorussian.
688 (my $v) = $Config{osvers} =~ /^(\d+)/;
689 if ($v >= 8 and $v < 10) {
690 debug "# Skipping eu_ES, be_BY locales -- buggy in Darwin\n";
691 @Locale = grep ! m/^(eu_ES(?:\..*)?|be_BY\.CP1131)$/, @Locale;
693 debug "# Skipping be_BY locales -- buggy in Darwin\n";
694 @Locale = grep ! m/^be_BY\.CP1131$/, @Locale;
698 @Locale = sort @Locale;
700 debug "# Locales =\n";
708 my @Added_alpha; # Alphas that aren't in the C locale.
712 # This returns a display string denoting the input parameter @_, each
713 # entry of which is a single character in the range 0-255. The first part
714 # of the output is a string of the characters in @_ that are ASCII
715 # graphics, and hence unambiguously displayable. They are given by code
716 # point order. The second part is the remaining code points, the ordinals
717 # of which are each displayed as 2-digit hex. Blanks are inserted so as
718 # to keep anything from the first part looking like a 2-digit hex number.
721 my @chars = sort { ord $a <=> ord $b } @_;
725 push @chars, chr(258); # This sentinel simplifies the loop termination
727 foreach my $i (0 .. @chars - 1) {
728 my $char = $chars[$i];
732 # We avoid using [:posix:] classes, as these are being tested in this
733 # file. Each equivalence class below is for things that can appear in
734 # a range; those that can't be in a range have class -1. 0 for those
735 # which should be output in hex; and >0 for the other ranges
736 if ($char =~ /[A-Z]/) {
739 elsif ($char =~ /[a-z]/) {
742 elsif ($char =~ /[0-9]/) {
745 # Uncomment to get literal punctuation displayed instead of hex
746 #elsif ($char =~ /[[\]!"#\$\%&\'()*+,.\/:\\;<=>?\@\^_`{|}~-]/) {
747 # $class = -1; # Punct never appears in a range
750 $class = 0; # Output in hex
753 if (! defined $range_start) {
755 $output .= " " . $char;
758 $range_start = ord $char;
759 $start_class = $class;
761 } # A range ends if not consecutive, or the class-type changes
762 elsif (ord $char != ($range_end = ord($chars[$i-1])) + 1
763 || $class != $start_class)
766 # Here, the current character is not in the range. This means the
767 # previous character must have been. Output the range up through
769 my $range_length = $range_end - $range_start + 1;
770 if ($start_class > 0) {
771 $output .= " " . chr($range_start);
772 $output .= "-" . chr($range_end) if $range_length > 1;
775 $output .= sprintf(" %02X", $range_start);
776 $output .= sprintf("-%02X", $range_end) if $range_length > 1;
779 # Handle the new current character, as potentially beginning a new
791 my ($Locale, $i, $pass_fail, $message) = @_;
793 $message = " ($message)" if $message;
794 unless ($pass_fail) {
795 $Problem{$i}{$Locale} = 1;
796 debug "# failed $i ($test_names{$i}) with locale '$Locale'$message\n";
798 push @{$Okay{$i}}, $Locale;
802 sub report_multi_result {
803 my ($Locale, $i, $results_ref) = @_;
805 # $results_ref points to an array, each element of which is a character that was
806 # in error for this test numbered '$i'. If empty, the test passed
810 $message = join " ", "for", disp_chars(@$results_ref);
812 report_result($Locale, $i, @$results_ref == 0, $message);
815 my $first_locales_test_number = $final_without_setlocale + 1;
816 my $locales_test_number;
817 my $not_necessarily_a_problem_test_number;
818 my $first_casing_test_number;
819 my %setlocale_failed; # List of locales that setlocale() didn't work on
821 foreach $Locale (@Locale) {
822 $locales_test_number = $first_locales_test_number - 1;
824 debug "# Locale = $Locale\n";
826 unless (setlocale(&POSIX::LC_ALL, $Locale)) {
827 $setlocale_failed{$Locale} = $Locale;
831 # We test UTF-8 locales only under ':not_characters'; otherwise they have
832 # documented deficiencies. Non- UTF-8 locales are tested only under plain
833 # 'use locale', as otherwise we would have to convert everything in them
835 # The locale name doesn't necessarily have to have "utf8" in it to be a
836 # UTF-8 locale, but it works mostly.
837 my $is_utf8_locale = $Locale =~ /UTF-?8/i;
839 my %UPPER = (); # All alpha X for which uc(X) == X and lc(X) != X
840 my %lower = (); # All alpha X for which lc(X) == X and uc(X) != X
841 my %BoThCaSe = (); # All alpha X for which uc(X) == lc(X) == X
843 if (! $is_utf8_locale) {
845 @{$posixes{'word'}} = grep /\w/, map { chr } 0..255;
846 @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255;
847 @{$posixes{'space'}} = grep /\s/, map { chr } 0..255;
848 @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255;
849 @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255;
850 @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255;
851 @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255;
852 @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255;
853 @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255;
854 @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255;
855 @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255;
856 @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255;
857 @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255;
858 @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255;
859 @{$posixes{'cased'}} = grep /[[:upper:]]/i, map {chr } 0..255;
861 # Sieve the uppercase and the lowercase.
863 for (@{$posixes{'word'}}) {
864 if (/[^\d_]/) { # skip digits and the _
875 use locale ':not_characters';
876 @{$posixes{'word'}} = grep /\w/, map { chr } 0..255;
877 @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255;
878 @{$posixes{'space'}} = grep /\s/, map { chr } 0..255;
879 @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255;
880 @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255;
881 @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255;
882 @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255;
883 @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255;
884 @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255;
885 @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255;
886 @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255;
887 @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255;
888 @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255;
889 @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255;
890 @{$posixes{'cased'}} = grep /[[:upper:]]/i, map {chr } 0..255;
891 for (@{$posixes{'word'}}) {
892 if (/[^\d_]/) { # skip digits and the _
903 # Ordered, where possible, in groups of "this is a subset of the next
905 debug "# :upper: = ", disp_chars(@{$posixes{'upper'}}), "\n";
906 debug "# :lower: = ", disp_chars(@{$posixes{'lower'}}), "\n";
907 debug "# :cased: = ", disp_chars(@{$posixes{'cased'}}), "\n";
908 debug "# :alpha: = ", disp_chars(@{$posixes{'alpha'}}), "\n";
909 debug "# :alnum: = ", disp_chars(@{$posixes{'alnum'}}), "\n";
910 debug "# w = ", disp_chars(@{$posixes{'word'}}), "\n";
911 debug "# :graph: = ", disp_chars(@{$posixes{'graph'}}), "\n";
912 debug "# :print: = ", disp_chars(@{$posixes{'print'}}), "\n";
913 debug "# d = ", disp_chars(@{$posixes{'digit'}}), "\n";
914 debug "# :xdigit: = ", disp_chars(@{$posixes{'xdigit'}}), "\n";
915 debug "# :blank: = ", disp_chars(@{$posixes{'blank'}}), "\n";
916 debug "# s = ", disp_chars(@{$posixes{'space'}}), "\n";
917 debug "# :punct: = ", disp_chars(@{$posixes{'punct'}}), "\n";
918 debug "# :cntrl: = ", disp_chars(@{$posixes{'cntrl'}}), "\n";
919 debug "# :ascii: = ", disp_chars(@{$posixes{'ascii'}}), "\n";
921 foreach (keys %UPPER) {
923 $BoThCaSe{$_}++ if exists $lower{$_};
925 foreach (keys %lower) {
926 $BoThCaSe{$_}++ if exists $UPPER{$_};
928 foreach (keys %BoThCaSe) {
934 foreach my $ord ( 0 .. 255 ) {
935 $Unassigned{chr $ord} = 1;
937 foreach my $class (keys %posixes) {
938 foreach my $char (@{$posixes{$class}}) {
939 delete $Unassigned{$char};
943 debug "# UPPER = ", disp_chars(keys %UPPER), "\n";
944 debug "# lower = ", disp_chars(keys %lower), "\n";
945 debug "# BoThCaSe = ", disp_chars(keys %BoThCaSe), "\n";
946 debug "# Unassigned = ", disp_chars(sort { ord $a <=> ord $b } keys %Unassigned), "\n";
950 foreach my $x (sort keys %UPPER) {
953 if ($is_utf8_locale) {
954 use locale ':not_characters';
955 $ok = $x =~ /[[:upper:]]/;
956 $fold_ok = $x =~ /[[:lower:]]/i;
960 $ok = $x =~ /[[:upper:]]/;
961 $fold_ok = $x =~ /[[:lower:]]/i;
963 push @failures, $x unless $ok;
964 push @fold_failures, $x unless $fold_ok;
966 $locales_test_number++;
967 $first_casing_test_number = $locales_test_number;
968 $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/ matches all alpha X for which uc(X) == X and lc(X) != X';
969 report_multi_result($Locale, $locales_test_number, \@failures);
971 $locales_test_number++;
973 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i matches all alpha X for which uc(X) == X and lc(X) != X';
974 report_multi_result($Locale, $locales_test_number, \@fold_failures);
977 undef @fold_failures;
979 foreach my $x (sort keys %lower) {
982 if ($is_utf8_locale) {
983 use locale ':not_characters';
984 $ok = $x =~ /[[:lower:]]/;
985 $fold_ok = $x =~ /[[:upper:]]/i;
989 $ok = $x =~ /[[:lower:]]/;
990 $fold_ok = $x =~ /[[:upper:]]/i;
992 push @failures, $x unless $ok;
993 push @fold_failures, $x unless $fold_ok;
996 $locales_test_number++;
997 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/ matches all alpha X for which lc(X) == X and uc(X) != X';
998 report_multi_result($Locale, $locales_test_number, \@failures);
1000 $locales_test_number++;
1001 $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/i matches all alpha X for which lc(X) == X and uc(X) != X';
1002 report_multi_result($Locale, $locales_test_number, \@fold_failures);
1004 { # Find the alphabetic characters that are not considered alphabetics
1005 # in the default (C) locale.
1010 for (keys %UPPER, keys %lower, keys %BoThCaSe) {
1011 push(@Added_alpha, $_) if (/\W/);
1015 @Added_alpha = sort @Added_alpha;
1017 debug "# Added_alpha = ", disp_chars(@Added_alpha), "\n";
1019 # Cross-check the whole 8-bit character set.
1021 ++$locales_test_number;
1023 $test_names{$locales_test_number} = 'Verify that \w and [:word:] are identical';
1024 for (map { chr } 0..255) {
1025 if ($is_utf8_locale) {
1026 use locale ':not_characters';
1027 push @f, $_ unless /[[:word:]]/ == /\w/;
1030 push @f, $_ unless /[[:word:]]/ == /\w/;
1033 report_multi_result($Locale, $locales_test_number, \@f);
1035 ++$locales_test_number;
1037 $test_names{$locales_test_number} = 'Verify that \d and [:digit:] are identical';
1038 for (map { chr } 0..255) {
1039 if ($is_utf8_locale) {
1040 use locale ':not_characters';
1041 push @f, $_ unless /[[:digit:]]/ == /\d/;
1044 push @f, $_ unless /[[:digit:]]/ == /\d/;
1047 report_multi_result($Locale, $locales_test_number, \@f);
1049 ++$locales_test_number;
1051 $test_names{$locales_test_number} = 'Verify that \s and [:space:] are identical';
1052 for (map { chr } 0..255) {
1053 if ($is_utf8_locale) {
1054 use locale ':not_characters';
1055 push @f, $_ unless /[[:space:]]/ == /\s/;
1058 push @f, $_ unless /[[:space:]]/ == /\s/;
1061 report_multi_result($Locale, $locales_test_number, \@f);
1063 ++$locales_test_number;
1065 $test_names{$locales_test_number} = 'Verify that [:posix:] and [:^posix:] are mutually exclusive';
1066 for (map { chr } 0..255) {
1067 if ($is_utf8_locale) {
1068 use locale ':not_characters';
1069 push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) ||
1070 (/[[:alnum:]]/ xor /[[:^alnum:]]/) ||
1071 (/[[:ascii:]]/ xor /[[:^ascii:]]/) ||
1072 (/[[:blank:]]/ xor /[[:^blank:]]/) ||
1073 (/[[:cntrl:]]/ xor /[[:^cntrl:]]/) ||
1074 (/[[:digit:]]/ xor /[[:^digit:]]/) ||
1075 (/[[:graph:]]/ xor /[[:^graph:]]/) ||
1076 (/[[:lower:]]/ xor /[[:^lower:]]/) ||
1077 (/[[:print:]]/ xor /[[:^print:]]/) ||
1078 (/[[:space:]]/ xor /[[:^space:]]/) ||
1079 (/[[:upper:]]/ xor /[[:^upper:]]/) ||
1080 (/[[:word:]]/ xor /[[:^word:]]/) ||
1081 (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) ||
1083 # effectively is what [:cased:] would be if it existed.
1084 (/[[:upper:]]/i xor /[[:^upper:]]/i);
1087 push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) ||
1088 (/[[:alnum:]]/ xor /[[:^alnum:]]/) ||
1089 (/[[:ascii:]]/ xor /[[:^ascii:]]/) ||
1090 (/[[:blank:]]/ xor /[[:^blank:]]/) ||
1091 (/[[:cntrl:]]/ xor /[[:^cntrl:]]/) ||
1092 (/[[:digit:]]/ xor /[[:^digit:]]/) ||
1093 (/[[:graph:]]/ xor /[[:^graph:]]/) ||
1094 (/[[:lower:]]/ xor /[[:^lower:]]/) ||
1095 (/[[:print:]]/ xor /[[:^print:]]/) ||
1096 (/[[:space:]]/ xor /[[:^space:]]/) ||
1097 (/[[:upper:]]/ xor /[[:^upper:]]/) ||
1098 (/[[:word:]]/ xor /[[:^word:]]/) ||
1099 (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) ||
1100 (/[[:upper:]]/i xor /[[:^upper:]]/i);
1103 report_multi_result($Locale, $locales_test_number, \@f);
1105 # The rules for the relationships are given in:
1106 # http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
1109 ++$locales_test_number;
1111 $test_names{$locales_test_number} = 'Verify that [:lower:] contains at least a-z';
1113 if ($is_utf8_locale) {
1114 use locale ':not_characters';
1115 push @f, $_ unless /[[:lower:]]/;
1118 push @f, $_ unless /[[:lower:]]/;
1121 report_multi_result($Locale, $locales_test_number, \@f);
1123 ++$locales_test_number;
1125 $test_names{$locales_test_number} = 'Verify that [:lower:] is a subset of [:alpha:]';
1126 for (map { chr } 0..255) {
1127 if ($is_utf8_locale) {
1128 use locale ':not_characters';
1129 push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/;
1132 push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/;
1135 report_multi_result($Locale, $locales_test_number, \@f);
1137 ++$locales_test_number;
1139 $test_names{$locales_test_number} = 'Verify that [:upper:] contains at least A-Z';
1141 if ($is_utf8_locale) {
1142 use locale ':not_characters';
1143 push @f, $_ unless /[[:upper:]]/;
1146 push @f, $_ unless /[[:upper:]]/;
1149 report_multi_result($Locale, $locales_test_number, \@f);
1151 ++$locales_test_number;
1153 $test_names{$locales_test_number} = 'Verify that [:upper:] is a subset of [:alpha:]';
1154 for (map { chr } 0..255) {
1155 if ($is_utf8_locale) {
1156 use locale ':not_characters';
1157 push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/;
1160 push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/;
1163 report_multi_result($Locale, $locales_test_number, \@f);
1165 ++$locales_test_number;
1167 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i is a subset of [:alpha:]';
1168 for (map { chr } 0..255) {
1169 if ($is_utf8_locale) {
1170 use locale ':not_characters';
1171 push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/;
1174 push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/;
1177 report_multi_result($Locale, $locales_test_number, \@f);
1179 ++$locales_test_number;
1181 $test_names{$locales_test_number} = 'Verify that [:alpha:] is a subset of [:alnum:]';
1182 for (map { chr } 0..255) {
1183 if ($is_utf8_locale) {
1184 use locale ':not_characters';
1185 push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/;
1188 push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/;
1191 report_multi_result($Locale, $locales_test_number, \@f);
1193 ++$locales_test_number;
1195 $test_names{$locales_test_number} = 'Verify that [:digit:] contains at least 0-9';
1197 if ($is_utf8_locale) {
1198 use locale ':not_characters';
1199 push @f, $_ unless /[[:digit:]]/;
1202 push @f, $_ unless /[[:digit:]]/;
1205 report_multi_result($Locale, $locales_test_number, \@f);
1207 ++$locales_test_number;
1209 $test_names{$locales_test_number} = 'Verify that [:digit:] is a subset of [:alnum:]';
1210 for (map { chr } 0..255) {
1211 if ($is_utf8_locale) {
1212 use locale ':not_characters';
1213 push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/;
1216 push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/;
1219 report_multi_result($Locale, $locales_test_number, \@f);
1221 ++$locales_test_number;
1223 $test_names{$locales_test_number} = 'Verify that [:digit:] matches either 10 or 20 code points';
1224 report_result($Locale, $locales_test_number, @{$posixes{'digit'}} == 10 || @{$posixes{'digit'}} == 20);
1226 ++$locales_test_number;
1228 $test_names{$locales_test_number} = 'Verify that if there is a second set of digits in [:digit:], they are consecutive';
1229 if (@{$posixes{'digit'}} == 20) {
1231 for (map { chr } 0..255) {
1232 next unless /[[:digit:]]/;
1234 if (defined $previous_ord) {
1235 if ($is_utf8_locale) {
1236 use locale ':not_characters';
1237 push @f, $_ if ord $_ != $previous_ord + 1;
1240 push @f, $_ if ord $_ != $previous_ord + 1;
1243 $previous_ord = ord $_;
1246 report_multi_result($Locale, $locales_test_number, \@f);
1248 ++$locales_test_number;
1250 $test_names{$locales_test_number} = 'Verify that [:digit:] is a subset of [:xdigit:]';
1251 for (map { chr } 0..255) {
1252 if ($is_utf8_locale) {
1253 use locale ':not_characters';
1254 push @f, $_ if /[[:digit:]]/ and ! /[[:xdigit:]]/;
1257 push @f, $_ if /[[:digit:]]/ and ! /[[:xdigit:]]/;
1260 report_multi_result($Locale, $locales_test_number, \@f);
1262 ++$locales_test_number;
1264 $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains at least A-F, a-f';
1265 for ('A' .. 'F', 'a' .. 'f') {
1266 if ($is_utf8_locale) {
1267 use locale ':not_characters';
1268 push @f, $_ unless /[[:xdigit:]]/;
1271 push @f, $_ unless /[[:xdigit:]]/;
1274 report_multi_result($Locale, $locales_test_number, \@f);
1276 ++$locales_test_number;
1278 $test_names{$locales_test_number} = 'Verify that any additional members of [:xdigit:], are in groups of 6 consecutive code points';
1281 for (map { chr } 0..255) {
1282 next unless /[[:xdigit:]]/;
1283 next if /[[:digit:]]/;
1285 if (defined $previous_ord) {
1286 if ($is_utf8_locale) {
1287 use locale ':not_characters';
1288 push @f, $_ if ord $_ != $previous_ord + 1;
1291 push @f, $_ if ord $_ != $previous_ord + 1;
1296 undef $previous_ord;
1299 $previous_ord = ord $_;
1302 report_multi_result($Locale, $locales_test_number, \@f);
1304 ++$locales_test_number;
1306 $test_names{$locales_test_number} = 'Verify that [:xdigit:] is a subset of [:graph:]';
1307 for (map { chr } 0..255) {
1308 if ($is_utf8_locale) {
1309 use locale ':not_characters';
1310 push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/;
1313 push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/;
1316 report_multi_result($Locale, $locales_test_number, \@f);
1318 # Note that xdigit doesn't have to be a subset of alnum
1320 ++$locales_test_number;
1322 $test_names{$locales_test_number} = 'Verify that [:punct:] is a subset of [:graph:]';
1323 for (map { chr } 0..255) {
1324 if ($is_utf8_locale) {
1325 use locale ':not_characters';
1326 push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/;
1329 push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/;
1332 report_multi_result($Locale, $locales_test_number, \@f);
1334 ++$locales_test_number;
1336 $test_names{$locales_test_number} = 'Verify that the space character is not in [:graph:]';
1337 if ($is_utf8_locale) {
1338 use locale ':not_characters';
1339 push @f, " " if " " =~ /[[:graph:]]/;
1342 push @f, " " if " " =~ /[[:graph:]]/;
1344 report_multi_result($Locale, $locales_test_number, \@f);
1346 ++$locales_test_number;
1348 $test_names{$locales_test_number} = 'Verify that [:space:] contains at least [\f\n\r\t\cK ]';
1349 for (' ', "\f", "\n", "\r", "\t", "\cK") {
1350 if ($is_utf8_locale) {
1351 use locale ':not_characters';
1352 push @f, $_ unless /[[:space:]]/;
1355 push @f, $_ unless /[[:space:]]/;
1358 report_multi_result($Locale, $locales_test_number, \@f);
1360 ++$locales_test_number;
1362 $test_names{$locales_test_number} = 'Verify that [:blank:] contains at least [\t ]';
1364 if ($is_utf8_locale) {
1365 use locale ':not_characters';
1366 push @f, $_ unless /[[:blank:]]/;
1369 push @f, $_ unless /[[:blank:]]/;
1372 report_multi_result($Locale, $locales_test_number, \@f);
1374 ++$locales_test_number;
1376 $test_names{$locales_test_number} = 'Verify that [:blank:] is a subset of [:space:]';
1377 for (map { chr } 0..255) {
1378 if ($is_utf8_locale) {
1379 use locale ':not_characters';
1380 push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/;
1383 push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/;
1386 report_multi_result($Locale, $locales_test_number, \@f);
1388 ++$locales_test_number;
1390 $test_names{$locales_test_number} = 'Verify that [:graph:] is a subset of [:print:]';
1391 for (map { chr } 0..255) {
1392 if ($is_utf8_locale) {
1393 use locale ':not_characters';
1394 push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/;
1397 push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/;
1400 report_multi_result($Locale, $locales_test_number, \@f);
1402 ++$locales_test_number;
1404 $test_names{$locales_test_number} = 'Verify that the space character is in [:print:]';
1405 if ($is_utf8_locale) {
1406 use locale ':not_characters';
1407 push @f, " " if " " !~ /[[:print:]]/;
1410 push @f, " " if " " !~ /[[:print:]]/;
1412 report_multi_result($Locale, $locales_test_number, \@f);
1414 ++$locales_test_number;
1416 $test_names{$locales_test_number} = 'Verify that isn\'t both [:cntrl:] and [:print:]';
1417 for (map { chr } 0..255) {
1418 if ($is_utf8_locale) {
1419 use locale ':not_characters';
1420 push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/);
1423 push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/);
1426 report_multi_result($Locale, $locales_test_number, \@f);
1428 ++$locales_test_number;
1430 $test_names{$locales_test_number} = 'Verify that isn\'t both [:alpha:] and [:digit:]';
1431 for (map { chr } 0..255) {
1432 if ($is_utf8_locale) {
1433 use locale ':not_characters';
1434 push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/;
1437 push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/;
1440 report_multi_result($Locale, $locales_test_number, \@f);
1442 ++$locales_test_number;
1444 $test_names{$locales_test_number} = 'Verify that isn\'t both [:alnum:] and [:punct:]';
1445 for (map { chr } 0..255) {
1446 if ($is_utf8_locale) {
1447 use locale ':not_characters';
1448 push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/;
1451 push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/;
1454 report_multi_result($Locale, $locales_test_number, \@f);
1456 ++$locales_test_number;
1458 $test_names{$locales_test_number} = 'Verify that isn\'t both [:xdigit:] and [:punct:]';
1459 for (map { chr } 0..255) {
1460 if ($is_utf8_locale) {
1461 use locale ':not_characters';
1462 push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/);
1465 push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/);
1468 report_multi_result($Locale, $locales_test_number, \@f);
1470 ++$locales_test_number;
1472 $test_names{$locales_test_number} = 'Verify that isn\'t both [:graph:] and [:space:]';
1473 for (map { chr } 0..255) {
1474 if ($is_utf8_locale) {
1475 use locale ':not_characters';
1476 push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/);
1479 push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/);
1482 report_multi_result($Locale, $locales_test_number, \@f);
1484 foreach ($first_casing_test_number..$locales_test_number) {
1485 push @problematical_tests, $_;
1489 # Test for read-only scalars' locale vs non-locale comparisons.
1495 if ($is_utf8_locale) {
1496 use locale ':not_characters';
1497 $ok = ($a cmp "qwerty") == 0;
1501 $ok = ($a cmp "qwerty") == 0;
1503 report_result($Locale, ++$locales_test_number, $ok);
1504 $test_names{$locales_test_number} = 'Verify that cmp works with a read-only scalar; no- vs locale';
1508 my ($from, $to, $lesser, $greater,
1509 @test, %test, $test, $yes, $no, $sign);
1511 ++$locales_test_number;
1512 $test_names{$locales_test_number} = 'Verify that "le", "ne", etc work';
1513 $not_necessarily_a_problem_test_number = $locales_test_number;
1516 $from = int(($_*@{$posixes{'word'}})/10);
1517 $to = $from + int(@{$posixes{'word'}}/10);
1518 $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}});
1519 $lesser = join('', @{$posixes{'word'}}[$from..$to]);
1520 # Select a slice one character on.
1522 $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}});
1523 $greater = join('', @{$posixes{'word'}}[$from..$to]);
1524 if ($is_utf8_locale) {
1525 use locale ':not_characters';
1526 ($yes, $no, $sign) = ($lesser lt $greater
1528 : ("not ", " ", -1));
1532 ($yes, $no, $sign) = ($lesser lt $greater
1534 : ("not ", " ", -1));
1536 # all these tests should FAIL (return 0). Exact lt or gt cannot
1537 # be tested because in some locales, say, eacute and E may test
1541 $no.' ($lesser le $greater)', # 1
1542 'not ($lesser ne $greater)', # 2
1543 ' ($lesser eq $greater)', # 3
1544 $yes.' ($lesser ge $greater)', # 4
1545 $yes.' ($lesser ge $greater)', # 5
1546 $yes.' ($greater le $lesser )', # 7
1547 'not ($greater ne $lesser )', # 8
1548 ' ($greater eq $lesser )', # 9
1549 $no.' ($greater ge $lesser )', # 10
1550 'not (($lesser cmp $greater) == -($sign))' # 11
1552 @test{@test} = 0 x @test;
1554 for my $ti (@test) {
1555 if ($is_utf8_locale) {
1556 use locale ':not_characters';
1557 $test{$ti} = eval $ti;
1560 # Already in 'use locale';
1561 $test{$ti} = eval $ti;
1563 $test ||= $test{$ti}
1565 report_result($Locale, $locales_test_number, $test == 0);
1567 debug "# lesser = '$lesser'\n";
1568 debug "# greater = '$greater'\n";
1569 debug "# lesser cmp greater = ",
1570 $lesser cmp $greater, "\n";
1571 debug "# greater cmp lesser = ",
1572 $greater cmp $lesser, "\n";
1573 debug "# (greater) from = $from, to = $to\n";
1574 for my $ti (@test) {
1575 debugf("# %-40s %-4s", $ti,
1576 $test{$ti} ? 'FAIL' : 'ok');
1577 if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) {
1578 debugf("(%s == %4d)", $1, eval $1);
1616 if (! $is_utf8_locale) {
1619 my ($x, $y) = (1.23, 1.23);
1622 printf ''; # printf used to reset locale to "C"
1627 my $z = sprintf ''; # sprintf used to reset locale to "C"
1634 local $SIG{__WARN__} =
1640 # The == (among other ops) used to warn for locales
1641 # that had something else than "." as the radix character.
1665 $ok12 = abs(($f + $g) - 3.57) < 0.01;
1667 $ok14 = $ok15 = $ok16 = 1; # Skip for non-utf8 locales
1671 $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i);
1673 $ok18 = $j eq sprintf("%g:%g", $h, $i);
1676 use locale ':not_characters';
1678 my ($x, $y) = (1.23, 1.23);
1680 printf ''; # printf used to reset locale to "C"
1685 my $z = sprintf ''; # sprintf used to reset locale to "C"
1691 local $SIG{__WARN__} =
1717 $ok12 = abs(($f + $g) - 3.57) < 0.01;
1720 # Look for non-ASCII error messages, and verify that the first
1721 # such is in UTF-8 (the others almost certainly will be like the
1724 foreach my $err (keys %!) {
1726 $! = eval "&Errno::$err"; # Convert to strerror() output
1727 my $strerror = "$!";
1728 if ("$strerror" =~ /\P{ASCII}/) {
1729 $ok14 = utf8::is_utf8($strerror);
1734 # Similarly, we verify that a non-ASCII radix is in UTF-8. This
1735 # also catches if there is a disparity between sprintf and
1738 my $string_g = "$g";
1739 my $sprintf_g = sprintf("%g", $g);
1741 $ok15 = $string_g =~ / ^ \p{ASCII}+ $ /x || utf8::is_utf8($string_g);
1742 $ok16 = $sprintf_g eq $string_g;
1746 $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i);
1748 $ok18 = $j eq sprintf("%g:%g", $h, $i);
1751 report_result($Locale, ++$locales_test_number, $ok1);
1752 $test_names{$locales_test_number} = 'Verify that an intervening printf doesn\'t change assignment results';
1753 my $first_a_test = $locales_test_number;
1755 debug "# $first_a_test..$locales_test_number: \$a = $a, \$b = $b, Locale = $Locale\n";
1757 report_result($Locale, ++$locales_test_number, $ok2);
1758 $test_names{$locales_test_number} = 'Verify that an intervening sprintf doesn\'t change assignment results';
1760 my $first_c_test = $locales_test_number;
1762 report_result($Locale, ++$locales_test_number, $ok3);
1763 $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a constant';
1765 report_result($Locale, ++$locales_test_number, $ok4);
1766 $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar';
1768 report_result($Locale, ++$locales_test_number, $ok5);
1769 $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar and an intervening sprintf';
1771 debug "# $first_c_test..$locales_test_number: \$c = $c, \$d = $d, Locale = $Locale\n";
1773 report_result($Locale, ++$locales_test_number, $ok6);
1774 $test_names{$locales_test_number} = 'Verify that can assign stringified under inner no-locale block';
1775 my $first_e_test = $locales_test_number;
1777 report_result($Locale, ++$locales_test_number, $ok7);
1778 $test_names{$locales_test_number} = 'Verify that "==" with a scalar still works in inner no locale';
1780 report_result($Locale, ++$locales_test_number, $ok8);
1781 $test_names{$locales_test_number} = 'Verify that "==" with a scalar and an intervening sprintf still works in inner no locale';
1783 debug "# $first_e_test..$locales_test_number: \$e = $e, no locale\n";
1785 report_result($Locale, ++$locales_test_number, $ok9);
1786 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a constant';
1787 my $first_f_test = $locales_test_number;
1789 report_result($Locale, ++$locales_test_number, $ok10);
1790 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar';
1792 report_result($Locale, ++$locales_test_number, $ok11);
1793 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar and an intervening sprintf';
1795 report_result($Locale, ++$locales_test_number, $ok12);
1796 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix can participate in an addition and function call as numeric';
1798 report_result($Locale, ++$locales_test_number, $ok13);
1799 $test_names{$locales_test_number} = 'Verify that don\'t get warning under "==" even if radix is not a dot';
1801 report_result($Locale, ++$locales_test_number, $ok14);
1802 $test_names{$locales_test_number} = 'Verify that non-ASCII UTF-8 error messages are in UTF-8';
1804 report_result($Locale, ++$locales_test_number, $ok15);
1805 $test_names{$locales_test_number} = 'Verify that a number with a UTF-8 radix has a UTF-8 stringification';
1807 report_result($Locale, ++$locales_test_number, $ok16);
1808 $test_names{$locales_test_number} = 'Verify that a sprintf of a number with a UTF-8 radix yields UTF-8';
1810 report_result($Locale, ++$locales_test_number, $ok17);
1811 $test_names{$locales_test_number} = 'Verify that a sprintf of a number outside locale scope uses a dot radix';
1813 report_result($Locale, ++$locales_test_number, $ok18);
1814 $test_names{$locales_test_number} = 'Verify that a sprintf of a number back within locale scope uses locale radix';
1816 debug "# $first_f_test..$locales_test_number: \$f = $f, \$g = $g, back to locale = $Locale\n";
1818 # Does taking lc separately differ from taking
1819 # the lc "in-line"? (This was the bug 19990704.002, change #3568.)
1820 # The bug was in the caching of the 'o'-magic.
1821 if (! $is_utf8_locale) {
1827 return $lc0 cmp $lc1;
1831 return lc($_[0]) cmp lc($_[1]);
1838 report_result($Locale, ++$locales_test_number,
1839 lcA($x, $y) == 1 && lcB($x, $y) == 1 ||
1840 lcA($x, $z) == 0 && lcB($x, $z) == 0);
1843 use locale ':not_characters';
1848 return $lc0 cmp $lc1;
1852 return lc($_[0]) cmp lc($_[1]);
1859 report_result($Locale, ++$locales_test_number,
1860 lcC($x, $y) == 1 && lcD($x, $y) == 1 ||
1861 lcC($x, $z) == 0 && lcD($x, $z) == 0);
1863 $test_names{$locales_test_number} = 'Verify "lc(foo) cmp lc(bar)" is the same as using intermediaries for the cmp';
1865 # Does lc of an UPPER (if different from the UPPER) match
1866 # case-insensitively the UPPER, and does the UPPER match
1867 # case-insensitively the lc of the UPPER. And vice versa.
1871 my $re = qr/[\[\(\{\*\+\?\|\^\$\\]/;
1874 ++$locales_test_number;
1875 $test_names{$locales_test_number} = 'Verify case insensitive matching works';
1876 foreach my $x (sort keys %UPPER) {
1877 if (! $is_utf8_locale) {
1879 next unless uc $y eq $x;
1880 debug_more( "# UPPER=", disp_chars(($x)),
1881 "; lc=", disp_chars(($y)), "; ",
1882 "; fc=", disp_chars((fc $x)), "; ",
1883 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
1884 $x =~ /$y/i ? 1 : 0,
1886 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
1887 $y =~ /$x/i ? 1 : 0,
1890 # If $x and $y contain regular expression characters
1891 # AND THEY lowercase (/i) to regular expression characters,
1892 # regcomp() will be mightily confused. No, the \Q doesn't
1893 # help here (maybe regex engine internal lowercasing
1894 # is done after the \Q?) An example of this happening is
1895 # the bg_BG (Bulgarian) locale under EBCDIC (OS/390 USS):
1896 # the chr(173) (the "[") is the lowercase of the chr(235).
1898 # Similarly losing EBCDIC locales include cs_cz, cs_CZ,
1899 # el_gr, el_GR, en_us.IBM-037 (!), en_US.IBM-037 (!),
1900 # et_ee, et_EE, hr_hr, hr_HR, hu_hu, hu_HU, lt_LT,
1901 # mk_mk, mk_MK, nl_nl.IBM-037, nl_NL.IBM-037,
1902 # pl_pl, pl_PL, ro_ro, ro_RO, ru_ru, ru_RU,
1903 # sk_sk, sk_SK, sl_si, sl_SI, tr_tr, tr_TR.
1905 # Similar things can happen even under (bastardised)
1906 # non-EBCDIC locales: in many European countries before the
1907 # advent of ISO 8859-x nationally customised versions of
1908 # ISO 646 were devised, reusing certain punctuation
1909 # characters for modified characters needed by the
1910 # country/language. For example, the "|" might have
1911 # stood for U+00F6 or LATIN SMALL LETTER O WITH DIAERESIS.
1913 if ($x =~ $re || $y =~ $re) {
1914 print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n";
1917 push @f, $x unless $x =~ /$y/i && $y =~ /$x/i;
1919 # fc is not a locale concept, so Perl uses lc for it.
1920 push @f, $x unless lc $x eq fc $x;
1923 use locale ':not_characters';
1925 next unless uc $y eq $x;
1926 debug_more( "# UPPER=", disp_chars(($x)),
1927 "; lc=", disp_chars(($y)), "; ",
1928 "; fc=", disp_chars((fc $x)), "; ",
1929 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
1930 $x =~ /$y/i ? 1 : 0,
1932 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
1933 $y =~ /$x/i ? 1 : 0,
1936 push @f, $x unless $x =~ /$y/i && $y =~ /$x/i;
1938 # The places where Unicode's lc is different from fc are
1939 # skipped here by virtue of the 'next unless uc...' line above
1940 push @f, $x unless lc $x eq fc $x;
1944 foreach my $x (sort keys %lower) {
1945 if (! $is_utf8_locale) {
1947 next unless lc $y eq $x;
1948 debug_more( "# lower=", disp_chars(($x)),
1949 "; uc=", disp_chars(($y)), "; ",
1950 "; fc=", disp_chars((fc $x)), "; ",
1951 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
1952 $x =~ /$y/i ? 1 : 0,
1954 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
1955 $y =~ /$x/i ? 1 : 0,
1957 if ($x =~ $re || $y =~ $re) { # See above.
1958 print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n";
1961 push @f, $x unless $x =~ /$y/i && $y =~ /$x/i;
1963 push @f, $x unless lc $x eq fc $x;
1966 use locale ':not_characters';
1968 next unless lc $y eq $x;
1969 debug_more( "# lower=", disp_chars(($x)),
1970 "; uc=", disp_chars(($y)), "; ",
1971 "; fc=", disp_chars((fc $x)), "; ",
1972 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
1973 $x =~ /$y/i ? 1 : 0,
1975 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
1976 $y =~ /$x/i ? 1 : 0,
1978 push @f, $x unless $x =~ /$y/i && $y =~ /$x/i;
1980 push @f, $x unless lc $x eq fc $x;
1983 report_multi_result($Locale, $locales_test_number, \@f);
1984 push @problematical_tests, $locales_test_number;
1990 ++$locales_test_number;
1991 $test_names{$locales_test_number} = 'Verify atof with locale radix and negative exponent';
1993 my $radix = POSIX::localeconv()->{decimal_point};
1995 "3.14e+9", "3${radix}14e+9", "3.14e-9", "3${radix}14e-9",
1996 "-3.14e+9", "-3${radix}14e+9", "-3.14e-9", "-3${radix}14e-9",
1999 if (! $is_utf8_locale) {
2001 for my $num (@nums) {
2003 unless sprintf("%g", $num) =~ /3.+14/;
2007 use locale ':not_characters';
2008 for my $num (@nums) {
2010 unless sprintf("%g", $num) =~ /3.+14/;
2014 report_result($Locale, $locales_test_number, @f == 0);
2016 print "# failed $locales_test_number locale '$Locale' numbers @f\n"
2021 my $final_locales_test_number = $locales_test_number;
2023 # Recount the errors.
2025 foreach $test_num ($first_locales_test_number..$final_locales_test_number) {
2026 if (%setlocale_failed) {
2029 elsif ($Problem{$test_num} || !defined $Okay{$test_num} || !@{$Okay{$test_num}}) {
2030 if (defined $not_necessarily_a_problem_test_number
2031 && $test_num == $not_necessarily_a_problem_test_number)
2033 print "# The failure of test $not_necessarily_a_problem_test_number is not necessarily fatal.\n";
2034 print "# It usually indicates a problem in the environment,\n";
2035 print "# not in Perl itself.\n";
2037 if ($Okay{$test_num} && grep { $_ == $test_num } @problematical_tests) {
2038 # Round to nearest .1%
2039 my $percent_fail = (int(.5 + (1000 * scalar(keys $Problem{$test_num})
2040 / scalar(@Locale))))
2042 if (! $debug && $percent_fail < $acceptable_fold_failure_percentage)
2044 $test_names{$test_num} .= 'TODO';
2045 print "# ", 100 - $percent_fail, "% of locales pass the following test, so it is likely that the failures\n";
2046 print "# are errors in the locale definitions. The test is marked TODO, as the\n";
2047 print "# problem is not likely to be Perl's\n";
2052 print "# The code points that had this failure are given above. Look for lines\n";
2053 print "# that match 'failed $test_num'\n";
2056 print "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n";
2057 print "# Then look at that output for lines that match 'failed $test_num'\n";
2061 print "ok $test_num";
2062 if (defined $test_names{$test_num}) {
2063 # If TODO is in the test name, make it thus
2064 my $todo = $test_names{$test_num} =~ s/TODO\s*//;
2065 print " $test_names{$test_num}";
2066 print " # TODO" if $todo;
2071 $test_num = $final_locales_test_number;
2073 unless ( $^O eq 'dragonfly' ) {
2077 local $SIG{__WARN__} = sub {
2078 $warned = $_[0] =~ /uninitialized/;
2080 my $z = "y" . setlocale(&POSIX::LC_ALL, "xyzzy");
2081 ok($warned, "variable set to setlocale(BAD LOCALE) is considered uninitialized");
2084 # Test that tainting and case changing works on utf8 strings. These tests are
2085 # placed last to avoid disturbing the hard-coded test numbers that existed at
2086 # the time these were added above this in this file.
2087 # This also tests that locale overrides unicode_strings in the same scope for
2089 setlocale(&POSIX::LC_ALL, "C");
2092 use feature 'unicode_strings';
2094 foreach my $function ("uc", "ucfirst", "lc", "lcfirst", "fc") {
2095 my @list; # List of code points to test for $function
2097 # Used to calculate the changed case for ASCII characters by using the
2098 # ord, instead of using one of the functions under test.
2099 my $ascii_case_change_delta;
2100 my $above_latin1_case_change_delta; # Same for the specific ords > 255
2103 # We test an ASCII character, which should change case and be tainted;
2104 # a Latin1 character, which shouldn't change case under this C locale,
2106 # an above-Latin1 character that when the case is changed would cross
2107 # the 255/256 boundary, so doesn't change case and isn't tainted
2108 # (the \x{149} is one of these, but changes into 2 characters, the
2109 # first one of which doesn't cross the boundary.
2110 # the final one in each list is an above-Latin1 character whose case
2111 # does change, and shouldn't be tainted. The code below uses its
2112 # position in its list as a marker to indicate that it, unlike the
2113 # other code points above ASCII, has a successful case change
2114 if ($function =~ /^u/) {
2115 @list = ("", "a", "\xe0", "\xff", "\x{fb00}", "\x{149}", "\x{101}");
2116 $ascii_case_change_delta = -32;
2117 $above_latin1_case_change_delta = -1;
2120 @list = ("", "A", "\xC0", "\x{17F}", "\x{100}");
2121 $ascii_case_change_delta = +32;
2122 $above_latin1_case_change_delta = +1;
2124 foreach my $is_utf8_locale (0 .. 1) {
2125 foreach my $j (0 .. $#list) {
2126 my $char = $list[$j];
2128 for my $encoded_in_utf8 (0 .. 1) {
2131 if (! $is_utf8_locale) {
2132 $should_be = ($j == $#list)
2133 ? chr(ord($char) + $above_latin1_case_change_delta)
2134 : (length $char == 0 || ord($char) > 127)
2136 : chr(ord($char) + $ascii_case_change_delta);
2138 # This monstrosity is in order to avoid using an eval,
2139 # which might perturb the results
2140 $changed = ($function eq "uc")
2142 : ($function eq "ucfirst")
2144 : ($function eq "lc")
2146 : ($function eq "lcfirst")
2148 : ($function eq "fc")
2150 : die("Unexpected function \"$function\"");
2156 # For utf8-locales the case changing functions
2157 # should work just like they do outside of locale.
2158 # Can use eval here because not testing it when
2160 $should_be = eval "$function('$char')";
2161 die "Unexpected eval error $@ from 'eval \"$function('$char')\"'" if $@;
2164 use locale ':not_characters';
2165 $changed = ($function eq "uc")
2167 : ($function eq "ucfirst")
2169 : ($function eq "lc")
2171 : ($function eq "lcfirst")
2173 : ($function eq "fc")
2175 : die("Unexpected function \"$function\"");
2177 ok($changed eq $should_be,
2178 "$function(\"$char\") in C locale "
2179 . (($is_utf8_locale)
2180 ? "(use locale ':not_characters'"
2182 . (($encoded_in_utf8)
2183 ? "; encoded in utf8)"
2184 : "; not encoded in utf8)")
2185 . " should be \"$should_be\", got \"$changed\"");
2187 # Tainting shouldn't happen for utf8 locales, empty
2188 # strings, or those characters above 255.
2189 (! $is_utf8_locale && length($char) > 0 && ord($char) < 256)
2190 ? check_taint($changed)
2191 : check_taint_not($changed);
2193 # Use UTF-8 next time through the loop
2194 utf8::upgrade($char);
2201 # Give final advice.
2205 foreach ($first_locales_test_number..$final_locales_test_number) {
2207 my @f = sort keys %{ $Problem{$_} };
2208 my $f = join(" ", @f);
2209 $f =~ s/(.{50,60}) /$1\n#\t/g;
2212 "# The locale ", (@f == 1 ? "definition" : "definitions"), "\n#\n",
2214 "# on your system may have errors because the locale test $_\n",
2215 "# \"$test_names{$_}\"\n",
2216 "# failed in ", (@f == 1 ? "that locale" : "those locales"),
2220 # If your users are not using these locales you are safe for the moment,
2221 # but please report this failure first to perlbug\@perl.com using the
2222 # perlbug script (as described in the INSTALL file) so that the exact
2223 # details of the failures can be sorted out first and then your operating
2224 # system supplier can be alerted about these anomalies.
2231 # Tell which locales were okay and which were not.
2236 foreach my $l (@Locale) {
2238 if ($setlocale_failed{$l}) {
2243 ($first_locales_test_number..$final_locales_test_number)
2245 $p++ if $Problem{$t}{$l};
2248 push @s, $l if $p == 0;
2249 push @F, $l unless $p == 0;
2253 my $s = join(" ", @s);
2254 $s =~ s/(.{50,60}) /$1\n#\t/g;
2257 "# The following locales\n#\n",
2259 "# tested okay.\n#\n",
2261 warn "# None of your locales were fully okay.\n";
2265 my $F = join(" ", @F);
2266 $F =~ s/(.{50,60}) /$1\n#\t/g;
2270 $details = "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n";
2272 elsif ($debug == 1) {
2273 $details = "# For even more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=2.\n";
2277 "# The following locales\n#\n",
2279 "# had problems.\n#\n",
2282 warn "# None of your locales were broken.\n";
2286 print "1..$test_num\n";