3 # This tests plain 'use locale' and adorned 'use locale ":not_characters"'
4 # Because these pragmas are compile time, and I (khw) am trying to test
5 # without using 'eval' as much as possible, which might cloud the issue, the
6 # crucial parts of the code are duplicated in a block for each pragma.
8 # To make a TODO test, add the string 'TODO' to its %test_names value
10 binmode STDOUT, ':utf8';
11 binmode STDERR, ':utf8';
17 require Config; import Config;
18 if (!$Config{d_setlocale} || $Config{ccflags} =~ /\bD?NO_LOCALE\b/) {
22 require './loc_tools.pl';
29 # =1 adds debugging output; =2 increases the verbosity somewhat
30 my $debug = $ENV{PERL_DEBUG_FULL_TEST} // 0;
32 # Certain tests have been shown to be problematical for a few locales. Don't
33 # fail them unless at least this percentage of the tested locales fail.
34 # Some Windows machines are defective in every locale but the C, calling \t
35 # printable; superscripts to be digits, etc. See
36 # http://markmail.org/message/5jwam4xsx4amsdnv. Also on AIX machines, many
37 # locales call a no-break space a graphic.
38 # (There aren't 1000 locales currently in existence, so 99.9 works)
39 my $acceptable_fold_failure_percentage = ($^O =~ / ^ ( MSWin32 | AIX ) $ /ix)
43 # The list of test numbers of the problematic tests.
44 my @problematical_tests;
49 my $dumper = Dumpvalue->new(
56 my($mess) = join "", @_;
58 print $dumper->stringify($mess,1), "\n";
62 return unless $debug > 1;
75 my ($result, $message) = @_;
76 $message = "" unless defined $message;
78 print 'not ' unless ($result);
79 print "ok " . ++$test_num;
84 # First we'll do a lot of taint checking for locales.
85 # This is the easiest to test, actually, as any locale,
86 # even the default locale will taint under 'use locale'.
88 sub is_tainted { # hello, camel two.
89 no warnings 'uninitialized' ;
92 not eval { $dummy = join("", @_), kill 0; 1 }
95 sub check_taint ($;$) {
96 my $message_tail = $_[1] // "";
97 $message_tail = ": $message_tail" if $message_tail;
98 ok is_tainted($_[0]), "verify that is tainted$message_tail";
101 sub check_taint_not ($;$) {
102 my $message_tail = $_[1] // "";
103 $message_tail = ": $message_tail" if $message_tail;
104 ok((not is_tainted($_[0])), "verify that isn't tainted$message_tail");
107 "\tb\t" =~ /^m?(\s)(.*)\1$/;
108 check_taint_not $&, "not tainted outside 'use locale'";
111 use locale; # engage locale and therefore locale taint.
117 check_taint ucfirst($a);
123 check_taint lcfirst($a);
126 check_taint_not sprintf('%e', 123.456);
127 check_taint_not sprintf('%f', 123.456);
128 check_taint_not sprintf('%g', 123.456);
129 check_taint_not sprintf('%d', 123.456);
130 check_taint_not sprintf('%x', 123.456);
132 $_ = $a; # untaint $_
134 $_ = uc($a); # taint $_
138 /(\w)/; # taint $&, $`, $', $+, $1.
146 /(.)/; # untaint $&, $`, $', $+, $1.
154 /(\W)/; # taint $&, $`, $', $+, $1.
162 /(\s)/; # taint $&, $`, $', $+, $1.
170 /(\S)/; # taint $&, $`, $', $+, $1.
178 $_ = $a; # untaint $_
182 /(b)/; # this must not taint
190 $_ = $a; # untaint $_
194 $b = uc($a); # taint $b
195 s/(.+)/$b/; # this must taint only the $_
205 $_ = $a; # untaint $_
207 s/(.+)/b/; # this must not taint
216 $b = $a; # untaint $b
218 ($b = $a) =~ s/\w/$&/;
219 check_taint $b; # $b should be tainted.
220 check_taint_not $a; # $a should be not.
222 $_ = $a; # untaint $_
224 s/(\w)/\l$1/; # this must taint
233 $_ = $a; # untaint $_
235 s/(\w)/\L$1/; # this must taint
244 $_ = $a; # untaint $_
246 s/(\w)/\u$1/; # this must taint
255 $_ = $a; # untaint $_
257 s/(\w)/\U$1/; # this must taint
266 # After all this tainting $a should be cool.
271 check_taint_not $1, '"a" =~ /([a-z])/';
272 "foo.bar_baz" =~ /^(.*)[._](.*?)$/; # Bug 120675
273 check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/';
275 # BE SURE TO COPY ANYTHING YOU ADD to the block below
277 { # This is just the previous tests copied here with a different
278 # compile-time pragma.
280 use locale ':not_characters'; # engage restricted locale with different
285 check_taint_not uc($a);
286 check_taint_not "\U$a";
287 check_taint_not ucfirst($a);
288 check_taint_not "\u$a";
289 check_taint_not lc($a);
290 check_taint_not fc($a);
291 check_taint_not "\L$a";
292 check_taint_not "\F$a";
293 check_taint_not lcfirst($a);
294 check_taint_not "\l$a";
296 check_taint_not sprintf('%e', 123.456);
297 check_taint_not sprintf('%f', 123.456);
298 check_taint_not sprintf('%g', 123.456);
299 check_taint_not sprintf('%d', 123.456);
300 check_taint_not sprintf('%x', 123.456);
302 $_ = $a; # untaint $_
304 $_ = uc($a); # taint $_
308 /(\w)/; # taint $&, $`, $', $+, $1.
316 /(.)/; # untaint $&, $`, $', $+, $1.
324 /(\W)/; # taint $&, $`, $', $+, $1.
332 /(\s)/; # taint $&, $`, $', $+, $1.
340 /(\S)/; # taint $&, $`, $', $+, $1.
348 $_ = $a; # untaint $_
352 /(b)/; # this must not taint
360 $_ = $a; # untaint $_
364 $b = uc($a); # taint $b
365 s/(.+)/$b/; # this must taint only the $_
375 $_ = $a; # untaint $_
377 s/(.+)/b/; # this must not taint
386 $b = $a; # untaint $b
388 ($b = $a) =~ s/\w/$&/;
389 check_taint_not $b; # $b should be tainted.
390 check_taint_not $a; # $a should be not.
392 $_ = $a; # untaint $_
394 s/(\w)/\l$1/; # this must taint
403 $_ = $a; # untaint $_
405 s/(\w)/\L$1/; # this must taint
414 $_ = $a; # untaint $_
416 s/(\w)/\u$1/; # this must taint
425 $_ = $a; # untaint $_
427 s/(\w)/\U$1/; # this must taint
436 # After all this tainting $a should be cool.
441 check_taint_not $1, '"a" =~ /([a-z])/';
442 "foo.bar_baz" =~ /^(.*)[._](.*?)$/; # Bug 120675
443 check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/';
446 # Here are in scope of 'use locale'
448 # I think we've seen quite enough of taint.
449 # Let us do some *real* locale work now,
450 # unless setlocale() is missing (i.e. minitest).
452 # The test number before our first setlocale()
453 my $final_without_setlocale = $test_num;
457 debug "# Scanning for locales...\n";
459 require POSIX; import POSIX ':locale_h';
461 my @Locale = find_locales([ &POSIX::LC_CTYPE, &POSIX::LC_ALL ]);
463 debug "# Locales =\n";
469 print "1..$test_num\n";
474 setlocale(&POSIX::LC_ALL, "C");
481 my @Added_alpha; # Alphas that aren't in the C locale.
485 # This returns a display string denoting the input parameter @_, each
486 # entry of which is a single character in the range 0-255. The first part
487 # of the output is a string of the characters in @_ that are ASCII
488 # graphics, and hence unambiguously displayable. They are given by code
489 # point order. The second part is the remaining code points, the ordinals
490 # of which are each displayed as 2-digit hex. Blanks are inserted so as
491 # to keep anything from the first part looking like a 2-digit hex number.
494 my @chars = sort { ord $a <=> ord $b } @_;
498 push @chars, chr(258); # This sentinel simplifies the loop termination
500 foreach my $i (0 .. @chars - 1) {
501 my $char = $chars[$i];
505 # We avoid using [:posix:] classes, as these are being tested in this
506 # file. Each equivalence class below is for things that can appear in
507 # a range; those that can't be in a range have class -1. 0 for those
508 # which should be output in hex; and >0 for the other ranges
509 if ($char =~ /[A-Z]/) {
512 elsif ($char =~ /[a-z]/) {
515 elsif ($char =~ /[0-9]/) {
518 # Uncomment to get literal punctuation displayed instead of hex
519 #elsif ($char =~ /[[\]!"#\$\%&\'()*+,.\/:\\;<=>?\@\^_`{|}~-]/) {
520 # $class = -1; # Punct never appears in a range
523 $class = 0; # Output in hex
526 if (! defined $range_start) {
528 $output .= " " . $char;
531 $range_start = ord $char;
532 $start_class = $class;
534 } # A range ends if not consecutive, or the class-type changes
535 elsif (ord $char != ($range_end = ord($chars[$i-1])) + 1
536 || $class != $start_class)
539 # Here, the current character is not in the range. This means the
540 # previous character must have been. Output the range up through
542 my $range_length = $range_end - $range_start + 1;
543 if ($start_class > 0) {
544 $output .= " " . chr($range_start);
545 $output .= "-" . chr($range_end) if $range_length > 1;
548 $output .= sprintf(" %02X", $range_start);
549 $output .= sprintf("-%02X", $range_end) if $range_length > 1;
552 # Handle the new current character, as potentially beginning a new
564 my ($Locale, $i, $pass_fail, $message) = @_;
566 $message = " ($message)" if $message;
567 unless ($pass_fail) {
568 $Problem{$i}{$Locale} = 1;
569 debug "# failed $i ($test_names{$i}) with locale '$Locale'$message\n";
571 push @{$Okay{$i}}, $Locale;
575 sub report_multi_result {
576 my ($Locale, $i, $results_ref) = @_;
578 # $results_ref points to an array, each element of which is a character that was
579 # in error for this test numbered '$i'. If empty, the test passed
583 $message = join " ", "for", disp_chars(@$results_ref);
585 report_result($Locale, $i, @$results_ref == 0, $message);
588 my $first_locales_test_number = $final_without_setlocale + 1;
589 my $locales_test_number;
590 my $not_necessarily_a_problem_test_number;
591 my $first_casing_test_number;
592 my %setlocale_failed; # List of locales that setlocale() didn't work on
594 foreach my $Locale (@Locale) {
595 $locales_test_number = $first_locales_test_number - 1;
597 debug "# Locale = $Locale\n";
599 unless (setlocale(&POSIX::LC_ALL, $Locale)) {
600 $setlocale_failed{$Locale} = $Locale;
604 # We test UTF-8 locales only under ':not_characters'; It is easier to
605 # test them in other test files than here. Non- UTF-8 locales are tested
606 # only under plain 'use locale', as otherwise we would have to convert
607 # everything in them to Unicode.
609 my %UPPER = (); # All alpha X for which uc(X) == X and lc(X) != X
610 my %lower = (); # All alpha X for which lc(X) == X and uc(X) != X
611 my %BoThCaSe = (); # All alpha X for which uc(X) == lc(X) == X
613 my $is_utf8_locale = is_locale_utf8($Locale);
615 debug "# is utf8 locale? = $is_utf8_locale\n";
617 my $radix = localeconv()->{decimal_point};
618 if ($radix !~ / ^ [[:ascii:]] + $/x) {
620 $radix = disp_chars(split "", $radix);
622 debug "# radix = $radix\n";
624 if (! $is_utf8_locale) {
626 @{$posixes{'word'}} = grep /\w/, map { chr } 0..255;
627 @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255;
628 @{$posixes{'space'}} = grep /\s/, map { chr } 0..255;
629 @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255;
630 @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255;
631 @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255;
632 @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255;
633 @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255;
634 @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255;
635 @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255;
636 @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255;
637 @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255;
638 @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255;
639 @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255;
640 @{$posixes{'cased'}} = grep /[[:upper:]]/i, map {chr } 0..255;
642 # Sieve the uppercase and the lowercase.
644 for (@{$posixes{'word'}}) {
645 if (/[^\d_]/) { # skip digits and the _
656 use locale ':not_characters';
657 @{$posixes{'word'}} = grep /\w/, map { chr } 0..255;
658 @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255;
659 @{$posixes{'space'}} = grep /\s/, map { chr } 0..255;
660 @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255;
661 @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255;
662 @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255;
663 @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255;
664 @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255;
665 @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255;
666 @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255;
667 @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255;
668 @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255;
669 @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255;
670 @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255;
671 @{$posixes{'cased'}} = grep /[[:upper:]]/i, map {chr } 0..255;
672 for (@{$posixes{'word'}}) {
673 if (/[^\d_]/) { # skip digits and the _
684 # Ordered, where possible, in groups of "this is a subset of the next
686 debug "# :upper: = ", disp_chars(@{$posixes{'upper'}}), "\n";
687 debug "# :lower: = ", disp_chars(@{$posixes{'lower'}}), "\n";
688 debug "# :cased: = ", disp_chars(@{$posixes{'cased'}}), "\n";
689 debug "# :alpha: = ", disp_chars(@{$posixes{'alpha'}}), "\n";
690 debug "# :alnum: = ", disp_chars(@{$posixes{'alnum'}}), "\n";
691 debug "# w = ", disp_chars(@{$posixes{'word'}}), "\n";
692 debug "# :graph: = ", disp_chars(@{$posixes{'graph'}}), "\n";
693 debug "# :print: = ", disp_chars(@{$posixes{'print'}}), "\n";
694 debug "# d = ", disp_chars(@{$posixes{'digit'}}), "\n";
695 debug "# :xdigit: = ", disp_chars(@{$posixes{'xdigit'}}), "\n";
696 debug "# :blank: = ", disp_chars(@{$posixes{'blank'}}), "\n";
697 debug "# s = ", disp_chars(@{$posixes{'space'}}), "\n";
698 debug "# :punct: = ", disp_chars(@{$posixes{'punct'}}), "\n";
699 debug "# :cntrl: = ", disp_chars(@{$posixes{'cntrl'}}), "\n";
700 debug "# :ascii: = ", disp_chars(@{$posixes{'ascii'}}), "\n";
702 foreach (keys %UPPER) {
704 $BoThCaSe{$_}++ if exists $lower{$_};
706 foreach (keys %lower) {
707 $BoThCaSe{$_}++ if exists $UPPER{$_};
709 foreach (keys %BoThCaSe) {
715 foreach my $ord ( 0 .. 255 ) {
716 $Unassigned{chr $ord} = 1;
718 foreach my $class (keys %posixes) {
719 foreach my $char (@{$posixes{$class}}) {
720 delete $Unassigned{$char};
724 debug "# UPPER = ", disp_chars(sort { ord $a <=> ord $b } keys %UPPER), "\n";
725 debug "# lower = ", disp_chars(sort { ord $a <=> ord $b } keys %lower), "\n";
726 debug "# BoThCaSe = ", disp_chars(sort { ord $a <=> ord $b } keys %BoThCaSe), "\n";
727 debug "# Unassigned = ", disp_chars(sort { ord $a <=> ord $b } keys %Unassigned), "\n";
731 foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) {
734 if ($is_utf8_locale) {
735 use locale ':not_characters';
736 $ok = $x =~ /[[:upper:]]/;
737 $fold_ok = $x =~ /[[:lower:]]/i;
741 $ok = $x =~ /[[:upper:]]/;
742 $fold_ok = $x =~ /[[:lower:]]/i;
744 push @failures, $x unless $ok;
745 push @fold_failures, $x unless $fold_ok;
747 $locales_test_number++;
748 $first_casing_test_number = $locales_test_number;
749 $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/ matches all alpha X for which uc(X) == X and lc(X) != X';
750 report_multi_result($Locale, $locales_test_number, \@failures);
752 $locales_test_number++;
754 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i matches all alpha X for which uc(X) == X and lc(X) != X';
755 report_multi_result($Locale, $locales_test_number, \@fold_failures);
758 undef @fold_failures;
760 foreach my $x (sort { ord $a <=> ord $b } keys %lower) {
763 if ($is_utf8_locale) {
764 use locale ':not_characters';
765 $ok = $x =~ /[[:lower:]]/;
766 $fold_ok = $x =~ /[[:upper:]]/i;
770 $ok = $x =~ /[[:lower:]]/;
771 $fold_ok = $x =~ /[[:upper:]]/i;
773 push @failures, $x unless $ok;
774 push @fold_failures, $x unless $fold_ok;
777 $locales_test_number++;
778 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/ matches all alpha X for which lc(X) == X and uc(X) != X';
779 report_multi_result($Locale, $locales_test_number, \@failures);
781 $locales_test_number++;
782 $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/i matches all alpha X for which lc(X) == X and uc(X) != X';
783 report_multi_result($Locale, $locales_test_number, \@fold_failures);
785 { # Find the alphabetic characters that are not considered alphabetics
786 # in the default (C) locale.
791 for (keys %UPPER, keys %lower, keys %BoThCaSe) {
792 push(@Added_alpha, $_) if (/\W/);
796 @Added_alpha = sort { ord $a <=> ord $b } @Added_alpha;
798 debug "# Added_alpha = ", disp_chars(@Added_alpha), "\n";
800 # Cross-check the whole 8-bit character set.
802 ++$locales_test_number;
804 $test_names{$locales_test_number} = 'Verify that \w and [:word:] are identical';
805 for (map { chr } 0..255) {
806 if ($is_utf8_locale) {
807 use locale ':not_characters';
808 push @f, $_ unless /[[:word:]]/ == /\w/;
811 push @f, $_ unless /[[:word:]]/ == /\w/;
814 report_multi_result($Locale, $locales_test_number, \@f);
816 ++$locales_test_number;
818 $test_names{$locales_test_number} = 'Verify that \d and [:digit:] are identical';
819 for (map { chr } 0..255) {
820 if ($is_utf8_locale) {
821 use locale ':not_characters';
822 push @f, $_ unless /[[:digit:]]/ == /\d/;
825 push @f, $_ unless /[[:digit:]]/ == /\d/;
828 report_multi_result($Locale, $locales_test_number, \@f);
830 ++$locales_test_number;
832 $test_names{$locales_test_number} = 'Verify that \s and [:space:] are identical';
833 for (map { chr } 0..255) {
834 if ($is_utf8_locale) {
835 use locale ':not_characters';
836 push @f, $_ unless /[[:space:]]/ == /\s/;
839 push @f, $_ unless /[[:space:]]/ == /\s/;
842 report_multi_result($Locale, $locales_test_number, \@f);
844 ++$locales_test_number;
846 $test_names{$locales_test_number} = 'Verify that [:posix:] and [:^posix:] are mutually exclusive';
847 for (map { chr } 0..255) {
848 if ($is_utf8_locale) {
849 use locale ':not_characters';
850 push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) ||
851 (/[[:alnum:]]/ xor /[[:^alnum:]]/) ||
852 (/[[:ascii:]]/ xor /[[:^ascii:]]/) ||
853 (/[[:blank:]]/ xor /[[:^blank:]]/) ||
854 (/[[:cntrl:]]/ xor /[[:^cntrl:]]/) ||
855 (/[[:digit:]]/ xor /[[:^digit:]]/) ||
856 (/[[:graph:]]/ xor /[[:^graph:]]/) ||
857 (/[[:lower:]]/ xor /[[:^lower:]]/) ||
858 (/[[:print:]]/ xor /[[:^print:]]/) ||
859 (/[[:space:]]/ xor /[[:^space:]]/) ||
860 (/[[:upper:]]/ xor /[[:^upper:]]/) ||
861 (/[[:word:]]/ xor /[[:^word:]]/) ||
862 (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) ||
864 # effectively is what [:cased:] would be if it existed.
865 (/[[:upper:]]/i xor /[[:^upper:]]/i);
868 push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) ||
869 (/[[:alnum:]]/ xor /[[:^alnum:]]/) ||
870 (/[[:ascii:]]/ xor /[[:^ascii:]]/) ||
871 (/[[:blank:]]/ xor /[[:^blank:]]/) ||
872 (/[[:cntrl:]]/ xor /[[:^cntrl:]]/) ||
873 (/[[:digit:]]/ xor /[[:^digit:]]/) ||
874 (/[[:graph:]]/ xor /[[:^graph:]]/) ||
875 (/[[:lower:]]/ xor /[[:^lower:]]/) ||
876 (/[[:print:]]/ xor /[[:^print:]]/) ||
877 (/[[:space:]]/ xor /[[:^space:]]/) ||
878 (/[[:upper:]]/ xor /[[:^upper:]]/) ||
879 (/[[:word:]]/ xor /[[:^word:]]/) ||
880 (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) ||
881 (/[[:upper:]]/i xor /[[:^upper:]]/i);
884 report_multi_result($Locale, $locales_test_number, \@f);
886 # The rules for the relationships are given in:
887 # http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
890 ++$locales_test_number;
892 $test_names{$locales_test_number} = 'Verify that [:lower:] contains at least a-z';
894 if ($is_utf8_locale) {
895 use locale ':not_characters';
896 push @f, $_ unless /[[:lower:]]/;
899 push @f, $_ unless /[[:lower:]]/;
902 report_multi_result($Locale, $locales_test_number, \@f);
904 ++$locales_test_number;
906 $test_names{$locales_test_number} = 'Verify that [:lower:] is a subset of [:alpha:]';
907 for (map { chr } 0..255) {
908 if ($is_utf8_locale) {
909 use locale ':not_characters';
910 push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/;
913 push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/;
916 report_multi_result($Locale, $locales_test_number, \@f);
918 ++$locales_test_number;
920 $test_names{$locales_test_number} = 'Verify that [:upper:] contains at least A-Z';
922 if ($is_utf8_locale) {
923 use locale ':not_characters';
924 push @f, $_ unless /[[:upper:]]/;
927 push @f, $_ unless /[[:upper:]]/;
930 report_multi_result($Locale, $locales_test_number, \@f);
932 ++$locales_test_number;
934 $test_names{$locales_test_number} = 'Verify that [:upper:] is a subset of [:alpha:]';
935 for (map { chr } 0..255) {
936 if ($is_utf8_locale) {
937 use locale ':not_characters';
938 push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/;
941 push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/;
944 report_multi_result($Locale, $locales_test_number, \@f);
946 ++$locales_test_number;
948 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i is a subset of [:alpha:]';
949 for (map { chr } 0..255) {
950 if ($is_utf8_locale) {
951 use locale ':not_characters';
952 push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/;
955 push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/;
958 report_multi_result($Locale, $locales_test_number, \@f);
960 ++$locales_test_number;
962 $test_names{$locales_test_number} = 'Verify that [:alpha:] is a subset of [:alnum:]';
963 for (map { chr } 0..255) {
964 if ($is_utf8_locale) {
965 use locale ':not_characters';
966 push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/;
969 push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/;
972 report_multi_result($Locale, $locales_test_number, \@f);
974 ++$locales_test_number;
976 $test_names{$locales_test_number} = 'Verify that [:digit:] contains at least 0-9';
978 if ($is_utf8_locale) {
979 use locale ':not_characters';
980 push @f, $_ unless /[[:digit:]]/;
983 push @f, $_ unless /[[:digit:]]/;
986 report_multi_result($Locale, $locales_test_number, \@f);
988 ++$locales_test_number;
990 $test_names{$locales_test_number} = 'Verify that [:digit:] is a subset of [:alnum:]';
991 for (map { chr } 0..255) {
992 if ($is_utf8_locale) {
993 use locale ':not_characters';
994 push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/;
997 push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/;
1000 report_multi_result($Locale, $locales_test_number, \@f);
1002 ++$locales_test_number;
1004 $test_names{$locales_test_number} = 'Verify that [:digit:] matches either 10 or 20 code points';
1005 report_result($Locale, $locales_test_number, @{$posixes{'digit'}} == 10 || @{$posixes{'digit'}} == 20);
1007 ++$locales_test_number;
1009 $test_names{$locales_test_number} = 'Verify that if there is a second set of digits in [:digit:], they are consecutive';
1010 if (@{$posixes{'digit'}} == 20) {
1012 for (map { chr } 0..255) {
1013 next unless /[[:digit:]]/;
1015 if (defined $previous_ord) {
1016 if ($is_utf8_locale) {
1017 use locale ':not_characters';
1018 push @f, $_ if ord $_ != $previous_ord + 1;
1021 push @f, $_ if ord $_ != $previous_ord + 1;
1024 $previous_ord = ord $_;
1027 report_multi_result($Locale, $locales_test_number, \@f);
1029 ++$locales_test_number;
1031 $test_names{$locales_test_number} = 'Verify that [:digit:] is a subset of [:xdigit:]';
1032 for (map { chr } 0..255) {
1033 if ($is_utf8_locale) {
1034 use locale ':not_characters';
1035 push @f, $_ if /[[:digit:]]/ and ! /[[:xdigit:]]/;
1038 push @f, $_ if /[[:digit:]]/ and ! /[[:xdigit:]]/;
1041 report_multi_result($Locale, $locales_test_number, \@f);
1043 ++$locales_test_number;
1045 $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains at least A-F, a-f';
1046 for ('A' .. 'F', 'a' .. 'f') {
1047 if ($is_utf8_locale) {
1048 use locale ':not_characters';
1049 push @f, $_ unless /[[:xdigit:]]/;
1052 push @f, $_ unless /[[:xdigit:]]/;
1055 report_multi_result($Locale, $locales_test_number, \@f);
1057 ++$locales_test_number;
1059 $test_names{$locales_test_number} = 'Verify that any additional members of [:xdigit:], are in groups of 6 consecutive code points';
1062 for (map { chr } 0..255) {
1063 next unless /[[:xdigit:]]/;
1064 next if /[[:digit:]]/;
1066 if (defined $previous_ord) {
1067 if ($is_utf8_locale) {
1068 use locale ':not_characters';
1069 push @f, $_ if ord $_ != $previous_ord + 1;
1072 push @f, $_ if ord $_ != $previous_ord + 1;
1077 undef $previous_ord;
1080 $previous_ord = ord $_;
1083 report_multi_result($Locale, $locales_test_number, \@f);
1085 ++$locales_test_number;
1087 $test_names{$locales_test_number} = 'Verify that [:xdigit:] is a subset of [:graph:]';
1088 for (map { chr } 0..255) {
1089 if ($is_utf8_locale) {
1090 use locale ':not_characters';
1091 push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/;
1094 push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/;
1097 report_multi_result($Locale, $locales_test_number, \@f);
1099 # Note that xdigit doesn't have to be a subset of alnum
1101 ++$locales_test_number;
1103 $test_names{$locales_test_number} = 'Verify that [:punct:] is a subset of [:graph:]';
1104 for (map { chr } 0..255) {
1105 if ($is_utf8_locale) {
1106 use locale ':not_characters';
1107 push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/;
1110 push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/;
1113 report_multi_result($Locale, $locales_test_number, \@f);
1115 ++$locales_test_number;
1117 $test_names{$locales_test_number} = 'Verify that the space character is not in [:graph:]';
1118 if ($is_utf8_locale) {
1119 use locale ':not_characters';
1120 push @f, " " if " " =~ /[[:graph:]]/;
1123 push @f, " " if " " =~ /[[:graph:]]/;
1125 report_multi_result($Locale, $locales_test_number, \@f);
1127 ++$locales_test_number;
1129 $test_names{$locales_test_number} = 'Verify that [:space:] contains at least [\f\n\r\t\cK ]';
1130 for (' ', "\f", "\n", "\r", "\t", "\cK") {
1131 if ($is_utf8_locale) {
1132 use locale ':not_characters';
1133 push @f, $_ unless /[[:space:]]/;
1136 push @f, $_ unless /[[:space:]]/;
1139 report_multi_result($Locale, $locales_test_number, \@f);
1141 ++$locales_test_number;
1143 $test_names{$locales_test_number} = 'Verify that [:blank:] contains at least [\t ]';
1145 if ($is_utf8_locale) {
1146 use locale ':not_characters';
1147 push @f, $_ unless /[[:blank:]]/;
1150 push @f, $_ unless /[[:blank:]]/;
1153 report_multi_result($Locale, $locales_test_number, \@f);
1155 ++$locales_test_number;
1157 $test_names{$locales_test_number} = 'Verify that [:blank:] is a subset of [:space:]';
1158 for (map { chr } 0..255) {
1159 if ($is_utf8_locale) {
1160 use locale ':not_characters';
1161 push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/;
1164 push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/;
1167 report_multi_result($Locale, $locales_test_number, \@f);
1169 ++$locales_test_number;
1171 $test_names{$locales_test_number} = 'Verify that [:graph:] is a subset of [:print:]';
1172 for (map { chr } 0..255) {
1173 if ($is_utf8_locale) {
1174 use locale ':not_characters';
1175 push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/;
1178 push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/;
1181 report_multi_result($Locale, $locales_test_number, \@f);
1183 ++$locales_test_number;
1185 $test_names{$locales_test_number} = 'Verify that the space character is in [:print:]';
1186 if ($is_utf8_locale) {
1187 use locale ':not_characters';
1188 push @f, " " if " " !~ /[[:print:]]/;
1191 push @f, " " if " " !~ /[[:print:]]/;
1193 report_multi_result($Locale, $locales_test_number, \@f);
1195 ++$locales_test_number;
1197 $test_names{$locales_test_number} = 'Verify that isn\'t both [:cntrl:] and [:print:]';
1198 for (map { chr } 0..255) {
1199 if ($is_utf8_locale) {
1200 use locale ':not_characters';
1201 push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/);
1204 push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/);
1207 report_multi_result($Locale, $locales_test_number, \@f);
1209 ++$locales_test_number;
1211 $test_names{$locales_test_number} = 'Verify that isn\'t both [:alpha:] and [:digit:]';
1212 for (map { chr } 0..255) {
1213 if ($is_utf8_locale) {
1214 use locale ':not_characters';
1215 push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/;
1218 push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/;
1221 report_multi_result($Locale, $locales_test_number, \@f);
1223 ++$locales_test_number;
1225 $test_names{$locales_test_number} = 'Verify that isn\'t both [:alnum:] and [:punct:]';
1226 for (map { chr } 0..255) {
1227 if ($is_utf8_locale) {
1228 use locale ':not_characters';
1229 push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/;
1232 push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/;
1235 report_multi_result($Locale, $locales_test_number, \@f);
1237 ++$locales_test_number;
1239 $test_names{$locales_test_number} = 'Verify that isn\'t both [:xdigit:] and [:punct:]';
1240 for (map { chr } 0..255) {
1241 if ($is_utf8_locale) {
1242 use locale ':not_characters';
1243 push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/);
1246 push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/);
1249 report_multi_result($Locale, $locales_test_number, \@f);
1251 ++$locales_test_number;
1253 $test_names{$locales_test_number} = 'Verify that isn\'t both [:graph:] and [:space:]';
1254 for (map { chr } 0..255) {
1255 if ($is_utf8_locale) {
1256 use locale ':not_characters';
1257 push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/);
1260 push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/);
1263 report_multi_result($Locale, $locales_test_number, \@f);
1265 foreach ($first_casing_test_number..$locales_test_number) {
1266 push @problematical_tests, $_;
1270 # Test for read-only scalars' locale vs non-locale comparisons.
1276 if ($is_utf8_locale) {
1277 use locale ':not_characters';
1278 $ok = ($a cmp "qwerty") == 0;
1282 $ok = ($a cmp "qwerty") == 0;
1284 report_result($Locale, ++$locales_test_number, $ok);
1285 $test_names{$locales_test_number} = 'Verify that cmp works with a read-only scalar; no- vs locale';
1289 my ($from, $to, $lesser, $greater,
1290 @test, %test, $test, $yes, $no, $sign);
1292 ++$locales_test_number;
1293 $test_names{$locales_test_number} = 'Verify that "le", "ne", etc work';
1294 $not_necessarily_a_problem_test_number = $locales_test_number;
1297 $from = int(($_*@{$posixes{'word'}})/10);
1298 $to = $from + int(@{$posixes{'word'}}/10);
1299 $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}});
1300 $lesser = join('', @{$posixes{'word'}}[$from..$to]);
1301 # Select a slice one character on.
1303 $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}});
1304 $greater = join('', @{$posixes{'word'}}[$from..$to]);
1305 if ($is_utf8_locale) {
1306 use locale ':not_characters';
1307 ($yes, $no, $sign) = ($lesser lt $greater
1309 : ("not ", " ", -1));
1313 ($yes, $no, $sign) = ($lesser lt $greater
1315 : ("not ", " ", -1));
1317 # all these tests should FAIL (return 0). Exact lt or gt cannot
1318 # be tested because in some locales, say, eacute and E may test
1322 $no.' ($lesser le $greater)', # 1
1323 'not ($lesser ne $greater)', # 2
1324 ' ($lesser eq $greater)', # 3
1325 $yes.' ($lesser ge $greater)', # 4
1326 $yes.' ($lesser ge $greater)', # 5
1327 $yes.' ($greater le $lesser )', # 7
1328 'not ($greater ne $lesser )', # 8
1329 ' ($greater eq $lesser )', # 9
1330 $no.' ($greater ge $lesser )', # 10
1331 'not (($lesser cmp $greater) == -($sign))' # 11
1333 @test{@test} = 0 x @test;
1335 for my $ti (@test) {
1336 if ($is_utf8_locale) {
1337 use locale ':not_characters';
1338 $test{$ti} = eval $ti;
1341 # Already in 'use locale';
1342 $test{$ti} = eval $ti;
1344 $test ||= $test{$ti}
1346 report_result($Locale, $locales_test_number, $test == 0);
1348 debug "# lesser = '$lesser'\n";
1349 debug "# greater = '$greater'\n";
1350 debug "# lesser cmp greater = ",
1351 $lesser cmp $greater, "\n";
1352 debug "# greater cmp lesser = ",
1353 $greater cmp $lesser, "\n";
1354 debug "# (greater) from = $from, to = $to\n";
1355 for my $ti (@test) {
1356 debugf("# %-40s %-4s", $ti,
1357 $test{$ti} ? 'FAIL' : 'ok');
1358 if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) {
1359 debugf("(%s == %4d)", $1, eval $1);
1397 if (! $is_utf8_locale) {
1400 my ($x, $y) = (1.23, 1.23);
1403 printf ''; # printf used to reset locale to "C"
1408 my $z = sprintf ''; # sprintf used to reset locale to "C"
1415 local $SIG{__WARN__} =
1421 # The == (among other ops) used to warn for locales
1422 # that had something else than "." as the radix character.
1446 $ok12 = abs(($f + $g) - 3.57) < 0.01;
1448 $ok14 = $ok15 = $ok16 = 1; # Skip for non-utf8 locales
1452 $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i);
1454 $ok18 = $j eq sprintf("%g:%g", $h, $i);
1457 use locale ':not_characters';
1459 my ($x, $y) = (1.23, 1.23);
1461 printf ''; # printf used to reset locale to "C"
1466 my $z = sprintf ''; # sprintf used to reset locale to "C"
1472 local $SIG{__WARN__} =
1498 $ok12 = abs(($f + $g) - 3.57) < 0.01;
1501 # Look for non-ASCII error messages, and verify that the first
1502 # such is in UTF-8 (the others almost certainly will be like the
1505 foreach my $err (keys %!) {
1507 $! = eval "&Errno::$err"; # Convert to strerror() output
1508 my $strerror = "$!";
1509 if ("$strerror" =~ /\P{ASCII}/) {
1510 $ok14 = utf8::is_utf8($strerror);
1515 # Similarly, we verify that a non-ASCII radix is in UTF-8. This
1516 # also catches if there is a disparity between sprintf and
1519 my $string_g = "$g";
1520 my $sprintf_g = sprintf("%g", $g);
1522 $ok15 = $string_g =~ / ^ \p{ASCII}+ $ /x || utf8::is_utf8($string_g);
1523 $ok16 = $sprintf_g eq $string_g;
1527 $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i);
1529 $ok18 = $j eq sprintf("%g:%g", $h, $i);
1532 report_result($Locale, ++$locales_test_number, $ok1);
1533 $test_names{$locales_test_number} = 'Verify that an intervening printf doesn\'t change assignment results';
1534 my $first_a_test = $locales_test_number;
1536 debug "# $first_a_test..$locales_test_number: \$a = $a, \$b = $b, Locale = $Locale\n";
1538 report_result($Locale, ++$locales_test_number, $ok2);
1539 $test_names{$locales_test_number} = 'Verify that an intervening sprintf doesn\'t change assignment results';
1541 my $first_c_test = $locales_test_number;
1543 report_result($Locale, ++$locales_test_number, $ok3);
1544 $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a constant';
1546 report_result($Locale, ++$locales_test_number, $ok4);
1547 $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar';
1549 report_result($Locale, ++$locales_test_number, $ok5);
1550 $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar and an intervening sprintf';
1552 debug "# $first_c_test..$locales_test_number: \$c = $c, \$d = $d, Locale = $Locale\n";
1554 report_result($Locale, ++$locales_test_number, $ok6);
1555 $test_names{$locales_test_number} = 'Verify that can assign stringified under inner no-locale block';
1556 my $first_e_test = $locales_test_number;
1558 report_result($Locale, ++$locales_test_number, $ok7);
1559 $test_names{$locales_test_number} = 'Verify that "==" with a scalar still works in inner no locale';
1561 report_result($Locale, ++$locales_test_number, $ok8);
1562 $test_names{$locales_test_number} = 'Verify that "==" with a scalar and an intervening sprintf still works in inner no locale';
1564 debug "# $first_e_test..$locales_test_number: \$e = $e, no locale\n";
1566 report_result($Locale, ++$locales_test_number, $ok9);
1567 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a constant';
1568 my $first_f_test = $locales_test_number;
1570 report_result($Locale, ++$locales_test_number, $ok10);
1571 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar';
1573 report_result($Locale, ++$locales_test_number, $ok11);
1574 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar and an intervening sprintf';
1576 report_result($Locale, ++$locales_test_number, $ok12);
1577 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix can participate in an addition and function call as numeric';
1579 report_result($Locale, ++$locales_test_number, $ok13);
1580 $test_names{$locales_test_number} = 'Verify that don\'t get warning under "==" even if radix is not a dot';
1582 report_result($Locale, ++$locales_test_number, $ok14);
1583 $test_names{$locales_test_number} = 'Verify that non-ASCII UTF-8 error messages are in UTF-8';
1585 report_result($Locale, ++$locales_test_number, $ok15);
1586 $test_names{$locales_test_number} = 'Verify that a number with a UTF-8 radix has a UTF-8 stringification';
1588 report_result($Locale, ++$locales_test_number, $ok16);
1589 $test_names{$locales_test_number} = 'Verify that a sprintf of a number with a UTF-8 radix yields UTF-8';
1591 report_result($Locale, ++$locales_test_number, $ok17);
1592 $test_names{$locales_test_number} = 'Verify that a sprintf of a number outside locale scope uses a dot radix';
1594 report_result($Locale, ++$locales_test_number, $ok18);
1595 $test_names{$locales_test_number} = 'Verify that a sprintf of a number back within locale scope uses locale radix';
1597 debug "# $first_f_test..$locales_test_number: \$f = $f, \$g = $g, back to locale = $Locale\n";
1599 # Does taking lc separately differ from taking
1600 # the lc "in-line"? (This was the bug 19990704.002, change #3568.)
1601 # The bug was in the caching of the 'o'-magic.
1602 if (! $is_utf8_locale) {
1608 return $lc0 cmp $lc1;
1612 return lc($_[0]) cmp lc($_[1]);
1619 report_result($Locale, ++$locales_test_number,
1620 lcA($x, $y) == 1 && lcB($x, $y) == 1 ||
1621 lcA($x, $z) == 0 && lcB($x, $z) == 0);
1624 use locale ':not_characters';
1629 return $lc0 cmp $lc1;
1633 return lc($_[0]) cmp lc($_[1]);
1640 report_result($Locale, ++$locales_test_number,
1641 lcC($x, $y) == 1 && lcD($x, $y) == 1 ||
1642 lcC($x, $z) == 0 && lcD($x, $z) == 0);
1644 $test_names{$locales_test_number} = 'Verify "lc(foo) cmp lc(bar)" is the same as using intermediaries for the cmp';
1646 # Does lc of an UPPER (if different from the UPPER) match
1647 # case-insensitively the UPPER, and does the UPPER match
1648 # case-insensitively the lc of the UPPER. And vice versa.
1652 my $re = qr/[\[\(\{\*\+\?\|\^\$\\]/;
1655 ++$locales_test_number;
1656 $test_names{$locales_test_number} = 'Verify case insensitive matching works';
1657 foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) {
1658 if (! $is_utf8_locale) {
1660 next unless uc $y eq $x;
1661 debug_more( "# UPPER=", disp_chars(($x)),
1662 "; lc=", disp_chars(($y)), "; ",
1663 "; fc=", disp_chars((fc $x)), "; ",
1664 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
1665 $x =~ /$y/i ? 1 : 0,
1667 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
1668 $y =~ /$x/i ? 1 : 0,
1671 # If $x and $y contain regular expression characters
1672 # AND THEY lowercase (/i) to regular expression characters,
1673 # regcomp() will be mightily confused. No, the \Q doesn't
1674 # help here (maybe regex engine internal lowercasing
1675 # is done after the \Q?) An example of this happening is
1676 # the bg_BG (Bulgarian) locale under EBCDIC (OS/390 USS):
1677 # the chr(173) (the "[") is the lowercase of the chr(235).
1679 # Similarly losing EBCDIC locales include cs_cz, cs_CZ,
1680 # el_gr, el_GR, en_us.IBM-037 (!), en_US.IBM-037 (!),
1681 # et_ee, et_EE, hr_hr, hr_HR, hu_hu, hu_HU, lt_LT,
1682 # mk_mk, mk_MK, nl_nl.IBM-037, nl_NL.IBM-037,
1683 # pl_pl, pl_PL, ro_ro, ro_RO, ru_ru, ru_RU,
1684 # sk_sk, sk_SK, sl_si, sl_SI, tr_tr, tr_TR.
1686 # Similar things can happen even under (bastardised)
1687 # non-EBCDIC locales: in many European countries before the
1688 # advent of ISO 8859-x nationally customised versions of
1689 # ISO 646 were devised, reusing certain punctuation
1690 # characters for modified characters needed by the
1691 # country/language. For example, the "|" might have
1692 # stood for U+00F6 or LATIN SMALL LETTER O WITH DIAERESIS.
1694 if ($x =~ $re || $y =~ $re) {
1695 print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n";
1698 push @f, $x unless $x =~ /$y/i && $y =~ /$x/i;
1700 # fc is not a locale concept, so Perl uses lc for it.
1701 push @f, $x unless lc $x eq fc $x;
1704 use locale ':not_characters';
1706 next unless uc $y eq $x;
1707 debug_more( "# UPPER=", disp_chars(($x)),
1708 "; lc=", disp_chars(($y)), "; ",
1709 "; fc=", disp_chars((fc $x)), "; ",
1710 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
1711 $x =~ /$y/i ? 1 : 0,
1713 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
1714 $y =~ /$x/i ? 1 : 0,
1717 push @f, $x unless $x =~ /$y/i && $y =~ /$x/i;
1719 # The places where Unicode's lc is different from fc are
1720 # skipped here by virtue of the 'next unless uc...' line above
1721 push @f, $x unless lc $x eq fc $x;
1725 foreach my $x (sort { ord $a <=> ord $b } keys %lower) {
1726 if (! $is_utf8_locale) {
1728 next unless lc $y eq $x;
1729 debug_more( "# lower=", disp_chars(($x)),
1730 "; uc=", disp_chars(($y)), "; ",
1731 "; fc=", disp_chars((fc $x)), "; ",
1732 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
1733 $x =~ /$y/i ? 1 : 0,
1735 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
1736 $y =~ /$x/i ? 1 : 0,
1738 if ($x =~ $re || $y =~ $re) { # See above.
1739 print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n";
1742 push @f, $x unless $x =~ /$y/i && $y =~ /$x/i;
1744 push @f, $x unless lc $x eq fc $x;
1747 use locale ':not_characters';
1749 next unless lc $y eq $x;
1750 debug_more( "# lower=", disp_chars(($x)),
1751 "; uc=", disp_chars(($y)), "; ",
1752 "; fc=", disp_chars((fc $x)), "; ",
1753 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
1754 $x =~ /$y/i ? 1 : 0,
1756 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
1757 $y =~ /$x/i ? 1 : 0,
1759 push @f, $x unless $x =~ /$y/i && $y =~ /$x/i;
1761 push @f, $x unless lc $x eq fc $x;
1764 report_multi_result($Locale, $locales_test_number, \@f);
1765 push @problematical_tests, $locales_test_number;
1771 ++$locales_test_number;
1772 $test_names{$locales_test_number} = 'Verify atof with locale radix and negative exponent';
1774 my $radix = POSIX::localeconv()->{decimal_point};
1776 "3.14e+9", "3${radix}14e+9", "3.14e-9", "3${radix}14e-9",
1777 "-3.14e+9", "-3${radix}14e+9", "-3.14e-9", "-3${radix}14e-9",
1780 if (! $is_utf8_locale) {
1782 for my $num (@nums) {
1784 unless sprintf("%g", $num) =~ /3.+14/;
1788 use locale ':not_characters';
1789 for my $num (@nums) {
1791 unless sprintf("%g", $num) =~ /3.+14/;
1795 report_result($Locale, $locales_test_number, @f == 0);
1797 print "# failed $locales_test_number locale '$Locale' numbers @f\n"
1802 my $final_locales_test_number = $locales_test_number;
1804 # Recount the errors.
1806 foreach $test_num ($first_locales_test_number..$final_locales_test_number) {
1807 if (%setlocale_failed) {
1810 elsif ($Problem{$test_num} || !defined $Okay{$test_num} || !@{$Okay{$test_num}}) {
1811 if (defined $not_necessarily_a_problem_test_number
1812 && $test_num == $not_necessarily_a_problem_test_number)
1814 print "# The failure of test $not_necessarily_a_problem_test_number is not necessarily fatal.\n";
1815 print "# It usually indicates a problem in the environment,\n";
1816 print "# not in Perl itself.\n";
1818 if ($Okay{$test_num} && grep { $_ == $test_num } @problematical_tests) {
1819 no warnings 'experimental::autoderef';
1820 # Round to nearest .1%
1821 my $percent_fail = (int(.5 + (1000 * scalar(keys $Problem{$test_num})
1822 / scalar(@Locale))))
1824 if (! $debug && $percent_fail < $acceptable_fold_failure_percentage)
1826 $test_names{$test_num} .= 'TODO';
1827 print "# ", 100 - $percent_fail, "% of locales pass the following test, so it is likely that the failures\n";
1828 print "# are errors in the locale definitions. The test is marked TODO, as the\n";
1829 print "# problem is not likely to be Perl's\n";
1834 print "# The code points that had this failure are given above. Look for lines\n";
1835 print "# that match 'failed $test_num'\n";
1838 print "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n";
1839 print "# Then look at that output for lines that match 'failed $test_num'\n";
1843 print "ok $test_num";
1844 if (defined $test_names{$test_num}) {
1845 # If TODO is in the test name, make it thus
1846 my $todo = $test_names{$test_num} =~ s/TODO\s*//;
1847 print " $test_names{$test_num}";
1848 print " # TODO" if $todo;
1853 $test_num = $final_locales_test_number;
1855 unless ( $^O eq 'dragonfly' ) {
1859 local $SIG{__WARN__} = sub {
1860 $warned = $_[0] =~ /uninitialized/;
1862 my $z = "y" . setlocale(&POSIX::LC_ALL, "xyzzy");
1863 ok($warned, "variable set to setlocale(BAD LOCALE) is considered uninitialized");
1866 # Test that tainting and case changing works on utf8 strings. These tests are
1867 # placed last to avoid disturbing the hard-coded test numbers that existed at
1868 # the time these were added above this in this file.
1869 # This also tests that locale overrides unicode_strings in the same scope for
1871 setlocale(&POSIX::LC_ALL, "C");
1874 use feature 'unicode_strings';
1876 foreach my $function ("uc", "ucfirst", "lc", "lcfirst", "fc") {
1877 my @list; # List of code points to test for $function
1879 # Used to calculate the changed case for ASCII characters by using the
1880 # ord, instead of using one of the functions under test.
1881 my $ascii_case_change_delta;
1882 my $above_latin1_case_change_delta; # Same for the specific ords > 255
1885 # We test an ASCII character, which should change case;
1886 # a Latin1 character, which shouldn't change case under this C locale,
1887 # an above-Latin1 character that when the case is changed would cross
1888 # the 255/256 boundary, so doesn't change case
1889 # (the \x{149} is one of these, but changes into 2 characters, the
1890 # first one of which doesn't cross the boundary.
1891 # the final one in each list is an above-Latin1 character whose case
1892 # does change. The code below uses its position in its list as a
1893 # marker to indicate that it, unlike the other code points above
1894 # ASCII, has a successful case change
1896 # All casing operations under locale (but not :not_characters) should
1898 if ($function =~ /^u/) {
1899 @list = ("", "a", "\xe0", "\xff", "\x{fb00}", "\x{149}", "\x{101}");
1900 $ascii_case_change_delta = -32;
1901 $above_latin1_case_change_delta = -1;
1904 @list = ("", "A", "\xC0", "\x{17F}", "\x{100}");
1905 $ascii_case_change_delta = +32;
1906 $above_latin1_case_change_delta = +1;
1908 foreach my $is_utf8_locale (0 .. 1) {
1909 foreach my $j (0 .. $#list) {
1910 my $char = $list[$j];
1912 for my $encoded_in_utf8 (0 .. 1) {
1915 if (! $is_utf8_locale) {
1916 $should_be = ($j == $#list)
1917 ? chr(ord($char) + $above_latin1_case_change_delta)
1918 : (length $char == 0 || ord($char) > 127)
1920 : chr(ord($char) + $ascii_case_change_delta);
1922 # This monstrosity is in order to avoid using an eval,
1923 # which might perturb the results
1924 $changed = ($function eq "uc")
1926 : ($function eq "ucfirst")
1928 : ($function eq "lc")
1930 : ($function eq "lcfirst")
1932 : ($function eq "fc")
1934 : die("Unexpected function \"$function\"");
1940 # For utf8-locales the case changing functions
1941 # should work just like they do outside of locale.
1942 # Can use eval here because not testing it when
1944 $should_be = eval "$function('$char')";
1945 die "Unexpected eval error $@ from 'eval \"$function('$char')\"'" if $@;
1948 use locale ':not_characters';
1949 $changed = ($function eq "uc")
1951 : ($function eq "ucfirst")
1953 : ($function eq "lc")
1955 : ($function eq "lcfirst")
1957 : ($function eq "fc")
1959 : die("Unexpected function \"$function\"");
1961 ok($changed eq $should_be,
1962 "$function(\"$char\") in C locale "
1963 . (($is_utf8_locale)
1964 ? "(use locale ':not_characters'"
1966 . (($encoded_in_utf8)
1967 ? "; encoded in utf8)"
1968 : "; not encoded in utf8)")
1969 . " should be \"$should_be\", got \"$changed\"");
1971 # Tainting shouldn't happen for use locale :not_character
1974 ? check_taint($changed)
1975 : check_taint_not($changed);
1977 # Use UTF-8 next time through the loop
1978 utf8::upgrade($char);
1985 # Give final advice.
1989 foreach ($first_locales_test_number..$final_locales_test_number) {
1991 my @f = sort keys %{ $Problem{$_} };
1992 my $f = join(" ", @f);
1993 $f =~ s/(.{50,60}) /$1\n#\t/g;
1996 "# The locale ", (@f == 1 ? "definition" : "definitions"), "\n#\n",
1998 "# on your system may have errors because the locale test $_\n",
1999 "# \"$test_names{$_}\"\n",
2000 "# failed in ", (@f == 1 ? "that locale" : "those locales"),
2004 # If your users are not using these locales you are safe for the moment,
2005 # but please report this failure first to perlbug\@perl.com using the
2006 # perlbug script (as described in the INSTALL file) so that the exact
2007 # details of the failures can be sorted out first and then your operating
2008 # system supplier can be alerted about these anomalies.
2015 # Tell which locales were okay and which were not.
2020 foreach my $l (@Locale) {
2022 if ($setlocale_failed{$l}) {
2027 ($first_locales_test_number..$final_locales_test_number)
2029 $p++ if $Problem{$t}{$l};
2032 push @s, $l if $p == 0;
2033 push @F, $l unless $p == 0;
2037 my $s = join(" ", @s);
2038 $s =~ s/(.{50,60}) /$1\n#\t/g;
2041 "# The following locales\n#\n",
2043 "# tested okay.\n#\n",
2045 warn "# None of your locales were fully okay.\n";
2049 my $F = join(" ", @F);
2050 $F =~ s/(.{50,60}) /$1\n#\t/g;
2054 $details = "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n";
2056 elsif ($debug == 1) {
2057 $details = "# For even more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=2.\n";
2061 "# The following locales\n#\n",
2063 "# had problems.\n#\n",
2066 warn "# None of your locales were broken.\n";
2070 print "1..$test_num\n";