X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/9717af6d049902fc887c412facb2d15e785ef1a4..e463df90b78a57edd46d5b19a56006b28f5029d6:/lib/locale.t diff --git a/lib/locale.t b/lib/locale.t index 04ad46e..e8cedbc 100644 --- a/lib/locale.t +++ b/lib/locale.t @@ -7,6 +7,11 @@ # To make a TODO test, add the string 'TODO' to its %test_names value +my $is_ebcdic = ord("A") == 193; + +no warnings 'locale'; # We test even weird locales; and do some scary things + # in ok locales + binmode STDOUT, ':utf8'; binmode STDERR, ':utf8'; @@ -14,20 +19,20 @@ BEGIN { chdir 't' if -d 't'; @INC = '../lib'; unshift @INC, '.'; - require Config; import Config; - if (!$Config{d_setlocale} || $Config{ccflags} =~ /\bD?NO_LOCALE\b/) { + require './loc_tools.pl'; + unless (locales_enabled('LC_CTYPE')) { print "1..0\n"; exit; } - require './loc_tools.pl'; $| = 1; + require Config; import Config; } use strict; -use feature 'fc'; +use feature 'fc', 'postderef'; # =1 adds debugging output; =2 increases the verbosity somewhat -my $debug = $ENV{PERL_DEBUG_FULL_TEST} // 0; +our $debug = $ENV{PERL_DEBUG_FULL_TEST} // 0; # Certain tests have been shown to be problematical for a few locales. Don't # fail them unless at least this percentage of the tested locales fail. @@ -40,6 +45,19 @@ my $acceptable_failure_percentage = ($^O =~ / ^ ( AIX ) $ /ix) # The list of test numbers of the problematic tests. my %problematical_tests; +# If any %problematical_tests fails in one of these locales, it is +# considered a TODO. +my %known_bad_locales = ( + irix => qr/ ^ (?: cs | hu | sk ) $/x, + darwin => qr/ ^ lt_LT.ISO8859 /ix, + os390 => qr/ ^ italian /ix, + ); + +# cygwin isn't returning proper radix length in this locale, but supposedly to +# be fixed in later versions. +if ($^O eq 'cygwin' && version->new(($Config{osvers} =~ /^(\d+(?:\.\d+)+)/)[0]) le v2.4.1) { + $known_bad_locales{'cygwin'} = qr/ ^ ps_AF /ix; +} use Dumpvalue; @@ -48,11 +66,17 @@ my $dumper = Dumpvalue->new( quoteHighBit => 0, unctrl => "quote" ); + sub debug { return unless $debug; - my($mess) = join "", @_; - chop $mess; - print $dumper->stringify($mess,1), "\n"; + my($mess) = join "", '# ', @_; + chomp $mess; + print STDERR $dumper->stringify($mess,1), "\n"; +} + +sub note { + local $debug = 1; + debug @_; } sub debug_more { @@ -61,7 +85,7 @@ sub debug_more { } sub debugf { - printf @_ if $debug; + printf STDERR @_ if $debug; } $a = 'abc %9'; @@ -76,6 +100,7 @@ sub ok { print "ok " . ++$test_num; print " $message"; print "\n"; + return ($result) ? 1 : 0; } # First we'll do a lot of taint checking for locales. @@ -103,6 +128,20 @@ sub check_taint_not ($;$) { ok((not is_tainted($_[0])), "verify that isn't tainted$message_tail"); } +foreach my $category (qw(ALL COLLATE CTYPE MESSAGES MONETARY NUMERIC TIME)) { + my $short_result = locales_enabled($category); + ok ($short_result == 0 || $short_result == 1, + "Verify locales_enabled('$category') returns 0 or 1"); + debug("locales_enabled('$category') returned '$short_result'"); + my $long_result = locales_enabled("LC_$category"); + if (! ok ($long_result == $short_result, + " and locales_enabled('LC_$category') returns " + . "the same value") + ) { + debug("locales_enabled('LC_$category') returned $long_result"); + } +} + "\tb\t" =~ /^m?(\s)(.*)\1$/; check_taint_not $&, "not tainted outside 'use locale'"; ; @@ -699,15 +738,15 @@ my $final_without_setlocale = $test_num; # Find locales. -debug "# Scanning for locales...\n"; +debug "Scanning for locales...\n"; require POSIX; import POSIX ':locale_h'; -my @Locale = find_locales([ &POSIX::LC_CTYPE, &POSIX::LC_ALL ]); +my @Locale = find_locales([ &POSIX::LC_CTYPE, &POSIX::LC_NUMERIC, &POSIX::LC_ALL ]); -debug "# Locales =\n"; +debug "Locales =\n"; for ( @Locale ) { - debug "# $_\n"; + debug "$_\n"; } unless (@Locale) { @@ -722,6 +761,7 @@ my %posixes; my %Problem; my %Okay; +my %Known_bad_locale; # Failed test for a locale known to be bad my %Testing; my @Added_alpha; # Alphas that aren't in the C locale. my %test_names; @@ -805,16 +845,78 @@ sub disp_chars { return $output; } +sub disp_str ($) { + my $string = shift; + + # Displays the string unambiguously. ASCII printables are always output + # as-is, though perhaps separated by blanks from other characters. If + # entirely printable ASCII, just returns the string. Otherwise if valid + # UTF-8 it uses the character names for non-printable-ASCII. Otherwise it + # outputs hex for each non-ASCII-printable byte. + + return $string if $string =~ / ^ [[:print:]]* $/xa; + + my $result = ""; + my $prev_was_punct = 1; # Beginning is considered punct + if (utf8::valid($string) && utf8::is_utf8($string)) { + use charnames (); + foreach my $char (split "", $string) { + + # Keep punctuation adjacent to other characters; otherwise + # separate them with a blank + if ($char =~ /[[:punct:]]/a) { + $result .= $char; + $prev_was_punct = 1; + } + elsif ($char =~ /[[:print:]]/a) { + $result .= " " unless $prev_was_punct; + $result .= $char; + $prev_was_punct = 0; + } + else { + $result .= " " unless $prev_was_punct; + my $name = charnames::viacode(ord $char); + $result .= (defined $name) ? $name : ':unknown:'; + $prev_was_punct = 0; + } + } + } + else { + use bytes; + foreach my $char (split "", $string) { + if ($char =~ /[[:punct:]]/a) { + $result .= $char; + $prev_was_punct = 1; + } + elsif ($char =~ /[[:print:]]/a) { + $result .= " " unless $prev_was_punct; + $result .= $char; + $prev_was_punct = 0; + } + else { + $result .= " " unless $prev_was_punct; + $result .= sprintf("%02X", ord $char); + $prev_was_punct = 0; + } + } + } + + return $result; +} + sub report_result { my ($Locale, $i, $pass_fail, $message) = @_; - $message //= ""; - $message = " ($message)" if $message; - unless ($pass_fail) { - $Problem{$i}{$Locale} = 1; - debug "# failed $i ($test_names{$i}) with locale '$Locale'$message\n"; - } else { + if ($pass_fail) { push @{$Okay{$i}}, $Locale; } + else { + $message //= ""; + $message = " ($message)" if $message; + $Known_bad_locale{$i}{$Locale} = 1 if exists $known_bad_locales{$^O} + && $Locale =~ $known_bad_locales{$^O}; + $Problem{$i}{$Locale} = 1; + debug "failed $i ($test_names{$i}) with locale '$Locale'$message\n"; + } } sub report_multi_result { @@ -838,8 +940,8 @@ my %setlocale_failed; # List of locales that setlocale() didn't work on foreach my $Locale (@Locale) { $locales_test_number = $first_locales_test_number - 1; - debug "#\n"; - debug "# Locale = $Locale\n"; + debug "\n"; + debug "Locale = $Locale\n"; unless (setlocale(&POSIX::LC_ALL, $Locale)) { $setlocale_failed{$Locale} = $Locale; @@ -857,14 +959,9 @@ foreach my $Locale (@Locale) { my $is_utf8_locale = is_locale_utf8($Locale); - debug "# is utf8 locale? = $is_utf8_locale\n"; + debug "is utf8 locale? = $is_utf8_locale\n"; - my $radix = localeconv()->{decimal_point}; - if ($radix !~ / ^ [[:ascii:]] + $/x) { - use bytes; - $radix = disp_chars(split "", $radix); - } - debug "# radix = $radix\n"; + debug "radix = " . disp_str(localeconv()->{decimal_point}) . "\n"; if (! $is_utf8_locale) { use locale; @@ -882,7 +979,7 @@ foreach my $Locale (@Locale) { @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255; @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255; @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255; - @{$posixes{'cased'}} = grep /[[:upper:]]/i, map {chr } 0..255; + @{$posixes{'cased'}} = grep /[[:upper:][:lower:]]/i, map {chr } 0..255; # Sieve the uppercase and the lowercase. @@ -913,7 +1010,7 @@ foreach my $Locale (@Locale) { @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255; @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255; @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255; - @{$posixes{'cased'}} = grep /[[:upper:]]/i, map {chr } 0..255; + @{$posixes{'cased'}} = grep /[[:upper:][:lower:]]/i, map {chr } 0..255; for (@{$posixes{'word'}}) { if (/[^\d_]/) { # skip digits and the _ if (uc($_) eq $_) { @@ -928,21 +1025,21 @@ foreach my $Locale (@Locale) { # Ordered, where possible, in groups of "this is a subset of the next # one" - debug "# :upper: = ", disp_chars(@{$posixes{'upper'}}), "\n"; - debug "# :lower: = ", disp_chars(@{$posixes{'lower'}}), "\n"; - debug "# :cased: = ", disp_chars(@{$posixes{'cased'}}), "\n"; - debug "# :alpha: = ", disp_chars(@{$posixes{'alpha'}}), "\n"; - debug "# :alnum: = ", disp_chars(@{$posixes{'alnum'}}), "\n"; - debug "# w = ", disp_chars(@{$posixes{'word'}}), "\n"; - debug "# :graph: = ", disp_chars(@{$posixes{'graph'}}), "\n"; - debug "# :print: = ", disp_chars(@{$posixes{'print'}}), "\n"; - debug "# d = ", disp_chars(@{$posixes{'digit'}}), "\n"; - debug "# :xdigit: = ", disp_chars(@{$posixes{'xdigit'}}), "\n"; - debug "# :blank: = ", disp_chars(@{$posixes{'blank'}}), "\n"; - debug "# s = ", disp_chars(@{$posixes{'space'}}), "\n"; - debug "# :punct: = ", disp_chars(@{$posixes{'punct'}}), "\n"; - debug "# :cntrl: = ", disp_chars(@{$posixes{'cntrl'}}), "\n"; - debug "# :ascii: = ", disp_chars(@{$posixes{'ascii'}}), "\n"; + debug ":upper: = ", disp_chars(@{$posixes{'upper'}}), "\n"; + debug ":lower: = ", disp_chars(@{$posixes{'lower'}}), "\n"; + debug ":cased: = ", disp_chars(@{$posixes{'cased'}}), "\n"; + debug ":alpha: = ", disp_chars(@{$posixes{'alpha'}}), "\n"; + debug ":alnum: = ", disp_chars(@{$posixes{'alnum'}}), "\n"; + debug ' \w = ', disp_chars(@{$posixes{'word'}}), "\n"; + debug ":graph: = ", disp_chars(@{$posixes{'graph'}}), "\n"; + debug ":print: = ", disp_chars(@{$posixes{'print'}}), "\n"; + debug ' \d = ', disp_chars(@{$posixes{'digit'}}), "\n"; + debug ":xdigit: = ", disp_chars(@{$posixes{'xdigit'}}), "\n"; + debug ":blank: = ", disp_chars(@{$posixes{'blank'}}), "\n"; + debug ' \s = ', disp_chars(@{$posixes{'space'}}), "\n"; + debug ":punct: = ", disp_chars(@{$posixes{'punct'}}), "\n"; + debug ":cntrl: = ", disp_chars(@{$posixes{'cntrl'}}), "\n"; + debug ":ascii: = ", disp_chars(@{$posixes{'ascii'}}), "\n"; foreach (keys %UPPER) { @@ -966,10 +1063,10 @@ foreach my $Locale (@Locale) { } } - debug "# UPPER = ", disp_chars(sort { ord $a <=> ord $b } keys %UPPER), "\n"; - debug "# lower = ", disp_chars(sort { ord $a <=> ord $b } keys %lower), "\n"; - debug "# BoThCaSe = ", disp_chars(sort { ord $a <=> ord $b } keys %BoThCaSe), "\n"; - debug "# Unassigned = ", disp_chars(sort { ord $a <=> ord $b } keys %Unassigned), "\n"; + debug "UPPER = ", disp_chars(sort { ord $a <=> ord $b } keys %UPPER), "\n"; + debug "lower = ", disp_chars(sort { ord $a <=> ord $b } keys %lower), "\n"; + debug "BoThCaSe = ", disp_chars(sort { ord $a <=> ord $b } keys %BoThCaSe), "\n"; + debug "Unassigned = ", disp_chars(sort { ord $a <=> ord $b } keys %Unassigned), "\n"; my @failures; my @fold_failures; @@ -1040,7 +1137,7 @@ foreach my $Locale (@Locale) { @Added_alpha = sort { ord $a <=> ord $b } @Added_alpha; - debug "# Added_alpha = ", disp_chars(@Added_alpha), "\n"; + debug "Added_alpha = ", disp_chars(@Added_alpha), "\n"; # Cross-check the whole 8-bit character set. @@ -1107,7 +1204,7 @@ foreach my $Locale (@Locale) { (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) || # effectively is what [:cased:] would be if it existed. - (/[[:upper:]]/i xor /[[:^upper:]]/i); + (/[[:upper:][:lower:]]/i xor /[^[:upper:][:lower:]]/i); } else { push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) || @@ -1123,7 +1220,7 @@ foreach my $Locale (@Locale) { (/[[:upper:]]/ xor /[[:^upper:]]/) || (/[[:word:]]/ xor /[[:^word:]]/) || (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) || - (/[[:upper:]]/i xor /[[:^upper:]]/i); + (/[[:upper:][:lower:]]/i xor /[^[:upper:][:lower:]]/i); } } report_multi_result($Locale, $locales_test_number, \@f); @@ -1625,25 +1722,123 @@ foreach my $Locale (@Locale) { } report_result($Locale, $locales_test_number, $test == 0); if ($test) { - debug "# lesser = '$lesser'\n"; - debug "# greater = '$greater'\n"; - debug "# lesser cmp greater = ", + debug "lesser = '$lesser'\n"; + debug "greater = '$greater'\n"; + debug "lesser cmp greater = ", $lesser cmp $greater, "\n"; - debug "# greater cmp lesser = ", + debug "greater cmp lesser = ", $greater cmp $lesser, "\n"; - debug "# (greater) from = $from, to = $to\n"; + debug "(greater) from = $from, to = $to\n"; for my $ti (@test) { debugf("# %-40s %-4s", $ti, $test{$ti} ? 'FAIL' : 'ok'); if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) { debugf("(%s == %4d)", $1, eval $1); } - debug "\n#"; + debugf("\n#"); } last; } } + + use locale; + + my @sorted_controls = sort @{$posixes{'cntrl'}}; + my $output = ""; + for my $control (@sorted_controls) { + $output .= " " . disp_chars($control); + } + debug "sorted :cntrl: = $output\n"; + + ++$locales_test_number; + $test_names{$locales_test_number} + = 'Verify that \0 sorts before any other control'; + my $ok = $sorted_controls[0] eq "\0"; + report_result($Locale, $locales_test_number, $ok); + shift @sorted_controls; + my $lowest_control = $sorted_controls[0]; + + ++$locales_test_number; + $test_names{$locales_test_number} + = 'Skip in locales where all controls have primary sorting weight; ' + . 'otherwise verify that \0 doesn\'t have primary sorting weight'; + if ("a${lowest_control}c" lt "ab") { + report_result($Locale, $locales_test_number, 1); + } + else { + my $ok = "ab" lt "a\0c"; + report_result($Locale, $locales_test_number, $ok); + } + + ++$locales_test_number; + $test_names{$locales_test_number} + = 'Verify that strings with embedded NUL collate'; + $ok = "a\0a\0a" lt "a${lowest_control}a${lowest_control}a"; + report_result($Locale, $locales_test_number, $ok); + + ++$locales_test_number; + $test_names{$locales_test_number} + = 'Verify that strings with embedded NUL and ' + . 'extra trailing NUL collate'; + $ok = "a\0a\0" lt "a${lowest_control}a${lowest_control}"; + report_result($Locale, $locales_test_number, $ok); + + ++$locales_test_number; + $test_names{$locales_test_number} + = 'Verify that empty strings collate'; + $ok = "" le ""; + report_result($Locale, $locales_test_number, $ok); + + ++$locales_test_number; + $test_names{$locales_test_number} + = "Skip in non-UTF-8 locales; otherwise verify that UTF8ness " + . "doesn't matter with collation"; + if (! $is_utf8_locale) { + report_result($Locale, $locales_test_number, 1); + } + else { + + # khw can't think of anything better. Start with a string that is + # higher than its UTF-8 representation in both EBCDIC and ASCII + my $string = chr utf8::unicode_to_native(0xff); + my $utf8_string = $string; + utf8::upgrade($utf8_string); + + # 8 should be lt 9 in all locales (except ones that aren't + # ASCII-based, which might fail this) + $ok = ("a${string}8") lt ("a${utf8_string}9"); + report_result($Locale, $locales_test_number, $ok); + } + + ++$locales_test_number; + $test_names{$locales_test_number} + = "Skip in UTF-8 locales; otherwise verify that single byte " + . "collates before 0x100 and above"; + if ($is_utf8_locale) { + report_result($Locale, $locales_test_number, 1); + } + else { + my $max_collating = chr 0; # Find byte that collates highest + for my $i (0 .. 255) { + my $char = chr $i; + $max_collating = $char if $char gt $max_collating; + } + $ok = $max_collating lt chr 0x100; + report_result($Locale, $locales_test_number, $ok); + } + + ++$locales_test_number; + $test_names{$locales_test_number} + = "Skip in UTF-8 locales; otherwise verify that 0x100 and " + . "above collate in code point order"; + if ($is_utf8_locale) { + report_result($Locale, $locales_test_number, 1); + } + else { + $ok = chr 0x100 lt chr 0x101; + report_result($Locale, $locales_test_number, $ok); + } } my $ok1; @@ -1667,6 +1862,7 @@ foreach my $Locale (@Locale) { my $ok18; my $ok19; my $ok20; + my $ok21; my $c; my $d; @@ -1677,8 +1873,6 @@ foreach my $Locale (@Locale) { my $i; my $j; - my @times = CORE::localtime(); - if (! $is_utf8_locale) { use locale; @@ -1737,9 +1931,6 @@ foreach my $Locale (@Locale) { $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i); } $ok18 = $j eq sprintf("%g:%g", $h, $i); - $ok19 = POSIX::strftime("%p",@times) ne "%p"; # [perl #119425] - my $date = POSIX::strftime("%A %B %Z",@times); - $ok20 = $date =~ / ^ \p{ASCII}+ $ /x || ! utf8::is_utf8($date); } else { use locale ':not_characters'; @@ -1788,18 +1979,26 @@ foreach my $Locale (@Locale) { # Look for non-ASCII error messages, and verify that the first # such is in UTF-8 (the others almost certainly will be like the - # first). + # first). This is only done if the current locale has LC_MESSAGES $ok14 = 1; $ok14_5 = 1; - foreach my $err (keys %!) { - use Errno; - $! = eval "&Errno::$err"; # Convert to strerror() output - my $strerror = "$!"; - if ("$strerror" =~ /\P{ASCII}/) { - $ok14 = utf8::is_utf8($strerror); - no locale; - $ok14_5 = "$!" !~ /\P{ASCII}/; - last; + if ( locales_enabled('LC_MESSAGES') + && setlocale(&POSIX::LC_MESSAGES, $Locale)) + { + foreach my $err (keys %!) { + use Errno; + $! = eval "&Errno::$err"; # Convert to strerror() output + my $errnum = 0+$!; + my $strerror = "$!"; + if ("$strerror" =~ /\P{ASCII}/) { + $ok14 = utf8::is_utf8($strerror); + no locale; + $ok14_5 = "$!" !~ /\P{ASCII}/; + debug( disp_str( + "non-ASCII \$! for error $errnum='$strerror'")) + if ! $ok14_5; + last; + } } } @@ -1818,35 +2017,73 @@ foreach my $Locale (@Locale) { $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i); } $ok18 = $j eq sprintf("%g:%g", $h, $i); - $ok19 = POSIX::strftime("%p",@times) ne "%p"; # [perl #119425] - my $date = POSIX::strftime("%A %B %Z",@times); - $ok20 = $date =~ / ^ \p{ASCII}+ $ /x || utf8::is_utf8($date); + } + + $ok19 = $ok20 = 1; + if (setlocale(&POSIX::LC_TIME, $Locale)) { # These tests aren't affected by + # :not_characters + my @times = CORE::localtime(); + + use locale; + $ok19 = POSIX::strftime("%p", @times) ne "%p"; # [perl #119425] + my $date = POSIX::strftime("'%A' '%B' '%Z' '%p'", @times); + debug("'Day' 'Month' 'TZ' 'am/pm' = ", disp_str($date)); + + # If there is any non-ascii, it better be UTF-8 in a UTF-8 locale, and + # not UTF-8 if the locale isn't UTF-8. + $ok20 = $date =~ / ^ \p{ASCII}+ $ /x + || $is_utf8_locale == utf8::is_utf8($date); + } + + $ok21 = 1; + if (locales_enabled('LC_MESSAGES')) { + foreach my $err (keys %!) { + no locale; + use Errno; + $! = eval "&Errno::$err"; # Convert to strerror() output + my $strerror = "$!"; + if ($strerror =~ /\P{ASCII}/) { + $ok21 = 0; + debug(disp_str("non-ASCII strerror=$strerror")); + last; + } + } } report_result($Locale, ++$locales_test_number, $ok1); $test_names{$locales_test_number} = 'Verify that an intervening printf doesn\'t change assignment results'; my $first_a_test = $locales_test_number; - debug "# $first_a_test..$locales_test_number: \$a = $a, \$b = $b, Locale = $Locale\n"; + debug "$first_a_test..$locales_test_number: \$a = $a, \$b = $b, Locale = $Locale\n"; report_result($Locale, ++$locales_test_number, $ok2); $test_names{$locales_test_number} = 'Verify that an intervening sprintf doesn\'t change assignment results'; my $first_c_test = $locales_test_number; - report_result($Locale, ++$locales_test_number, $ok3); - $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a constant'; - $problematical_tests{$locales_test_number} = 1; + $test_names{++$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a constant'; + if ($Config{usequadmath}) { + print "# Skip: no locale radix with usequadmath ($test_names{$locales_test_number})\n"; + report_result($Locale, $locales_test_number, 1); + } else { + report_result($Locale, $locales_test_number, $ok3); + $problematical_tests{$locales_test_number} = 1; + } - report_result($Locale, ++$locales_test_number, $ok4); - $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar'; - $problematical_tests{$locales_test_number} = 1; + $test_names{++$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar'; + if ($Config{usequadmath}) { + print "# Skip: no locale radix with usequadmath ($test_names{$locales_test_number})\n"; + report_result($Locale, $locales_test_number, 1); + } else { + report_result($Locale, $locales_test_number, $ok4); + $problematical_tests{$locales_test_number} = 1; + } report_result($Locale, ++$locales_test_number, $ok5); $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar and an intervening sprintf'; $problematical_tests{$locales_test_number} = 1; - debug "# $first_c_test..$locales_test_number: \$c = $c, \$d = $d, Locale = $Locale\n"; + debug "$first_c_test..$locales_test_number: \$c = $c, \$d = $d, Locale = $Locale\n"; report_result($Locale, ++$locales_test_number, $ok6); $test_names{$locales_test_number} = 'Verify that can assign stringified under inner no-locale block'; @@ -1855,11 +2092,16 @@ foreach my $Locale (@Locale) { report_result($Locale, ++$locales_test_number, $ok7); $test_names{$locales_test_number} = 'Verify that "==" with a scalar still works in inner no locale'; - report_result($Locale, ++$locales_test_number, $ok8); - $test_names{$locales_test_number} = 'Verify that "==" with a scalar and an intervening sprintf still works in inner no locale'; - $problematical_tests{$locales_test_number} = 1; + $test_names{++$locales_test_number} = 'Verify that "==" with a scalar and an intervening sprintf still works in inner no locale'; + if ($Config{usequadmath}) { + print "# Skip: no locale radix with usequadmath ($test_names{$locales_test_number})\n"; + report_result($Locale, $locales_test_number, 1); + } else { + report_result($Locale, $locales_test_number, $ok8); + $problematical_tests{$locales_test_number} = 1; + } - debug "# $first_e_test..$locales_test_number: \$e = $e, no locale\n"; + debug "$first_e_test..$locales_test_number: \$e = $e, no locale\n"; report_result($Locale, ++$locales_test_number, $ok9); $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a constant'; @@ -1870,9 +2112,14 @@ foreach my $Locale (@Locale) { $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar'; $problematical_tests{$locales_test_number} = 1; - report_result($Locale, ++$locales_test_number, $ok11); - $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar and an intervening sprintf'; - $problematical_tests{$locales_test_number} = 1; + $test_names{++$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar and an intervening sprintf'; + if ($Config{usequadmath}) { + print "# Skip: no locale radix with usequadmath ($test_names{$locales_test_number})\n"; + report_result($Locale, $locales_test_number, 1); + } else { + report_result($Locale, $locales_test_number, $ok11); + $problematical_tests{$locales_test_number} = 1; + } report_result($Locale, ++$locales_test_number, $ok12); $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix can participate in an addition and function call as numeric'; @@ -1893,23 +2140,30 @@ foreach my $Locale (@Locale) { report_result($Locale, ++$locales_test_number, $ok16); $test_names{$locales_test_number} = 'Verify that a sprintf of a number with a UTF-8 radix yields UTF-8'; + $problematical_tests{$locales_test_number} = 1; report_result($Locale, ++$locales_test_number, $ok17); $test_names{$locales_test_number} = 'Verify that a sprintf of a number outside locale scope uses a dot radix'; report_result($Locale, ++$locales_test_number, $ok18); $test_names{$locales_test_number} = 'Verify that a sprintf of a number back within locale scope uses locale radix'; + $problematical_tests{$locales_test_number} = 1; report_result($Locale, ++$locales_test_number, $ok19); $test_names{$locales_test_number} = 'Verify that strftime doesn\'t return "%p" in locales where %p is empty'; report_result($Locale, ++$locales_test_number, $ok20); $test_names{$locales_test_number} = 'Verify that strftime returns date with UTF-8 flag appropriately set'; + $problematical_tests{$locales_test_number} = 1; # This is broken in + # OS X 10.9.3 + + report_result($Locale, ++$locales_test_number, $ok21); + $test_names{$locales_test_number} = '"$!" is ASCII only outside of locale scope'; - debug "# $first_f_test..$locales_test_number: \$f = $f, \$g = $g, back to locale = $Locale\n"; + debug "$first_f_test..$locales_test_number: \$f = $f, \$g = $g, back to locale = $Locale\n"; # Does taking lc separately differ from taking - # the lc "in-line"? (This was the bug 19990704.002, change #3568.) + # the lc "in-line"? (This was the bug 19990704.002 (#965), change #3568.) # The bug was in the caching of the 'o'-magic. if (! $is_utf8_locale) { use locale; @@ -1970,14 +2224,14 @@ foreach my $Locale (@Locale) { if (! $is_utf8_locale) { my $y = lc $x; next unless uc $y eq $x; - debug_more( "# UPPER=", disp_chars(($x)), + debug_more( "UPPER=", disp_chars(($x)), "; lc=", disp_chars(($y)), "; ", "; fc=", disp_chars((fc $x)), "; ", disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", - $x =~ /$y/i ? 1 : 0, + $x =~ /\Q$y/i ? 1 : 0, "; ", disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", - $y =~ /$x/i ? 1 : 0, + $y =~ /\Q$x/i ? 1 : 0, "\n"); # # If $x and $y contain regular expression characters @@ -2007,7 +2261,7 @@ foreach my $Locale (@Locale) { print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n"; next; } - push @f, $x unless $x =~ /$y/i && $y =~ /$x/i; + push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i; # fc is not a locale concept, so Perl uses lc for it. push @f, $x unless lc $x eq fc $x; @@ -2016,17 +2270,17 @@ foreach my $Locale (@Locale) { use locale ':not_characters'; my $y = lc $x; next unless uc $y eq $x; - debug_more( "# UPPER=", disp_chars(($x)), + debug_more( "UPPER=", disp_chars(($x)), "; lc=", disp_chars(($y)), "; ", "; fc=", disp_chars((fc $x)), "; ", disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", - $x =~ /$y/i ? 1 : 0, + $x =~ /\Q$y/i ? 1 : 0, "; ", disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", - $y =~ /$x/i ? 1 : 0, + $y =~ /\Q$x/i ? 1 : 0, "\n"); - push @f, $x unless $x =~ /$y/i && $y =~ /$x/i; + push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i; # The places where Unicode's lc is different from fc are # skipped here by virtue of the 'next unless uc...' line above @@ -2038,20 +2292,20 @@ foreach my $Locale (@Locale) { if (! $is_utf8_locale) { my $y = uc $x; next unless lc $y eq $x; - debug_more( "# lower=", disp_chars(($x)), + debug_more( "lower=", disp_chars(($x)), "; uc=", disp_chars(($y)), "; ", "; fc=", disp_chars((fc $x)), "; ", disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", - $x =~ /$y/i ? 1 : 0, + $x =~ /\Q$y/i ? 1 : 0, "; ", disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", - $y =~ /$x/i ? 1 : 0, + $y =~ /\Q$x/i ? 1 : 0, "\n"); if ($x =~ $re || $y =~ $re) { # See above. print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n"; next; } - push @f, $x unless $x =~ /$y/i && $y =~ /$x/i; + push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i; push @f, $x unless lc $x eq fc $x; } @@ -2059,16 +2313,16 @@ foreach my $Locale (@Locale) { use locale ':not_characters'; my $y = uc $x; next unless lc $y eq $x; - debug_more( "# lower=", disp_chars(($x)), + debug_more( "lower=", disp_chars(($x)), "; uc=", disp_chars(($y)), "; ", "; fc=", disp_chars((fc $x)), "; ", disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", - $x =~ /$y/i ? 1 : 0, + $x =~ /\Q$y/i ? 1 : 0, "; ", disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", - $y =~ /$x/i ? 1 : 0, + $y =~ /\Q$x/i ? 1 : 0, "\n"); - push @f, $x unless $x =~ /$y/i && $y =~ /$x/i; + push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i; push @f, $x unless lc $x eq fc $x; } @@ -2105,9 +2359,14 @@ foreach my $Locale (@Locale) { } } - report_result($Locale, $locales_test_number, @f == 0); - if (@f) { - print "# failed $locales_test_number locale '$Locale' numbers @f\n" + if ($Config{usequadmath}) { + print "# Skip: no locale radix with usequadmath ($Locale)\n"; + report_result($Locale, $locales_test_number, 1); + } else { + report_result($Locale, $locales_test_number, @f == 0); + if (@f) { + print "# failed $locales_test_number locale '$Locale' numbers @f\n" + } } } } @@ -2116,11 +2375,15 @@ my $final_locales_test_number = $locales_test_number; # Recount the errors. +TEST_NUM: foreach $test_num ($first_locales_test_number..$final_locales_test_number) { if (%setlocale_failed) { print "not "; } - elsif ($Problem{$test_num} || !defined $Okay{$test_num} || !@{$Okay{$test_num}}) { + elsif ($Problem{$test_num} + || ! defined $Okay{$test_num} + || ! @{$Okay{$test_num}}) + { if (defined $not_necessarily_a_problem_test_number && $test_num == $not_necessarily_a_problem_test_number) { @@ -2128,18 +2391,52 @@ foreach $test_num ($first_locales_test_number..$final_locales_test_number) { print "# It usually indicates a problem in the environment,\n"; print "# not in Perl itself.\n"; } - if ($Okay{$test_num} && grep { $_ == $test_num } keys %problematical_tests) { - no warnings 'experimental::autoderef'; + + # If there are any locales that pass this test, or are known-bad, it + # may be that there are enough passes that we TODO the failure. + if (($Okay{$test_num} || $Known_bad_locale{$test_num}) + && grep { $_ == $test_num } keys %problematical_tests) + { + no warnings 'experimental::postderef'; + + # Don't count the known-bad failures when calculating the + # percentage that fail. + my $known_failures = (exists $Known_bad_locale{$test_num}) + ? scalar(keys $Known_bad_locale{$test_num}->%*) + : 0; + my $adjusted_failures = scalar(keys $Problem{$test_num}->%*) + - $known_failures; + + # Specially handle failures where only known-bad locales fail. + # This makes the diagnositics clearer. + if ($adjusted_failures <= 0) { + print "not ok $test_num $test_names{$test_num} # TODO fails only on ", + "known bad locales: ", + join " ", keys $Known_bad_locale{$test_num}->%*, "\n"; + next TEST_NUM; + } + # Round to nearest .1% - my $percent_fail = (int(.5 + (1000 * scalar(keys $Problem{$test_num}) + my $percent_fail = (int(.5 + (1000 * $adjusted_failures / scalar(@Locale)))) / 10; - if (! $debug && $percent_fail < $acceptable_failure_percentage) - { - $test_names{$test_num} .= 'TODO'; - print "# ", 100 - $percent_fail, "% of locales pass the following test, so it is likely that the failures\n"; - print "# are errors in the locale definitions. The test is marked TODO, as the\n"; - print "# problem is not likely to be Perl's\n"; + if ($percent_fail < $acceptable_failure_percentage) { + if (! $debug) { + $test_names{$test_num} .= 'TODO'; + print "# ", 100 - $percent_fail, "% of locales not known to be problematic on this platform\n"; + print "# pass the following test, so it is likely that the failures\n"; + print "# are errors in the locale definitions. The test is marked TODO, as the\n"; + print "# problem is not likely to be Perl's\n"; + } + } + if ($debug) { + print "# $percent_fail% of locales (", + scalar(keys $Problem{$test_num}->%*), + " of ", + scalar(@Locale), + ") fail the above test (TODO cut-off is ", + $acceptable_failure_percentage, + "%)\n"; } } print "#\n"; @@ -2209,13 +2506,18 @@ setlocale(&POSIX::LC_ALL, "C"); # All casing operations under locale (but not :not_characters) should # taint if ($function =~ /^u/) { - @list = ("", "a", "\xe0", "\xff", "\x{fb00}", "\x{149}", "\x{101}"); - $ascii_case_change_delta = -32; + @list = ("", "a", + chr(utf8::unicode_to_native(0xe0)), + chr(utf8::unicode_to_native(0xff)), + "\x{fb00}", "\x{149}", "\x{101}"); + $ascii_case_change_delta = ($is_ebcdic) ? +64 : -32; $above_latin1_case_change_delta = -1; } else { - @list = ("", "A", "\xC0", "\x{17F}", "\x{100}"); - $ascii_case_change_delta = +32; + @list = ("", "A", + chr(utf8::unicode_to_native(0xC0)), + "\x{17F}", "\x{100}"); + $ascii_case_change_delta = ($is_ebcdic) ? -64 : +32; $above_latin1_case_change_delta = +1; } foreach my $is_utf8_locale (0 .. 1) { @@ -2226,11 +2528,12 @@ setlocale(&POSIX::LC_ALL, "C"); my $should_be; my $changed; if (! $is_utf8_locale) { + no warnings 'locale'; $should_be = ($j == $#list) ? chr(ord($char) + $above_latin1_case_change_delta) - : (length $char == 0 || ord($char) > 127) - ? $char - : chr(ord($char) + $ascii_case_change_delta); + : (length $char == 0 || utf8::native_to_unicode(ord($char)) > 127) + ? $char + : chr(ord($char) + $ascii_case_change_delta); # This monstrosity is in order to avoid using an eval, # which might perturb the results @@ -2302,6 +2605,12 @@ my $didwarn = 0; foreach ($first_locales_test_number..$final_locales_test_number) { if ($Problem{$_}) { my @f = sort keys %{ $Problem{$_} }; + + # Don't list the failures caused by known-bad locales. + if (exists $known_bad_locales{$^O}) { + @f = grep { $_ !~ $known_bad_locales{$^O} } @f; + next unless @f; + } my $f = join(" ", @f); $f =~ s/(.{50,60}) /$1\n#\t/g; print @@ -2315,7 +2624,7 @@ foreach ($first_locales_test_number..$final_locales_test_number) { print <