X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/28acfe03fc59abea4ef2451b134d560f411183ab..b05fd80e8a75b8bba137db6cd6688f7aa8a0ec39:/lib/locale.t diff --git a/lib/locale.t b/lib/locale.t index 081783b..31b40f9 100644 --- a/lib/locale.t +++ b/lib/locale.t @@ -19,17 +19,27 @@ BEGIN { print "1..0\n"; exit; } + require './loc_tools.pl'; $| = 1; } use strict; use feature 'fc'; -my $debug = 0; +# =1 adds debugging output; =2 increases the verbosity somewhat +my $debug = $ENV{PERL_DEBUG_FULL_TEST} // 0; # Certain tests have been shown to be problematical for a few locales. Don't # fail them unless at least this percentage of the tested locales fail. -my $acceptable_fold_failure_percentage = 5; +# On AIX machines, many locales call a no-break space a graphic. +# (There aren't 1000 locales currently in existence, so 99.9 works) +my $acceptable_failure_percentage = ($^O =~ / ^ ( AIX ) $ /ix) + ? 99.9 + : 5; + +# The list of test numbers of the problematic tests. +my %problematical_tests; + use Dumpvalue; @@ -40,37 +50,21 @@ my $dumper = Dumpvalue->new( ); sub debug { return unless $debug; - my($mess) = join "", @_; + my($mess) = join "", '# ', @_; chop $mess; print $dumper->stringify($mess,1), "\n"; } +sub debug_more { + return unless $debug > 1; + return debug(@_); +} + sub debugf { printf @_ if $debug; } -my $have_setlocale = 0; -eval { - require POSIX; - import POSIX ':locale_h'; - $have_setlocale++; -}; - -# Visual C's CRT goes silly on strings of the form "en_US.ISO8859-1" -# and mingw32 uses said silly CRT -# This doesn't seem to be an issue any more, at least on Windows XP, -# so re-enable the tests for Windows XP onwards. -my $winxp = ($^O eq 'MSWin32' && defined &Win32::GetOSVersion && - join('.', (Win32::GetOSVersion())[1..2]) >= 5.1); -$have_setlocale = 0 if ((($^O eq 'MSWin32' && !$winxp) || $^O eq 'NetWare') && - $Config{cc} =~ /^(cl|gcc)/i); - -# UWIN seems to loop after taint tests, just skip for now -$have_setlocale = 0 if ($^O =~ /^uwin/); - -sub LC_ALL (); - -$a = 'abc %'; +$a = 'abc %9'; my $test_num = 0; @@ -97,13 +91,15 @@ sub is_tainted { # hello, camel two. sub check_taint ($;$) { my $message_tail = $_[1] // ""; - $message_tail = ": $message_tail" if $message_tail; + + # Extra blanks are so aligns with taint_not output + $message_tail = ": $message_tail" if $message_tail; ok is_tainted($_[0]), "verify that is tainted$message_tail"; } sub check_taint_not ($;$) { my $message_tail = $_[1] // ""; - $message_tail = ": $message_tail" if $message_tail; + $message_tail = ": $message_tail" if $message_tail; ok((not is_tainted($_[0])), "verify that isn't tainted$message_tail"); } @@ -113,325 +109,583 @@ check_taint_not $&, "not tainted outside 'use locale'"; use locale; # engage locale and therefore locale taint. -check_taint_not $a; - -check_taint uc($a); -check_taint "\U$a"; -check_taint ucfirst($a); -check_taint "\u$a"; -check_taint lc($a); -check_taint fc($a); -check_taint "\L$a"; -check_taint "\F$a"; -check_taint lcfirst($a); -check_taint "\l$a"; - -check_taint_not sprintf('%e', 123.456); -check_taint_not sprintf('%f', 123.456); -check_taint_not sprintf('%g', 123.456); -check_taint_not sprintf('%d', 123.456); -check_taint_not sprintf('%x', 123.456); +# BE SURE TO COPY ANYTHING YOU ADD to these tests to the block below for +# ":notcharacters" + +check_taint_not $a, '$a'; + +check_taint uc($a), 'uc($a)'; +check_taint "\U$a", '"\U$a"'; +check_taint ucfirst($a), 'ucfirst($a)'; +check_taint "\u$a", '"\u$a"'; +check_taint lc($a), 'lc($a)'; +check_taint fc($a), 'fc($a)'; +check_taint "\L$a", '"\L$a"'; +check_taint "\F$a", '"\F$a"'; +check_taint lcfirst($a), 'lcfirst($a)'; +check_taint "\l$a", '"\l$a"'; + +check_taint_not sprintf('%e', 123.456), "sprintf('%e', 123.456)"; +check_taint_not sprintf('%f', 123.456), "sprintf('%f', 123.456)"; +check_taint_not sprintf('%g', 123.456), "sprintf('%g', 123.456)"; +check_taint_not sprintf('%d', 123.456), "sprintf('%d', 123.456)"; +check_taint_not sprintf('%x', 123.456), "sprintf('%x', 123.456)"; $_ = $a; # untaint $_ $_ = uc($a); # taint $_ -check_taint $_; +check_taint $_, '$_ = uc($a)'; /(\w)/; # taint $&, $`, $', $+, $1. -check_taint $&; -check_taint $`; -check_taint $'; -check_taint $+; -check_taint $1; -check_taint_not $2; +check_taint $&, "\$& from /(\\w)/"; +check_taint $`, "\t\$`"; +check_taint $', "\t\$'"; +check_taint $+, "\t\$+"; +check_taint $1, "\t\$1"; +check_taint_not $2, "\t\$2"; /(.)/; # untaint $&, $`, $', $+, $1. -check_taint_not $&; -check_taint_not $`; -check_taint_not $'; -check_taint_not $+; -check_taint_not $1; -check_taint_not $2; +check_taint_not $&, "\$& from /(.)/"; +check_taint_not $`, "\t\$`"; +check_taint_not $', "\t\$'"; +check_taint_not $+, "\t\$+"; +check_taint_not $1, "\t\$1"; +check_taint_not $2, "\t\$2"; /(\W)/; # taint $&, $`, $', $+, $1. -check_taint $&; -check_taint $`; -check_taint $'; -check_taint $+; -check_taint $1; -check_taint_not $2; +check_taint $&, "\$& from /(\\W)/"; +check_taint $`, "\t\$`"; +check_taint $', "\t\$'"; +check_taint $+, "\t\$+"; +check_taint $1, "\t\$1"; +check_taint_not $2, "\t\$2"; + +/(.)/; # untaint $&, $`, $', $+, $1. +check_taint_not $&, "\$& from /(.)/"; +check_taint_not $`, "\t\$`"; +check_taint_not $', "\t\$'"; +check_taint_not $+, "\t\$+"; +check_taint_not $1, "\t\$1"; +check_taint_not $2, "\t\$2"; /(\s)/; # taint $&, $`, $', $+, $1. -check_taint $&; -check_taint $`; -check_taint $'; -check_taint $+; -check_taint $1; -check_taint_not $2; +check_taint $&, "\$& from /(\\s)/"; +check_taint $`, "\t\$`"; +check_taint $', "\t\$'"; +check_taint $+, "\t\$+"; +check_taint $1, "\t\$1"; +check_taint_not $2, "\t\$2"; + +/(.)/; # untaint $&, $`, $', $+, $1. +check_taint_not $&, "\$& from /(.)/"; /(\S)/; # taint $&, $`, $', $+, $1. -check_taint $&; -check_taint $`; -check_taint $'; -check_taint $+; -check_taint $1; -check_taint_not $2; +check_taint $&, "\$& from /(\\S)/"; +check_taint $`, "\t\$`"; +check_taint $', "\t\$'"; +check_taint $+, "\t\$+"; +check_taint $1, "\t\$1"; +check_taint_not $2, "\t\$2"; + +/(.)/; # untaint $&, $`, $', $+, $1. +check_taint_not $&, "\$& from /(.)/"; + +"0" =~ /(\d)/; # taint $&, $`, $', $+, $1. +check_taint $&, "\$& from /(\\d)/"; +check_taint $`, "\t\$`"; +check_taint $', "\t\$'"; +check_taint $+, "\t\$+"; +check_taint $1, "\t\$1"; +check_taint_not $2, "\t\$2"; + +/(.)/; # untaint $&, $`, $', $+, $1. +check_taint_not $&, "\$& from /(.)/"; + +/(\D)/; # taint $&, $`, $', $+, $1. +check_taint $&, "\$& from /(\\D)/"; +check_taint $`, "\t\$`"; +check_taint $', "\t\$'"; +check_taint $+, "\t\$+"; +check_taint $1, "\t\$1"; +check_taint_not $2, "\t\$2"; + +/(.)/; # untaint $&, $`, $', $+, $1. +check_taint_not $&, "\$& from /(.)/"; + +/([[:alnum:]])/; # taint $&, $`, $', $+, $1. +check_taint $&, "\$& from /([[:alnum:]])/"; +check_taint $`, "\t\$`"; +check_taint $', "\t\$'"; +check_taint $+, "\t\$+"; +check_taint $1, "\t\$1"; +check_taint_not $2, "\t\$2"; + +/(.)/; # untaint $&, $`, $', $+, $1. +check_taint_not $&, "\$& from /(.)/"; + +/([[:^alnum:]])/; # taint $&, $`, $', $+, $1. +check_taint $&, "\$& from /([[:^alnum:]])/"; +check_taint $`, "\t\$`"; +check_taint $', "\t\$'"; +check_taint $+, "\t\$+"; +check_taint $1, "\t\$1"; +check_taint_not $2, "\t\$2"; + +"a" =~ /(a)|(\w)/; # taint $&, $`, $', $+, $1. +check_taint $&, "\$& from /(a)|(\\w)/"; +check_taint $`, "\t\$`"; +check_taint $', "\t\$'"; +check_taint $+, "\t\$+"; +check_taint $1, "\t\$1"; +ok($1 eq 'a', ("\t" x 5) . "\$1 is 'a'"); +ok(! defined $2, ("\t" x 5) . "\$2 is undefined"); +check_taint_not $2, "\t\$2"; +check_taint_not $3, "\t\$3"; + +/(.)/; # untaint $&, $`, $', $+, $1. +check_taint_not $&, "\$& from /(.)/"; + +"\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i; # no tainting because no locale dependence +check_taint_not $&, "\$& from /(\\N{CYRILLIC CAPITAL LETTER A})/i"; +check_taint_not $`, "\t\$`"; +check_taint_not $', "\t\$'"; +check_taint_not $+, "\t\$+"; +check_taint_not $1, "\t\$1"; +ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\t\$1 is 'small cyrillic a'"); +check_taint_not $2, "\t\$2"; + +/(.)/; # untaint $&, $`, $', $+, $1. +check_taint_not $&, "\$& from /./"; + +"(\N{KELVIN SIGN})" =~ /(\N{KELVIN SIGN})/i; # taints because depends on locale +check_taint $&, "\$& from /(\\N{KELVIN SIGN})/i"; +check_taint $`, "\t\$`"; +check_taint $', "\t\$'"; +check_taint $+, "\t\$+"; +check_taint $1, "\t\$1"; +check_taint_not $2, "\t\$2"; + +/(.)/; # untaint $&, $`, $', $+, $1. +check_taint_not $&, "\$& from /(.)/"; + +"a:" =~ /(.)\b(.)/; # taint $&, $`, $', $+, $1. +check_taint $&, "\$& from /(.)\\b(.)/"; +check_taint $`, "\t\$`"; +check_taint $', "\t\$'"; +check_taint $+, "\t\$+"; +check_taint $1, "\t\$1"; +check_taint $2, "\t\$2"; +check_taint_not $3, "\t\$3"; + +/(.)/; # untaint $&, $`, $', $+, $1. +check_taint_not $&, "\$& from /./"; + +"aa" =~ /(.)\B(.)/; # taint $&, $`, $', $+, $1. +check_taint $&, "\$& from /(.)\\B(.)/"; +check_taint $`, "\t\$`"; +check_taint $', "\t\$'"; +check_taint $+, "\t\$+"; +check_taint $1, "\t\$1"; +check_taint $2, "\t\$2"; +check_taint_not $3, "\t\$3"; + +/(.)/; # untaint $&, $`, $', $+, $1. +check_taint_not $&, "\$& from /./"; + +"aaa" =~ /(.).(\1)/i; # notaint because not locale dependent +check_taint_not $&, "\$ & from /(.).(\\1)/"; +check_taint_not $`, "\t\$`"; +check_taint_not $', "\t\$'"; +check_taint_not $+, "\t\$+"; +check_taint_not $1, "\t\$1"; +check_taint_not $2, "\t\$2"; +check_taint_not $3, "\t\$3"; + +/(.)/; # untaint $&, $`, $', $+, $1. +check_taint_not $&, "\$ & from /./"; $_ = $a; # untaint $_ -check_taint_not $_; +check_taint_not $_, 'untainting $_ works'; /(b)/; # this must not taint -check_taint_not $&; -check_taint_not $`; -check_taint_not $'; -check_taint_not $+; -check_taint_not $1; -check_taint_not $2; +check_taint_not $&, "\$ & from /(b)/"; +check_taint_not $`, "\t\$`"; +check_taint_not $', "\t\$'"; +check_taint_not $+, "\t\$+"; +check_taint_not $1, "\t\$1"; +check_taint_not $2, "\t\$2"; $_ = $a; # untaint $_ -check_taint_not $_; +check_taint_not $_, 'untainting $_ works'; $b = uc($a); # taint $b s/(.+)/$b/; # this must taint only the $_ -check_taint $_; -check_taint_not $&; -check_taint_not $`; -check_taint_not $'; -check_taint_not $+; -check_taint_not $1; -check_taint_not $2; +check_taint $_, '$_ (wasn\'t tainted) from s/(.+)/$b/ where $b is tainted'; +check_taint_not $&, "\t\$&"; +check_taint_not $`, "\t\$`"; +check_taint_not $', "\t\$'"; +check_taint_not $+, "\t\$+"; +check_taint_not $1, "\t\$1"; +check_taint_not $2, "\t\$2"; $_ = $a; # untaint $_ s/(.+)/b/; # this must not taint -check_taint_not $_; -check_taint_not $&; -check_taint_not $`; -check_taint_not $'; -check_taint_not $+; -check_taint_not $1; -check_taint_not $2; +check_taint_not $_, '$_ (wasn\'t tainted) from s/(.+)/b/'; +check_taint_not $&, "\t\$&"; +check_taint_not $`, "\t\$`"; +check_taint_not $', "\t\$'"; +check_taint_not $+, "\t\$+"; +check_taint_not $1, "\t\$1"; +check_taint_not $2, "\t\$2"; $b = $a; # untaint $b ($b = $a) =~ s/\w/$&/; -check_taint $b; # $b should be tainted. -check_taint_not $a; # $a should be not. +check_taint $b, '$b from ($b = $a) =~ s/\w/$&/'; # $b should be tainted. +check_taint_not $a, '$a from ($b = $a) =~ s/\w/$&/'; # $a should be not. $_ = $a; # untaint $_ s/(\w)/\l$1/; # this must taint -check_taint $_; -check_taint $&; -check_taint $`; -check_taint $'; -check_taint $+; -check_taint $1; -check_taint_not $2; +check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\l$1/,'; # this must taint +check_taint $&, "\t\$&"; +check_taint $`, "\t\$`"; +check_taint $', "\t\$'"; +check_taint $+, "\t\$+"; +check_taint $1, "\t\$1"; +check_taint_not $2, "\t\$2"; $_ = $a; # untaint $_ s/(\w)/\L$1/; # this must taint -check_taint $_; -check_taint $&; -check_taint $`; -check_taint $'; -check_taint $+; -check_taint $1; -check_taint_not $2; +check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\L$1/,'; +check_taint $&, "\t\$&"; +check_taint $`, "\t\$`"; +check_taint $', "\t\$'"; +check_taint $+, "\t\$+"; +check_taint $1, "\t\$1"; +check_taint_not $2, "\t\$2"; $_ = $a; # untaint $_ s/(\w)/\u$1/; # this must taint -check_taint $_; -check_taint $&; -check_taint $`; -check_taint $'; -check_taint $+; -check_taint $1; -check_taint_not $2; +check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\u$1/'; +check_taint $&, "\t\$&"; +check_taint $`, "\t\$`"; +check_taint $', "\t\$'"; +check_taint $+, "\t\$+"; +check_taint $1, "\t\$1"; +check_taint_not $2, "\t\$2"; $_ = $a; # untaint $_ s/(\w)/\U$1/; # this must taint -check_taint $_; -check_taint $&; -check_taint $`; -check_taint $'; -check_taint $+; -check_taint $1; -check_taint_not $2; +check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\U$1/'; +check_taint $&, "\t\$&"; +check_taint $`, "\t\$`"; +check_taint $', "\t\$'"; +check_taint $+, "\t\$+"; +check_taint $1, "\t\$1"; +check_taint_not $2, "\t\$2"; # After all this tainting $a should be cool. -check_taint_not $a; +check_taint_not $a, '$a still not tainted'; + +"a" =~ /([a-z])/; +check_taint_not $1, '"a" =~ /([a-z])/'; +"foo.bar_baz" =~ /^(.*)[._](.*?)$/; # Bug 120675 +check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/'; + +# BE SURE TO COPY ANYTHING YOU ADD to the block below { # This is just the previous tests copied here with a different # compile-time pragma. use locale ':not_characters'; # engage restricted locale with different # tainting rules - - check_taint_not $a; - - check_taint_not uc($a); - check_taint_not "\U$a"; - check_taint_not ucfirst($a); - check_taint_not "\u$a"; - check_taint_not lc($a); - check_taint_not fc($a); - check_taint_not "\L$a"; - check_taint_not "\F$a"; - check_taint_not lcfirst($a); - check_taint_not "\l$a"; - - check_taint_not sprintf('%e', 123.456); - check_taint_not sprintf('%f', 123.456); - check_taint_not sprintf('%g', 123.456); - check_taint_not sprintf('%d', 123.456); - check_taint_not sprintf('%x', 123.456); + check_taint_not $a, '$a'; + + check_taint_not uc($a), 'uc($a)'; + check_taint_not "\U$a", '"\U$a"'; + check_taint_not ucfirst($a), 'ucfirst($a)'; + check_taint_not "\u$a", '"\u$a"'; + check_taint_not lc($a), 'lc($a)'; + check_taint_not fc($a), 'fc($a)'; + check_taint_not "\L$a", '"\L$a"'; + check_taint_not "\F$a", '"\F$a"'; + check_taint_not lcfirst($a), 'lcfirst($a)'; + check_taint_not "\l$a", '"\l$a"'; + + check_taint_not sprintf('%e', 123.456), "sprintf('%e', 123.456)"; + check_taint_not sprintf('%f', 123.456), "sprintf('%f', 123.456)"; + check_taint_not sprintf('%g', 123.456), "sprintf('%g', 123.456)"; + check_taint_not sprintf('%d', 123.456), "sprintf('%d', 123.456)"; + check_taint_not sprintf('%x', 123.456), "sprintf('%x', 123.456)"; $_ = $a; # untaint $_ - $_ = uc($a); # taint $_ + $_ = uc($a); - check_taint_not $_; + check_taint_not $_, '$_ = uc($a)'; - /(\w)/; # taint $&, $`, $', $+, $1. - check_taint_not $&; - check_taint_not $`; - check_taint_not $'; - check_taint_not $+; - check_taint_not $1; - check_taint_not $2; + /(\w)/; + check_taint_not $&, "\$& from /(\\w)/"; + check_taint_not $`, "\t\$`"; + check_taint_not $', "\t\$'"; + check_taint_not $+, "\t\$+"; + check_taint_not $1, "\t\$1"; + check_taint_not $2, "\t\$2"; /(.)/; # untaint $&, $`, $', $+, $1. - check_taint_not $&; - check_taint_not $`; - check_taint_not $'; - check_taint_not $+; - check_taint_not $1; - check_taint_not $2; - - /(\W)/; # taint $&, $`, $', $+, $1. - check_taint_not $&; - check_taint_not $`; - check_taint_not $'; - check_taint_not $+; - check_taint_not $1; - check_taint_not $2; - - /(\s)/; # taint $&, $`, $', $+, $1. - check_taint_not $&; - check_taint_not $`; - check_taint_not $'; - check_taint_not $+; - check_taint_not $1; - check_taint_not $2; - - /(\S)/; # taint $&, $`, $', $+, $1. - check_taint_not $&; - check_taint_not $`; - check_taint_not $'; - check_taint_not $+; - check_taint_not $1; - check_taint_not $2; + check_taint_not $&, "\$& from /(.)/"; + check_taint_not $`, "\t\$`"; + check_taint_not $', "\t\$'"; + check_taint_not $+, "\t\$+"; + check_taint_not $1, "\t\$1"; + check_taint_not $2, "\t\$2"; + + /(\W)/; + check_taint_not $&, "\$& from /(\\W)/"; + check_taint_not $`, "\t\$`"; + check_taint_not $', "\t\$'"; + check_taint_not $+, "\t\$+"; + check_taint_not $1, "\t\$1"; + check_taint_not $2, "\t\$2"; - $_ = $a; # untaint $_ + /(.)/; # untaint $&, $`, $', $+, $1. + check_taint_not $&, "\$& from /(.)/"; + check_taint_not $`, "\t\$`"; + check_taint_not $', "\t\$'"; + check_taint_not $+, "\t\$+"; + check_taint_not $1, "\t\$1"; + check_taint_not $2, "\t\$2"; + + /(\s)/; + check_taint_not $&, "\$& from /(\\s)/"; + check_taint_not $`, "\t\$`"; + check_taint_not $', "\t\$'"; + check_taint_not $+, "\t\$+"; + check_taint_not $1, "\t\$1"; + check_taint_not $2, "\t\$2"; - check_taint_not $_; + /(.)/; # untaint $&, $`, $', $+, $1. + check_taint_not $&, "\$& from /(.)/"; - /(b)/; # this must not taint - check_taint_not $&; - check_taint_not $`; - check_taint_not $'; - check_taint_not $+; - check_taint_not $1; - check_taint_not $2; + /(\S)/; + check_taint_not $&, "\$& from /(\\S)/"; + check_taint_not $`, "\t\$`"; + check_taint_not $', "\t\$'"; + check_taint_not $+, "\t\$+"; + check_taint_not $1, "\t\$1"; + check_taint_not $2, "\t\$2"; - $_ = $a; # untaint $_ + /(.)/; # untaint $&, $`, $', $+, $1. + check_taint_not $&, "\$& from /(.)/"; + + "0" =~ /(\d)/; + check_taint_not $&, "\$& from /(\\d)/"; + check_taint_not $`, "\t\$`"; + check_taint_not $', "\t\$'"; + check_taint_not $+, "\t\$+"; + check_taint_not $1, "\t\$1"; + check_taint_not $2, "\t\$2"; + + /(.)/; # untaint $&, $`, $', $+, $1. + check_taint_not $&, "\$& from /(.)/"; + + /(\D)/; + check_taint_not $&, "\$& from /(\\D)/"; + check_taint_not $`, "\t\$`"; + check_taint_not $', "\t\$'"; + check_taint_not $+, "\t\$+"; + check_taint_not $1, "\t\$1"; + check_taint_not $2, "\t\$2"; + + /(.)/; # untaint $&, $`, $', $+, $1. + check_taint_not $&, "\$& from /(.)/"; + + /([[:alnum:]])/; + check_taint_not $&, "\$& from /([[:alnum:]])/"; + check_taint_not $`, "\t\$`"; + check_taint_not $', "\t\$'"; + check_taint_not $+, "\t\$+"; + check_taint_not $1, "\t\$1"; + check_taint_not $2, "\t\$2"; + + /(.)/; # untaint $&, $`, $', $+, $1. + check_taint_not $&, "\$& from /(.)/"; + + /([[:^alnum:]])/; + check_taint_not $&, "\$& from /([[:^alnum:]])/"; + check_taint_not $`, "\t\$`"; + check_taint_not $', "\t\$'"; + check_taint_not $+, "\t\$+"; + check_taint_not $1, "\t\$1"; + check_taint_not $2, "\t\$2"; + + "a" =~ /(a)|(\w)/; + check_taint_not $&, "\$& from /(a)|(\\w)/"; + check_taint_not $`, "\t\$`"; + check_taint_not $', "\t\$'"; + check_taint_not $+, "\t\$+"; + check_taint_not $1, "\t\$1"; + ok($1 eq 'a', ("\t" x 5) . "\$1 is 'a'"); + ok(! defined $2, ("\t" x 5) . "\$2 is undefined"); + check_taint_not $2, "\t\$2"; + check_taint_not $3, "\t\$3"; + + /(.)/; # untaint $&, $`, $', $+, $1. + check_taint_not $&, "\$& from /(.)/"; + + "\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i; + check_taint_not $&, "\$& from /(\\N{CYRILLIC CAPITAL LETTER A})/i"; + check_taint_not $`, "\t\$`"; + check_taint_not $', "\t\$'"; + check_taint_not $+, "\t\$+"; + check_taint_not $1, "\t\$1"; + ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\t\$1 is 'small cyrillic a'"); + check_taint_not $2, "\t\$2"; + + /(.)/; # untaint $&, $`, $', $+, $1. + check_taint_not $&, "\$& from /./"; + + "(\N{KELVIN SIGN})" =~ /(\N{KELVIN SIGN})/i; + check_taint_not $&, "\$& from /(\\N{KELVIN SIGN})/i"; + check_taint_not $`, "\t\$`"; + check_taint_not $', "\t\$'"; + check_taint_not $+, "\t\$+"; + check_taint_not $1, "\t\$1"; + check_taint_not $2, "\t\$2"; + + /(.)/; # untaint $&, $`, $', $+, $1. + check_taint_not $&, "\$& from /(.)/"; + + "a:" =~ /(.)\b(.)/; + check_taint_not $&, "\$& from /(.)\\b(.)/"; + check_taint_not $`, "\t\$`"; + check_taint_not $', "\t\$'"; + check_taint_not $+, "\t\$+"; + check_taint_not $1, "\t\$1"; + check_taint_not $2, "\t\$2"; + check_taint_not $3, "\t\$3"; + + /(.)/; # untaint $&, $`, $', $+, $1. + check_taint_not $&, "\$& from /./"; + + "aa" =~ /(.)\B(.)/; + check_taint_not $&, "\$& from /(.)\\B(.)/"; + check_taint_not $`, "\t\$`"; + check_taint_not $', "\t\$'"; + check_taint_not $+, "\t\$+"; + check_taint_not $1, "\t\$1"; + check_taint_not $2, "\t\$2"; + check_taint_not $3, "\t\$3"; + + /(.)/; # untaint $&, $`, $', $+, $1. + check_taint_not $&, "\$& from /./"; + + "aaa" =~ /(.).(\1)/i; # notaint because not locale dependent + check_taint_not $&, "\$ & from /(.).(\\1)/"; + check_taint_not $`, "\t\$`"; + check_taint_not $', "\t\$'"; + check_taint_not $+, "\t\$+"; + check_taint_not $1, "\t\$1"; + check_taint_not $2, "\t\$2"; + check_taint_not $3, "\t\$3"; - check_taint_not $_; + /(.)/; # untaint $&, $`, $', $+, $1. + check_taint_not $&, "\$ & from /./"; + + $_ = $a; # untaint $_ - $b = uc($a); # taint $b - s/(.+)/$b/; # this must taint only the $_ + check_taint_not $_, 'untainting $_ works'; - check_taint_not $_; - check_taint_not $&; - check_taint_not $`; - check_taint_not $'; - check_taint_not $+; - check_taint_not $1; - check_taint_not $2; + /(b)/; + check_taint_not $&, "\$ & from /(b)/"; + check_taint_not $`, "\t\$`"; + check_taint_not $', "\t\$'"; + check_taint_not $+, "\t\$+"; + check_taint_not $1, "\t\$1"; + check_taint_not $2, "\t\$2"; $_ = $a; # untaint $_ - s/(.+)/b/; # this must not taint - check_taint_not $_; - check_taint_not $&; - check_taint_not $`; - check_taint_not $'; - check_taint_not $+; - check_taint_not $1; - check_taint_not $2; + check_taint_not $_, 'untainting $_ works'; + + s/(.+)/b/; + check_taint_not $_, '$_ (wasn\'t tainted) from s/(.+)/b/'; + check_taint_not $&, "\t\$&"; + check_taint_not $`, "\t\$`"; + check_taint_not $', "\t\$'"; + check_taint_not $+, "\t\$+"; + check_taint_not $1, "\t\$1"; + check_taint_not $2, "\t\$2"; $b = $a; # untaint $b ($b = $a) =~ s/\w/$&/; - check_taint_not $b; # $b should be tainted. - check_taint_not $a; # $a should be not. + check_taint_not $b, '$b from ($b = $a) =~ s/\w/$&/'; + check_taint_not $a, '$a from ($b = $a) =~ s/\w/$&/'; $_ = $a; # untaint $_ - s/(\w)/\l$1/; # this must taint - check_taint_not $_; - check_taint_not $&; - check_taint_not $`; - check_taint_not $'; - check_taint_not $+; - check_taint_not $1; - check_taint_not $2; + s/(\w)/\l$1/; + check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\l$1/,'; # this must taint + check_taint_not $&, "\t\$&"; + check_taint_not $`, "\t\$`"; + check_taint_not $', "\t\$'"; + check_taint_not $+, "\t\$+"; + check_taint_not $1, "\t\$1"; + check_taint_not $2, "\t\$2"; $_ = $a; # untaint $_ - s/(\w)/\L$1/; # this must taint - check_taint_not $_; - check_taint_not $&; - check_taint_not $`; - check_taint_not $'; - check_taint_not $+; - check_taint_not $1; - check_taint_not $2; + s/(\w)/\L$1/; + check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\L$1/,'; + check_taint_not $&, "\t\$&"; + check_taint_not $`, "\t\$`"; + check_taint_not $', "\t\$'"; + check_taint_not $+, "\t\$+"; + check_taint_not $1, "\t\$1"; + check_taint_not $2, "\t\$2"; $_ = $a; # untaint $_ - s/(\w)/\u$1/; # this must taint - check_taint_not $_; - check_taint_not $&; - check_taint_not $`; - check_taint_not $'; - check_taint_not $+; - check_taint_not $1; - check_taint_not $2; + s/(\w)/\u$1/; + check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\u$1/'; + check_taint_not $&, "\t\$&"; + check_taint_not $`, "\t\$`"; + check_taint_not $', "\t\$'"; + check_taint_not $+, "\t\$+"; + check_taint_not $1, "\t\$1"; + check_taint_not $2, "\t\$2"; $_ = $a; # untaint $_ - s/(\w)/\U$1/; # this must taint - check_taint_not $_; - check_taint_not $&; - check_taint_not $`; - check_taint_not $'; - check_taint_not $+; - check_taint_not $1; - check_taint_not $2; + s/(\w)/\U$1/; + check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\U$1/'; + check_taint_not $&, "\t\$&"; + check_taint_not $`, "\t\$`"; + check_taint_not $', "\t\$'"; + check_taint_not $+, "\t\$+"; + check_taint_not $1, "\t\$1"; + check_taint_not $2, "\t\$2"; # After all this tainting $a should be cool. - check_taint_not $a; + check_taint_not $a, '$a still not tainted'; + + "a" =~ /([a-z])/; + check_taint_not $1, '"a" =~ /([a-z])/'; + "foo.bar_baz" =~ /^(.*)[._](.*?)$/; # Bug 120675 + check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/'; + } # Here are in scope of 'use locale' @@ -440,294 +694,252 @@ check_taint_not $a; # Let us do some *real* locale work now, # unless setlocale() is missing (i.e. minitest). -unless ($have_setlocale) { - print "1..$test_num\n"; - exit; -} - # The test number before our first setlocale() my $final_without_setlocale = $test_num; # Find locales. -debug "# Scanning for locales...\n"; - -# Note that it's okay that some languages have their native names -# capitalized here even though that's not "right". They are lowercased -# anyway later during the scanning process (and besides, some clueless -# vendor might have them capitalized erroneously anyway). - -my $locales = < ord $b } @_; + my $output = ""; + my $range_start; + my $start_class; + push @chars, chr(258); # This sentinel simplifies the loop termination + # logic + foreach my $i (0 .. @chars - 1) { + my $char = $chars[$i]; + my $range_end; + my $class; + + # We avoid using [:posix:] classes, as these are being tested in this + # file. Each equivalence class below is for things that can appear in + # a range; those that can't be in a range have class -1. 0 for those + # which should be output in hex; and >0 for the other ranges + if ($char =~ /[A-Z]/) { + $class = 2; + } + elsif ($char =~ /[a-z]/) { + $class = 3; + } + elsif ($char =~ /[0-9]/) { + $class = 4; + } + # Uncomment to get literal punctuation displayed instead of hex + #elsif ($char =~ /[[\]!"#\$\%&\'()*+,.\/:\\;<=>?\@\^_`{|}~-]/) { + # $class = -1; # Punct never appears in a range + #} + else { + $class = 0; # Output in hex + } -# Sanitize the environment so that we can run the external 'locale' -# program without the taint mode getting grumpy. - -# $ENV{PATH} is special in VMS. -delete $ENV{PATH} if $^O ne 'VMS' or $Config{d_setenv}; - -# Other subversive stuff. -delete @ENV{qw(IFS CDPATH ENV BASH_ENV)}; - -if (-x "/usr/bin/locale" && open(LOCALES, "/usr/bin/locale -a 2>/dev/null|")) { - while () { - # It seems that /usr/bin/locale steadfastly outputs 8 bit data, which - # ain't great when we're running this testPERL_UNICODE= so that utf8 - # locales will cause all IO hadles to default to (assume) utf8 - next unless utf8::valid($_); - chomp; - trylocale($_); - } - close(LOCALES); -} elsif ($^O eq 'VMS' && defined($ENV{'SYS$I18N_LOCALE'}) && -d 'SYS$I18N_LOCALE') { -# The SYS$I18N_LOCALE logical name search list was not present on -# VAX VMS V5.5-12, but was on AXP && VAX VMS V6.2 as well as later versions. - opendir(LOCALES, "SYS\$I18N_LOCALE:"); - while ($_ = readdir(LOCALES)) { - chomp; - trylocale($_); - } - close(LOCALES); -} elsif ($^O eq 'openbsd' && -e '/usr/share/locale') { - - # OpenBSD doesn't have a locale executable, so reading /usr/share/locale - # is much easier and faster than the last resort method. - - opendir(LOCALES, '/usr/share/locale'); - while ($_ = readdir(LOCALES)) { - chomp; - trylocale($_); - } - close(LOCALES); -} else { - - # This is going to be slow. - - foreach my $locale (split(/\n/, $locales)) { - my ($locale_name, $language_codes, $country_codes, $encodings) = - split(/:/, $locale); - my @enc = decode_encodings($encodings); - foreach my $loc (split(/ /, $locale_name)) { - trylocale($loc); - foreach my $enc (@enc) { - trylocale("$loc.$enc"); - } - $loc = lc $loc; - foreach my $enc (@enc) { - trylocale("$loc.$enc"); - } - } - foreach my $lang (split(/ /, $language_codes)) { - trylocale($lang); - foreach my $country (split(/ /, $country_codes)) { - my $lc = "${lang}_${country}"; - trylocale($lc); - foreach my $enc (@enc) { - trylocale("$lc.$enc"); - } - my $lC = "${lang}_\U${country}"; - trylocale($lC); - foreach my $enc (@enc) { - trylocale("$lC.$enc"); - } - } - } - } -} + if (! defined $range_start) { + if ($class < 0) { + $output .= " " . $char; + } + else { + $range_start = ord $char; + $start_class = $class; + } + } # A range ends if not consecutive, or the class-type changes + elsif (ord $char != ($range_end = ord($chars[$i-1])) + 1 + || $class != $start_class) + { -setlocale(LC_ALL, "C"); + # Here, the current character is not in the range. This means the + # previous character must have been. Output the range up through + # that one. + my $range_length = $range_end - $range_start + 1; + if ($start_class > 0) { + $output .= " " . chr($range_start); + $output .= "-" . chr($range_end) if $range_length > 1; + } + else { + $output .= sprintf(" %02X", $range_start); + $output .= sprintf("-%02X", $range_end) if $range_length > 1; + } -if ($^O eq 'darwin') { - # Darwin 8/Mac OS X 10.4 and 10.5 have bad Basque locales: perl bug #35895, - # Apple bug ID# 4139653. It also has a problem in Byelorussian. - (my $v) = $Config{osvers} =~ /^(\d+)/; - if ($v >= 8 and $v < 10) { - debug "# Skipping eu_ES, be_BY locales -- buggy in Darwin\n"; - @Locale = grep ! m/^(eu_ES(?:\..*)?|be_BY\.CP1131)$/, @Locale; - } elsif ($v < 12) { - debug "# Skipping be_BY locales -- buggy in Darwin\n"; - @Locale = grep ! m/^be_BY\.CP1131$/, @Locale; + # Handle the new current character, as potentially beginning a new + # range + undef $range_start; + redo; + } } + + $output =~ s/^ //; + return $output; } -@Locale = sort @Locale; +sub disp_str ($) { + my $string = shift; -debug "# Locales =\n"; -for ( @Locale ) { - debug "# $_\n"; -} + # Displays the string unambiguously. ASCII printables are always output + # as-is, though perhaps separated by blanks from other characters. If + # entirely printable ASCII, just returns the string. Otherwise if valid + # UTF-8 it uses the character names for non-printable-ASCII. Otherwise it + # outputs hex for each non-ASCII-printable byte. -my %Problem; -my %Okay; -my %Testing; -my @Neoalpha; # Alnums that aren't in the C locale. -my %test_names; + return $string if $string =~ / ^ [[:print:]]* $/xa; -sub tryneoalpha { - my ($Locale, $i, $test, $message) = @_; + my $result = ""; + my $prev_was_punct = 1; # Beginning is considered punct + if (utf8::valid($string) && utf8::is_utf8($string)) { + use charnames (); + foreach my $char (split "", $string) { + + # Keep punctuation adjacent to other characters; otherwise + # separate them with a blank + if ($char =~ /[[:punct:]]/a) { + $result .= $char; + $prev_was_punct = 1; + } + elsif ($char =~ /[[:print:]]/a) { + $result .= " " unless $prev_was_punct; + $result .= $char; + $prev_was_punct = 0; + } + else { + $result .= " " unless $prev_was_punct; + $result .= charnames::viacode(ord $char); + $prev_was_punct = 0; + } + } + } + else { + use bytes; + foreach my $char (split "", $string) { + if ($char =~ /[[:punct:]]/a) { + $result .= $char; + $prev_was_punct = 1; + } + elsif ($char =~ /[[:print:]]/a) { + $result .= " " unless $prev_was_punct; + $result .= $char; + $prev_was_punct = 0; + } + else { + $result .= " " unless $prev_was_punct; + $result .= sprintf("%02X", ord $char); + $prev_was_punct = 0; + } + } + } + + return $result; +} + +sub report_result { + my ($Locale, $i, $pass_fail, $message) = @_; $message //= ""; $message = " ($message)" if $message; - unless ($test) { + unless ($pass_fail) { $Problem{$i}{$Locale} = 1; - debug "# failed $i with locale '$Locale'$message\n"; + debug "failed $i ($test_names{$i}) with locale '$Locale'$message\n"; } else { push @{$Okay{$i}}, $Locale; } } +sub report_multi_result { + my ($Locale, $i, $results_ref) = @_; + + # $results_ref points to an array, each element of which is a character that was + # in error for this test numbered '$i'. If empty, the test passed + + my $message = ""; + if (@$results_ref) { + $message = join " ", "for", disp_chars(@$results_ref); + } + report_result($Locale, $i, @$results_ref == 0, $message); +} + my $first_locales_test_number = $final_without_setlocale + 1; my $locales_test_number; my $not_necessarily_a_problem_test_number; my $first_casing_test_number; -my $final_casing_test_number; my %setlocale_failed; # List of locales that setlocale() didn't work on -foreach $Locale (@Locale) { +foreach my $Locale (@Locale) { $locales_test_number = $first_locales_test_number - 1; - debug "# Locale = $Locale\n"; + debug "\n"; + debug "Locale = $Locale\n"; - unless (setlocale(LC_ALL, $Locale)) { + unless (setlocale(&POSIX::LC_ALL, $Locale)) { $setlocale_failed{$Locale} = $Locale; next; } - # We test UTF-8 locales only under ':not_characters'; otherwise they have - # documented deficiencies. Non- UTF-8 locales are tested only under plain - # 'use locale', as otherwise we would have to convert everything in them - # to Unicode. - my $is_utf8_locale = $Locale =~ /UTF-?8/i; + # We test UTF-8 locales only under ':not_characters'; It is easier to + # test them in other test files than here. Non- UTF-8 locales are tested + # only under plain 'use locale', as otherwise we would have to convert + # everything in them to Unicode. - my %UPPER = (); - my %lower = (); - my %BoThCaSe = (); + my %UPPER = (); # All alpha X for which uc(X) == X and lc(X) != X + my %lower = (); # All alpha X for which lc(X) == X and uc(X) != X + my %BoThCaSe = (); # All alpha X for which uc(X) == lc(X) == X + + my $is_utf8_locale = is_locale_utf8($Locale); + + debug "is utf8 locale? = $is_utf8_locale\n"; + + debug "radix = " . disp_str(localeconv()->{decimal_point}) . "\n"; if (! $is_utf8_locale) { use locale; - @Alnum_ = sort grep /\w/, map { chr } 0..255; - - debug "# w = ", join("",@Alnum_), "\n"; + @{$posixes{'word'}} = grep /\w/, map { chr } 0..255; + @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255; + @{$posixes{'space'}} = grep /\s/, map { chr } 0..255; + @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255; + @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255; + @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255; + @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255; + @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255; + @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255; + @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255; + @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255; + @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255; + @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255; + @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255; + @{$posixes{'cased'}} = grep /[[:upper:]]/i, map {chr } 0..255; # Sieve the uppercase and the lowercase. - for (@Alnum_) { + for (@{$posixes{'word'}}) { if (/[^\d_]/) { # skip digits and the _ if (uc($_) eq $_) { $UPPER{$_} = $_; @@ -740,9 +952,22 @@ foreach $Locale (@Locale) { } else { use locale ':not_characters'; - @Alnum_ = sort grep /\w/, map { chr } 0..255; - debug "# w = ", join("",@Alnum_), "\n"; - for (@Alnum_) { + @{$posixes{'word'}} = grep /\w/, map { chr } 0..255; + @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255; + @{$posixes{'space'}} = grep /\s/, map { chr } 0..255; + @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255; + @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255; + @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255; + @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255; + @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255; + @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255; + @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255; + @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255; + @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255; + @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255; + @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255; + @{$posixes{'cased'}} = grep /[[:upper:]]/i, map {chr } 0..255; + for (@{$posixes{'word'}}) { if (/[^\d_]/) { # skip digits and the _ if (uc($_) eq $_) { $UPPER{$_} = $_; @@ -753,7 +978,27 @@ foreach $Locale (@Locale) { } } } + + # Ordered, where possible, in groups of "this is a subset of the next + # one" + debug ":upper: = ", disp_chars(@{$posixes{'upper'}}), "\n"; + debug ":lower: = ", disp_chars(@{$posixes{'lower'}}), "\n"; + debug ":cased: = ", disp_chars(@{$posixes{'cased'}}), "\n"; + debug ":alpha: = ", disp_chars(@{$posixes{'alpha'}}), "\n"; + debug ":alnum: = ", disp_chars(@{$posixes{'alnum'}}), "\n"; + debug " w = ", disp_chars(@{$posixes{'word'}}), "\n"; + debug ":graph: = ", disp_chars(@{$posixes{'graph'}}), "\n"; + debug ":print: = ", disp_chars(@{$posixes{'print'}}), "\n"; + debug " d = ", disp_chars(@{$posixes{'digit'}}), "\n"; + debug ":xdigit: = ", disp_chars(@{$posixes{'xdigit'}}), "\n"; + debug ":blank: = ", disp_chars(@{$posixes{'blank'}}), "\n"; + debug " s = ", disp_chars(@{$posixes{'space'}}), "\n"; + debug ":punct: = ", disp_chars(@{$posixes{'punct'}}), "\n"; + debug ":cntrl: = ", disp_chars(@{$posixes{'cntrl'}}), "\n"; + debug ":ascii: = ", disp_chars(@{$posixes{'ascii'}}), "\n"; + foreach (keys %UPPER) { + $BoThCaSe{$_}++ if exists $lower{$_}; } foreach (keys %lower) { @@ -764,241 +1009,694 @@ foreach $Locale (@Locale) { delete $lower{$_}; } - debug "# UPPER = ", join("", sort keys %UPPER ), "\n"; - debug "# lower = ", join("", sort keys %lower ), "\n"; - debug "# BoThCaSe = ", join("", sort keys %BoThCaSe), "\n"; - - my @failures; - my @fold_failures; - foreach my $x (sort keys %UPPER) { - my $ok; - my $fold_ok; + my %Unassigned; + foreach my $ord ( 0 .. 255 ) { + $Unassigned{chr $ord} = 1; + } + foreach my $class (keys %posixes) { + foreach my $char (@{$posixes{$class}}) { + delete $Unassigned{$char}; + } + } + + debug "UPPER = ", disp_chars(sort { ord $a <=> ord $b } keys %UPPER), "\n"; + debug "lower = ", disp_chars(sort { ord $a <=> ord $b } keys %lower), "\n"; + debug "BoThCaSe = ", disp_chars(sort { ord $a <=> ord $b } keys %BoThCaSe), "\n"; + debug "Unassigned = ", disp_chars(sort { ord $a <=> ord $b } keys %Unassigned), "\n"; + + my @failures; + my @fold_failures; + foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) { + my $ok; + my $fold_ok; + if ($is_utf8_locale) { + use locale ':not_characters'; + $ok = $x =~ /[[:upper:]]/; + $fold_ok = $x =~ /[[:lower:]]/i; + } + else { + use locale; + $ok = $x =~ /[[:upper:]]/; + $fold_ok = $x =~ /[[:lower:]]/i; + } + push @failures, $x unless $ok; + push @fold_failures, $x unless $fold_ok; + } + $locales_test_number++; + $first_casing_test_number = $locales_test_number; + $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/ matches all alpha X for which uc(X) == X and lc(X) != X'; + report_multi_result($Locale, $locales_test_number, \@failures); + + $locales_test_number++; + + $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i matches all alpha X for which uc(X) == X and lc(X) != X'; + report_multi_result($Locale, $locales_test_number, \@fold_failures); + + undef @failures; + undef @fold_failures; + + foreach my $x (sort { ord $a <=> ord $b } keys %lower) { + my $ok; + my $fold_ok; + if ($is_utf8_locale) { + use locale ':not_characters'; + $ok = $x =~ /[[:lower:]]/; + $fold_ok = $x =~ /[[:upper:]]/i; + } + else { + use locale; + $ok = $x =~ /[[:lower:]]/; + $fold_ok = $x =~ /[[:upper:]]/i; + } + push @failures, $x unless $ok; + push @fold_failures, $x unless $fold_ok; + } + + $locales_test_number++; + $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/ matches all alpha X for which lc(X) == X and uc(X) != X'; + report_multi_result($Locale, $locales_test_number, \@failures); + + $locales_test_number++; + $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/i matches all alpha X for which lc(X) == X and uc(X) != X'; + report_multi_result($Locale, $locales_test_number, \@fold_failures); + + { # Find the alphabetic characters that are not considered alphabetics + # in the default (C) locale. + + no locale; + + @Added_alpha = (); + for (keys %UPPER, keys %lower, keys %BoThCaSe) { + push(@Added_alpha, $_) if (/\W/); + } + } + + @Added_alpha = sort { ord $a <=> ord $b } @Added_alpha; + + debug "Added_alpha = ", disp_chars(@Added_alpha), "\n"; + + # Cross-check the whole 8-bit character set. + + ++$locales_test_number; + my @f; + $test_names{$locales_test_number} = 'Verify that \w and [:word:] are identical'; + for (map { chr } 0..255) { + if ($is_utf8_locale) { + use locale ':not_characters'; + push @f, $_ unless /[[:word:]]/ == /\w/; + } + else { + push @f, $_ unless /[[:word:]]/ == /\w/; + } + } + report_multi_result($Locale, $locales_test_number, \@f); + + ++$locales_test_number; + undef @f; + $test_names{$locales_test_number} = 'Verify that \d and [:digit:] are identical'; + for (map { chr } 0..255) { + if ($is_utf8_locale) { + use locale ':not_characters'; + push @f, $_ unless /[[:digit:]]/ == /\d/; + } + else { + push @f, $_ unless /[[:digit:]]/ == /\d/; + } + } + report_multi_result($Locale, $locales_test_number, \@f); + + ++$locales_test_number; + undef @f; + $test_names{$locales_test_number} = 'Verify that \s and [:space:] are identical'; + for (map { chr } 0..255) { + if ($is_utf8_locale) { + use locale ':not_characters'; + push @f, $_ unless /[[:space:]]/ == /\s/; + } + else { + push @f, $_ unless /[[:space:]]/ == /\s/; + } + } + report_multi_result($Locale, $locales_test_number, \@f); + + ++$locales_test_number; + undef @f; + $test_names{$locales_test_number} = 'Verify that [:posix:] and [:^posix:] are mutually exclusive'; + for (map { chr } 0..255) { + if ($is_utf8_locale) { + use locale ':not_characters'; + push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) || + (/[[:alnum:]]/ xor /[[:^alnum:]]/) || + (/[[:ascii:]]/ xor /[[:^ascii:]]/) || + (/[[:blank:]]/ xor /[[:^blank:]]/) || + (/[[:cntrl:]]/ xor /[[:^cntrl:]]/) || + (/[[:digit:]]/ xor /[[:^digit:]]/) || + (/[[:graph:]]/ xor /[[:^graph:]]/) || + (/[[:lower:]]/ xor /[[:^lower:]]/) || + (/[[:print:]]/ xor /[[:^print:]]/) || + (/[[:space:]]/ xor /[[:^space:]]/) || + (/[[:upper:]]/ xor /[[:^upper:]]/) || + (/[[:word:]]/ xor /[[:^word:]]/) || + (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) || + + # effectively is what [:cased:] would be if it existed. + (/[[:upper:]]/i xor /[[:^upper:]]/i); + } + else { + push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) || + (/[[:alnum:]]/ xor /[[:^alnum:]]/) || + (/[[:ascii:]]/ xor /[[:^ascii:]]/) || + (/[[:blank:]]/ xor /[[:^blank:]]/) || + (/[[:cntrl:]]/ xor /[[:^cntrl:]]/) || + (/[[:digit:]]/ xor /[[:^digit:]]/) || + (/[[:graph:]]/ xor /[[:^graph:]]/) || + (/[[:lower:]]/ xor /[[:^lower:]]/) || + (/[[:print:]]/ xor /[[:^print:]]/) || + (/[[:space:]]/ xor /[[:^space:]]/) || + (/[[:upper:]]/ xor /[[:^upper:]]/) || + (/[[:word:]]/ xor /[[:^word:]]/) || + (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) || + (/[[:upper:]]/i xor /[[:^upper:]]/i); + } + } + report_multi_result($Locale, $locales_test_number, \@f); + + # The rules for the relationships are given in: + # http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html + + + ++$locales_test_number; + undef @f; + $test_names{$locales_test_number} = 'Verify that [:lower:] contains at least a-z'; + for ('a' .. 'z') { + if ($is_utf8_locale) { + use locale ':not_characters'; + push @f, $_ unless /[[:lower:]]/; + } + else { + push @f, $_ unless /[[:lower:]]/; + } + } + report_multi_result($Locale, $locales_test_number, \@f); + + ++$locales_test_number; + undef @f; + $test_names{$locales_test_number} = 'Verify that [:lower:] is a subset of [:alpha:]'; + for (map { chr } 0..255) { + if ($is_utf8_locale) { + use locale ':not_characters'; + push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/; + } + else { + push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/; + } + } + report_multi_result($Locale, $locales_test_number, \@f); + + ++$locales_test_number; + undef @f; + $test_names{$locales_test_number} = 'Verify that [:upper:] contains at least A-Z'; + for ('A' .. 'Z') { + if ($is_utf8_locale) { + use locale ':not_characters'; + push @f, $_ unless /[[:upper:]]/; + } + else { + push @f, $_ unless /[[:upper:]]/; + } + } + report_multi_result($Locale, $locales_test_number, \@f); + + ++$locales_test_number; + undef @f; + $test_names{$locales_test_number} = 'Verify that [:upper:] is a subset of [:alpha:]'; + for (map { chr } 0..255) { + if ($is_utf8_locale) { + use locale ':not_characters'; + push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/; + } + else { + push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/; + } + } + report_multi_result($Locale, $locales_test_number, \@f); + + ++$locales_test_number; + undef @f; + $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i is a subset of [:alpha:]'; + for (map { chr } 0..255) { + if ($is_utf8_locale) { + use locale ':not_characters'; + push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/; + } + else { + push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/; + } + } + report_multi_result($Locale, $locales_test_number, \@f); + + ++$locales_test_number; + undef @f; + $test_names{$locales_test_number} = 'Verify that [:alpha:] is a subset of [:alnum:]'; + for (map { chr } 0..255) { + if ($is_utf8_locale) { + use locale ':not_characters'; + push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/; + } + else { + push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/; + } + } + report_multi_result($Locale, $locales_test_number, \@f); + + ++$locales_test_number; + undef @f; + $test_names{$locales_test_number} = 'Verify that [:digit:] contains at least 0-9'; + for ('0' .. '9') { + if ($is_utf8_locale) { + use locale ':not_characters'; + push @f, $_ unless /[[:digit:]]/; + } + else { + push @f, $_ unless /[[:digit:]]/; + } + } + report_multi_result($Locale, $locales_test_number, \@f); + + ++$locales_test_number; + undef @f; + $test_names{$locales_test_number} = 'Verify that [:digit:] is a subset of [:alnum:]'; + for (map { chr } 0..255) { + if ($is_utf8_locale) { + use locale ':not_characters'; + push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/; + } + else { + push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/; + } + } + report_multi_result($Locale, $locales_test_number, \@f); + + ++$locales_test_number; + undef @f; + $test_names{$locales_test_number} = 'Verify that [:digit:] matches either 10 or 20 code points'; + report_result($Locale, $locales_test_number, @{$posixes{'digit'}} == 10 || @{$posixes{'digit'}} == 20); + + ++$locales_test_number; + undef @f; + $test_names{$locales_test_number} = 'Verify that if there is a second set of digits in [:digit:], they are consecutive'; + if (@{$posixes{'digit'}} == 20) { + my $previous_ord; + for (map { chr } 0..255) { + next unless /[[:digit:]]/; + next if /[0-9]/; + if (defined $previous_ord) { + if ($is_utf8_locale) { + use locale ':not_characters'; + push @f, $_ if ord $_ != $previous_ord + 1; + } + else { + push @f, $_ if ord $_ != $previous_ord + 1; + } + } + $previous_ord = ord $_; + } + } + report_multi_result($Locale, $locales_test_number, \@f); + + ++$locales_test_number; + undef @f; + my @xdigit_digits; # :digit: & :xdigit: + $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains one or two blocks of 10 consecutive [:digit:] chars'; + for (map { chr } 0..255) { + if ($is_utf8_locale) { + use locale ':not_characters'; + # For utf8 locales, we actually use a stricter test: that :digit: + # is a subset of :xdigit:, as we know that only 0-9 should match + push @f, $_ if /[[:digit:]]/ and ! /[[:xdigit:]]/; + } + else { + push @xdigit_digits, $_ if /[[:digit:]]/ and /[[:xdigit:]]/; + } + } + if (! $is_utf8_locale) { + + # For non-utf8 locales, @xdigit_digits is a list of the characters + # that are both :xdigit: and :digit:. Because :digit: is stored in + # increasing code point order (unless the tests above failed), + # @xdigit_digits is as well. There should be exactly 10 or + # 20 of these. + if (@xdigit_digits != 10 && @xdigit_digits != 20) { + @f = @xdigit_digits; + } + else { + + # Look for contiguity in the series, adding any wrong ones to @f + my @temp = @xdigit_digits; + while (@temp > 1) { + push @f, $temp[1] if ($temp[0] != $temp[1] - 1) + + # Skip this test for the 0th character of + # the second block of 10, as it won't be + # contiguous with the previous block + && (! defined $xdigit_digits[10] + || $temp[1] != $xdigit_digits[10]); + shift @temp; + } + } + } + + report_multi_result($Locale, $locales_test_number, \@f); + + ++$locales_test_number; + undef @f; + $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains at least A-F, a-f'; + for ('A' .. 'F', 'a' .. 'f') { + if ($is_utf8_locale) { + use locale ':not_characters'; + push @f, $_ unless /[[:xdigit:]]/; + } + else { + push @f, $_ unless /[[:xdigit:]]/; + } + } + report_multi_result($Locale, $locales_test_number, \@f); + + ++$locales_test_number; + undef @f; + $test_names{$locales_test_number} = 'Verify that any additional members of [:xdigit:], are in groups of 6 consecutive code points'; + my $previous_ord; + my $count = 0; + for my $chr (map { chr } 0..255) { + next unless $chr =~ /[[:xdigit:]]/; + if ($is_utf8_locale) { + next if $chr =~ /[[:digit:]]/; + } + else { + next if grep { $chr eq $_ } @xdigit_digits; + } + next if $chr =~ /[A-Fa-f]/; + if (defined $previous_ord) { + if ($is_utf8_locale) { + use locale ':not_characters'; + push @f, $chr if ord $chr != $previous_ord + 1; + } + else { + push @f, $chr if ord $chr != $previous_ord + 1; + } + } + $count++; + if ($count == 6) { + undef $previous_ord; + } + else { + $previous_ord = ord $chr; + } + } + report_multi_result($Locale, $locales_test_number, \@f); + + ++$locales_test_number; + undef @f; + $test_names{$locales_test_number} = 'Verify that [:xdigit:] is a subset of [:graph:]'; + for (map { chr } 0..255) { + if ($is_utf8_locale) { + use locale ':not_characters'; + push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/; + } + else { + push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/; + } + } + report_multi_result($Locale, $locales_test_number, \@f); + + # Note that xdigit doesn't have to be a subset of alnum + + ++$locales_test_number; + undef @f; + $test_names{$locales_test_number} = 'Verify that [:punct:] is a subset of [:graph:]'; + for (map { chr } 0..255) { + if ($is_utf8_locale) { + use locale ':not_characters'; + push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/; + } + else { + push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/; + } + } + report_multi_result($Locale, $locales_test_number, \@f); + + ++$locales_test_number; + undef @f; + $test_names{$locales_test_number} = 'Verify that the space character is not in [:graph:]'; + if ($is_utf8_locale) { + use locale ':not_characters'; + push @f, " " if " " =~ /[[:graph:]]/; + } + else { + push @f, " " if " " =~ /[[:graph:]]/; + } + report_multi_result($Locale, $locales_test_number, \@f); + + ++$locales_test_number; + undef @f; + $test_names{$locales_test_number} = 'Verify that [:space:] contains at least [\f\n\r\t\cK ]'; + for (' ', "\f", "\n", "\r", "\t", "\cK") { + if ($is_utf8_locale) { + use locale ':not_characters'; + push @f, $_ unless /[[:space:]]/; + } + else { + push @f, $_ unless /[[:space:]]/; + } + } + report_multi_result($Locale, $locales_test_number, \@f); + + ++$locales_test_number; + undef @f; + $test_names{$locales_test_number} = 'Verify that [:blank:] contains at least [\t ]'; + for (' ', "\t") { + if ($is_utf8_locale) { + use locale ':not_characters'; + push @f, $_ unless /[[:blank:]]/; + } + else { + push @f, $_ unless /[[:blank:]]/; + } + } + report_multi_result($Locale, $locales_test_number, \@f); + + ++$locales_test_number; + undef @f; + $test_names{$locales_test_number} = 'Verify that [:blank:] is a subset of [:space:]'; + for (map { chr } 0..255) { + if ($is_utf8_locale) { + use locale ':not_characters'; + push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/; + } + else { + push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/; + } + } + report_multi_result($Locale, $locales_test_number, \@f); + + ++$locales_test_number; + undef @f; + $test_names{$locales_test_number} = 'Verify that [:graph:] is a subset of [:print:]'; + for (map { chr } 0..255) { + if ($is_utf8_locale) { + use locale ':not_characters'; + push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/; + } + else { + push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/; + } + } + report_multi_result($Locale, $locales_test_number, \@f); + + ++$locales_test_number; + undef @f; + $test_names{$locales_test_number} = 'Verify that the space character is in [:print:]'; + if ($is_utf8_locale) { + use locale ':not_characters'; + push @f, " " if " " !~ /[[:print:]]/; + } + else { + push @f, " " if " " !~ /[[:print:]]/; + } + report_multi_result($Locale, $locales_test_number, \@f); + + ++$locales_test_number; + undef @f; + $test_names{$locales_test_number} = 'Verify that isn\'t both [:cntrl:] and [:print:]'; + for (map { chr } 0..255) { if ($is_utf8_locale) { use locale ':not_characters'; - $ok = $x =~ /[[:upper:]]/; - $fold_ok = $x =~ /[[:lower:]]/i; + push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/); } else { - use locale; - $ok = $x =~ /[[:upper:]]/; - $fold_ok = $x =~ /[[:lower:]]/i; + push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/); } - push @failures, $x unless $ok; - push @fold_failures, $x unless $fold_ok; } - my $message = ""; - $locales_test_number++; - $first_casing_test_number = $locales_test_number; - $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/ matches sieved uppercase characters.'; - $message = 'Failed for ' . join ", ", @failures if @failures; - tryneoalpha($Locale, $locales_test_number, scalar @failures == 0, $message); - - $message = ""; - $locales_test_number++; - - $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i matches sieved uppercase characters.'; - $message = 'Failed for ' . join ", ", @fold_failures if @fold_failures; - tryneoalpha($Locale, $locales_test_number, scalar @fold_failures == 0, $message); + report_multi_result($Locale, $locales_test_number, \@f); - $message = ""; - undef @failures; - undef @fold_failures; - - foreach my $x (sort keys %lower) { - my $ok; - my $fold_ok; + ++$locales_test_number; + undef @f; + $test_names{$locales_test_number} = 'Verify that isn\'t both [:alpha:] and [:digit:]'; + for (map { chr } 0..255) { if ($is_utf8_locale) { use locale ':not_characters'; - $ok = $x =~ /[[:lower:]]/; - $fold_ok = $x =~ /[[:upper:]]/i; + push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/; } else { - use locale; - $ok = $x =~ /[[:lower:]]/; - $fold_ok = $x =~ /[[:upper:]]/i; + push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/; } - push @failures, $x unless $ok; - push @fold_failures, $x unless $fold_ok; } + report_multi_result($Locale, $locales_test_number, \@f); - $locales_test_number++; - $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/ matches sieved lowercase characters.'; - $message = 'Failed for ' . join ", ", @failures if @failures; - tryneoalpha($Locale, $locales_test_number, scalar @failures == 0, $message); - $message = ""; - $locales_test_number++; - $final_casing_test_number = $locales_test_number; - $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/i matches sieved lowercase characters.'; - $message = 'Failed for ' . join ", ", @fold_failures if @fold_failures; - tryneoalpha($Locale, $locales_test_number, scalar @fold_failures == 0, $message); - - { # Find the alphabetic characters that are not considered alphabetics - # in the default (C) locale. - - no locale; - - @Neoalpha = (); - for (keys %UPPER, keys %lower) { - push(@Neoalpha, $_) if (/\W/); - } + ++$locales_test_number; + undef @f; + $test_names{$locales_test_number} = 'Verify that isn\'t both [:alnum:] and [:punct:]'; + for (map { chr } 0..255) { + if ($is_utf8_locale) { + use locale ':not_characters'; + push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/; + } + else { + push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/; + } } + report_multi_result($Locale, $locales_test_number, \@f); - @Neoalpha = sort @Neoalpha; + ++$locales_test_number; + undef @f; + $test_names{$locales_test_number} = 'Verify that isn\'t both [:xdigit:] and [:punct:]'; + for (map { chr } 0..255) { + if ($is_utf8_locale) { + use locale ':not_characters'; + push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/); + } + else { + push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/); + } + } + report_multi_result($Locale, $locales_test_number, \@f); - debug "# Neoalpha = ", join("",@Neoalpha), "\n"; + ++$locales_test_number; + undef @f; + $test_names{$locales_test_number} = 'Verify that isn\'t both [:graph:] and [:space:]'; + for (map { chr } 0..255) { + if ($is_utf8_locale) { + use locale ':not_characters'; + push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/); + } + else { + push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/); + } + } + report_multi_result($Locale, $locales_test_number, \@f); - my $first_Neoalpha_test_number = $locales_test_number + 1; - my $final_Neoalpha_test_number = $first_Neoalpha_test_number + 3; - if (@Neoalpha == 0) { - # If we have no Neoalphas the remaining tests are no-ops. - debug "# no Neoalpha, skipping tests $first_Neoalpha_test_number..$final_Neoalpha_test_number for locale '$Locale'\n"; - foreach ($locales_test_number+1..$final_Neoalpha_test_number) { - push @{$Okay{$_}}, $Locale; - $locales_test_number++; - } - } else { + foreach ($first_casing_test_number..$locales_test_number) { + $problematical_tests{$_} = 1; + } - # Test \w. - my $word = join('', @Neoalpha); + # Test for read-only scalars' locale vs non-locale comparisons. - ++$locales_test_number; - $test_names{$locales_test_number} = 'Verify that alnums outside the C locale match \w'; + { + no locale; my $ok; + $a = "qwerty"; if ($is_utf8_locale) { use locale ':not_characters'; - $ok = $word =~ /^(\w+)$/; + $ok = ($a cmp "qwerty") == 0; } else { - # Already in 'use locale'; this tests that exiting scopes works - $ok = $word =~ /^(\w+)$/; + use locale; + $ok = ($a cmp "qwerty") == 0; } - tryneoalpha($Locale, $locales_test_number, $ok); + report_result($Locale, ++$locales_test_number, $ok); + $test_names{$locales_test_number} = 'Verify that cmp works with a read-only scalar; no- vs locale'; + } - # Cross-check the whole 8-bit character set. + { + my ($from, $to, $lesser, $greater, + @test, %test, $test, $yes, $no, $sign); ++$locales_test_number; - $test_names{$locales_test_number} = 'Verify that \w and \W are mutually exclusive, as are \d, \D; \s, \S'; - for (map { chr } 0..255) { - if ($is_utf8_locale) { - use locale ':not_characters'; - $ok = (/\w/ xor /\W/) || - (/\d/ xor /\D/) || - (/\s/ xor /\S/); - } - else { - $ok = (/\w/ xor /\W/) || - (/\d/ xor /\D/) || - (/\s/ xor /\S/); - } - tryneoalpha($Locale, $locales_test_number, $ok); - } - - # Test for read-only scalars' locale vs non-locale comparisons. - - { - no locale; - $a = "qwerty"; + $test_names{$locales_test_number} = 'Verify that "le", "ne", etc work'; + $not_necessarily_a_problem_test_number = $locales_test_number; + for (0..9) { + # Select a slice. + $from = int(($_*@{$posixes{'word'}})/10); + $to = $from + int(@{$posixes{'word'}}/10); + $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}}); + $lesser = join('', @{$posixes{'word'}}[$from..$to]); + # Select a slice one character on. + $from++; $to++; + $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}}); + $greater = join('', @{$posixes{'word'}}[$from..$to]); if ($is_utf8_locale) { use locale ':not_characters'; - $ok = ($a cmp "qwerty") == 0; + ($yes, $no, $sign) = ($lesser lt $greater + ? (" ", "not ", 1) + : ("not ", " ", -1)); } else { use locale; - $ok = ($a cmp "qwerty") == 0; + ($yes, $no, $sign) = ($lesser lt $greater + ? (" ", "not ", 1) + : ("not ", " ", -1)); } - tryneoalpha($Locale, ++$locales_test_number, $ok); - $test_names{$locales_test_number} = 'Verify that cmp works with a read-only scalar; no- vs locale'; - } - - { - my ($from, $to, $lesser, $greater, - @test, %test, $test, $yes, $no, $sign); - - ++$locales_test_number; - $test_names{$locales_test_number} = 'Verify that "le", "ne", etc work'; - $not_necessarily_a_problem_test_number = $locales_test_number; - for (0..9) { - # Select a slice. - $from = int(($_*@Alnum_)/10); - $to = $from + int(@Alnum_/10); - $to = $#Alnum_ if ($to > $#Alnum_); - $lesser = join('', @Alnum_[$from..$to]); - # Select a slice one character on. - $from++; $to++; - $to = $#Alnum_ if ($to > $#Alnum_); - $greater = join('', @Alnum_[$from..$to]); + # all these tests should FAIL (return 0). Exact lt or gt cannot + # be tested because in some locales, say, eacute and E may test + # equal. + @test = + ( + $no.' ($lesser le $greater)', # 1 + 'not ($lesser ne $greater)', # 2 + ' ($lesser eq $greater)', # 3 + $yes.' ($lesser ge $greater)', # 4 + $yes.' ($lesser ge $greater)', # 5 + $yes.' ($greater le $lesser )', # 7 + 'not ($greater ne $lesser )', # 8 + ' ($greater eq $lesser )', # 9 + $no.' ($greater ge $lesser )', # 10 + 'not (($lesser cmp $greater) == -($sign))' # 11 + ); + @test{@test} = 0 x @test; + $test = 0; + for my $ti (@test) { if ($is_utf8_locale) { use locale ':not_characters'; - ($yes, $no, $sign) = ($lesser lt $greater - ? (" ", "not ", 1) - : ("not ", " ", -1)); + $test{$ti} = eval $ti; } else { - use locale; - ($yes, $no, $sign) = ($lesser lt $greater - ? (" ", "not ", 1) - : ("not ", " ", -1)); + # Already in 'use locale'; + $test{$ti} = eval $ti; } - # all these tests should FAIL (return 0). - # Exact lt or gt cannot be tested because - # in some locales, say, eacute and E may test equal. - @test = - ( - $no.' ($lesser le $greater)', # 1 - 'not ($lesser ne $greater)', # 2 - ' ($lesser eq $greater)', # 3 - $yes.' ($lesser ge $greater)', # 4 - $yes.' ($lesser ge $greater)', # 5 - $yes.' ($greater le $lesser )', # 7 - 'not ($greater ne $lesser )', # 8 - ' ($greater eq $lesser )', # 9 - $no.' ($greater ge $lesser )', # 10 - 'not (($lesser cmp $greater) == -($sign))' # 11 - ); - @test{@test} = 0 x @test; - $test = 0; - for my $ti (@test) { - if ($is_utf8_locale) { - use locale ':not_characters'; - $test{$ti} = eval $ti; - } - else { - # Already in 'use locale'; - $test{$ti} = eval $ti; + $test ||= $test{$ti} + } + report_result($Locale, $locales_test_number, $test == 0); + if ($test) { + debug "lesser = '$lesser'\n"; + debug "greater = '$greater'\n"; + debug "lesser cmp greater = ", + $lesser cmp $greater, "\n"; + debug "greater cmp lesser = ", + $greater cmp $lesser, "\n"; + debug "(greater) from = $from, to = $to\n"; + for my $ti (@test) { + debugf("# %-40s %-4s", $ti, + $test{$ti} ? 'FAIL' : 'ok'); + if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) { + debugf("(%s == %4d)", $1, eval $1); } - $test ||= $test{$ti} - } - tryneoalpha($Locale, $locales_test_number, $test == 0); - if ($test) { - debug "# lesser = '$lesser'\n"; - debug "# greater = '$greater'\n"; - debug "# lesser cmp greater = ", - $lesser cmp $greater, "\n"; - debug "# greater cmp lesser = ", - $greater cmp $lesser, "\n"; - debug "# (greater) from = $from, to = $to\n"; - for my $ti (@test) { - debugf("# %-40s %-4s", $ti, - $test{$ti} ? 'FAIL' : 'ok'); - if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) { - debugf("(%s == %4d)", $1, eval $1); - } - debug "\n#"; - } - - last; - } - } - } - } + debugf("\n#"); + } - if ($locales_test_number != $final_Neoalpha_test_number) { - die("The delta for \$final_Neoalpha needs to be updated from " - . ($final_Neoalpha_test_number - $first_Neoalpha_test_number) - . " to " - . ($locales_test_number - $first_Neoalpha_test_number) - ); + last; + } + } } my $ok1; @@ -1015,14 +1713,23 @@ foreach $Locale (@Locale) { my $ok12; my $ok13; my $ok14; + my $ok14_5; my $ok15; my $ok16; + my $ok17; + my $ok18; + my $ok19; + my $ok20; + my $ok21; my $c; my $d; my $e; my $f; my $g; + my $h; + my $i; + my $j; if (! $is_utf8_locale) { use locale; @@ -1066,14 +1773,22 @@ foreach $Locale (@Locale) { $f = "1.23"; $g = 2.34; + $h = 1.5; + $i = 1.25; + $j = "$h:$i"; $ok9 = $f == 1.23; $ok10 = $f == $x; $ok11 = $f == $c; $ok12 = abs(($f + $g) - 3.57) < 0.01; $ok13 = $w == 0; - $ok14 = $ok15 = $ok16 = 1; # Skip for non-utf8 locales + $ok14 = $ok14_5 = $ok15 = $ok16 = 1; # Skip for non-utf8 locales + } + { + no locale; + $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i); } + $ok18 = $j eq sprintf("%g:%g", $h, $i); } else { use locale ':not_characters'; @@ -1110,6 +1825,9 @@ foreach $Locale (@Locale) { $f = "1.23"; $g = 2.34; + $h = 1.5; + $i = 1.25; + $j = "$h:$i"; $ok9 = $f == 1.23; $ok10 = $f == $x; @@ -1119,21 +1837,20 @@ foreach $Locale (@Locale) { # Look for non-ASCII error messages, and verify that the first # such is in UTF-8 (the others almost certainly will be like the - # first). + # first). This is only done if the current locale has LC_MESSAGES $ok14 = 1; - foreach my $err (keys %!) { - use Errno; - $! = eval "&Errno::$err"; # Convert to strerror() output - my $strerror = "$!"; - if ("$strerror" =~ /\P{ASCII}/) { - my $utf8_strerror = $strerror; - utf8::upgrade($utf8_strerror); - - # If $! was already in UTF-8, the upgrade was a no-op; - # otherwise they will be different byte strings. - use bytes; - $ok14 = $utf8_strerror eq $strerror; - last; + $ok14_5 = 1; + if (setlocale(&POSIX::LC_MESSAGES, $Locale)) { + foreach my $err (keys %!) { + use Errno; + $! = eval "&Errno::$err"; # Convert to strerror() output + my $strerror = "$!"; + if ("$strerror" =~ /\P{ASCII}/) { + $ok14 = utf8::is_utf8($strerror); + no locale; + $ok14_5 = "$!" !~ /\P{ASCII}/; + last; + } } } @@ -1142,78 +1859,135 @@ foreach $Locale (@Locale) { # stringification. my $string_g = "$g"; + my $sprintf_g = sprintf("%g", $g); - my $utf8_string_g = "$g"; - utf8::upgrade($utf8_string_g); + $ok15 = $string_g =~ / ^ \p{ASCII}+ $ /x || utf8::is_utf8($string_g); + $ok16 = $sprintf_g eq $string_g; + } + { + no locale; + $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i); + } + $ok18 = $j eq sprintf("%g:%g", $h, $i); + } + + $ok19 = $ok20 = 1; + if (setlocale(&POSIX::LC_TIME, $Locale)) { # These tests aren't affected by + # :not_characters + my @times = CORE::localtime(); + + use locale; + $ok19 = POSIX::strftime("%p", @times) ne "%p"; # [perl #119425] + my $date = POSIX::strftime("'%A' '%B' '%Z' '%p'", @times); + debug("'Day' 'Month' 'TZ' 'am/pm' = ", disp_str($date)); + + # If there is any non-ascii, it better be UTF-8 in a UTF-8 locale, and + # not UTF-8 if the locale isn't UTF-8. + $ok20 = $date =~ / ^ \p{ASCII}+ $ /x + || $is_utf8_locale == utf8::is_utf8($date); + } - my $utf8_sprintf_g = sprintf("%g", $g); - utf8::upgrade($utf8_sprintf_g); - use bytes; - $ok15 = $utf8_string_g eq $string_g; - $ok16 = $utf8_sprintf_g eq $string_g; + $ok21 = 1; + foreach my $err (keys %!) { + no locale; + use Errno; + $! = eval "&Errno::$err"; # Convert to strerror() output + my $strerror = "$!"; + if ("$strerror" =~ /\P{ASCII}/) { + $ok21 = 0; + last; } } - tryneoalpha($Locale, ++$locales_test_number, $ok1); + report_result($Locale, ++$locales_test_number, $ok1); $test_names{$locales_test_number} = 'Verify that an intervening printf doesn\'t change assignment results'; my $first_a_test = $locales_test_number; - debug "# $first_a_test..$locales_test_number: \$a = $a, \$b = $b, Locale = $Locale\n"; + debug "$first_a_test..$locales_test_number: \$a = $a, \$b = $b, Locale = $Locale\n"; - tryneoalpha($Locale, ++$locales_test_number, $ok2); + report_result($Locale, ++$locales_test_number, $ok2); $test_names{$locales_test_number} = 'Verify that an intervening sprintf doesn\'t change assignment results'; my $first_c_test = $locales_test_number; - tryneoalpha($Locale, ++$locales_test_number, $ok3); + report_result($Locale, ++$locales_test_number, $ok3); $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a constant'; + $problematical_tests{$locales_test_number} = 1; - tryneoalpha($Locale, ++$locales_test_number, $ok4); + report_result($Locale, ++$locales_test_number, $ok4); $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar'; + $problematical_tests{$locales_test_number} = 1; - tryneoalpha($Locale, ++$locales_test_number, $ok5); + report_result($Locale, ++$locales_test_number, $ok5); $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar and an intervening sprintf'; + $problematical_tests{$locales_test_number} = 1; - debug "# $first_c_test..$locales_test_number: \$c = $c, \$d = $d, Locale = $Locale\n"; + debug "$first_c_test..$locales_test_number: \$c = $c, \$d = $d, Locale = $Locale\n"; - tryneoalpha($Locale, ++$locales_test_number, $ok6); + report_result($Locale, ++$locales_test_number, $ok6); $test_names{$locales_test_number} = 'Verify that can assign stringified under inner no-locale block'; my $first_e_test = $locales_test_number; - tryneoalpha($Locale, ++$locales_test_number, $ok7); + report_result($Locale, ++$locales_test_number, $ok7); $test_names{$locales_test_number} = 'Verify that "==" with a scalar still works in inner no locale'; - tryneoalpha($Locale, ++$locales_test_number, $ok8); + report_result($Locale, ++$locales_test_number, $ok8); $test_names{$locales_test_number} = 'Verify that "==" with a scalar and an intervening sprintf still works in inner no locale'; + $problematical_tests{$locales_test_number} = 1; - debug "# $first_e_test..$locales_test_number: \$e = $e, no locale\n"; + debug "$first_e_test..$locales_test_number: \$e = $e, no locale\n"; - tryneoalpha($Locale, ++$locales_test_number, $ok9); + report_result($Locale, ++$locales_test_number, $ok9); $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a constant'; + $problematical_tests{$locales_test_number} = 1; my $first_f_test = $locales_test_number; - tryneoalpha($Locale, ++$locales_test_number, $ok10); + report_result($Locale, ++$locales_test_number, $ok10); $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar'; + $problematical_tests{$locales_test_number} = 1; - tryneoalpha($Locale, ++$locales_test_number, $ok11); + report_result($Locale, ++$locales_test_number, $ok11); $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar and an intervening sprintf'; + $problematical_tests{$locales_test_number} = 1; - tryneoalpha($Locale, ++$locales_test_number, $ok12); + report_result($Locale, ++$locales_test_number, $ok12); $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix can participate in an addition and function call as numeric'; + $problematical_tests{$locales_test_number} = 1; - tryneoalpha($Locale, ++$locales_test_number, $ok13); + report_result($Locale, ++$locales_test_number, $ok13); $test_names{$locales_test_number} = 'Verify that don\'t get warning under "==" even if radix is not a dot'; + $problematical_tests{$locales_test_number} = 1; - tryneoalpha($Locale, ++$locales_test_number, $ok14); + report_result($Locale, ++$locales_test_number, $ok14); $test_names{$locales_test_number} = 'Verify that non-ASCII UTF-8 error messages are in UTF-8'; - tryneoalpha($Locale, ++$locales_test_number, $ok15); + report_result($Locale, ++$locales_test_number, $ok14_5); + $test_names{$locales_test_number} = '... and are ASCII outside "use locale"'; + + report_result($Locale, ++$locales_test_number, $ok15); $test_names{$locales_test_number} = 'Verify that a number with a UTF-8 radix has a UTF-8 stringification'; - tryneoalpha($Locale, ++$locales_test_number, $ok16); + report_result($Locale, ++$locales_test_number, $ok16); $test_names{$locales_test_number} = 'Verify that a sprintf of a number with a UTF-8 radix yields UTF-8'; - debug "# $first_f_test..$locales_test_number: \$f = $f, \$g = $g, back to locale = $Locale\n"; + report_result($Locale, ++$locales_test_number, $ok17); + $test_names{$locales_test_number} = 'Verify that a sprintf of a number outside locale scope uses a dot radix'; + + report_result($Locale, ++$locales_test_number, $ok18); + $test_names{$locales_test_number} = 'Verify that a sprintf of a number back within locale scope uses locale radix'; + + report_result($Locale, ++$locales_test_number, $ok19); + $test_names{$locales_test_number} = 'Verify that strftime doesn\'t return "%p" in locales where %p is empty'; + + report_result($Locale, ++$locales_test_number, $ok20); + $test_names{$locales_test_number} = 'Verify that strftime returns date with UTF-8 flag appropriately set'; + $problematical_tests{$locales_test_number} = 1; # This is broken in + # OS X 10.9.3 + + report_result($Locale, ++$locales_test_number, $ok21); + $test_names{$locales_test_number} = '"$!" is ASCII only outside of locale scope'; + + debug "$first_f_test..$locales_test_number: \$f = $f, \$g = $g, back to locale = $Locale\n"; # Does taking lc separately differ from taking # the lc "in-line"? (This was the bug 19990704.002, change #3568.) @@ -1235,7 +2009,7 @@ foreach $Locale (@Locale) { my $y = "aa"; my $z = "AB"; - tryneoalpha($Locale, ++$locales_test_number, + report_result($Locale, ++$locales_test_number, lcA($x, $y) == 1 && lcB($x, $y) == 1 || lcA($x, $z) == 0 && lcB($x, $z) == 0); } @@ -1256,7 +2030,7 @@ foreach $Locale (@Locale) { my $y = "aa"; my $z = "AB"; - tryneoalpha($Locale, ++$locales_test_number, + report_result($Locale, ++$locales_test_number, lcC($x, $y) == 1 && lcD($x, $y) == 1 || lcC($x, $z) == 0 && lcD($x, $z) == 0); } @@ -1273,13 +2047,19 @@ foreach $Locale (@Locale) { my @f = (); ++$locales_test_number; $test_names{$locales_test_number} = 'Verify case insensitive matching works'; - foreach my $x (sort keys %UPPER) { + foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) { if (! $is_utf8_locale) { my $y = lc $x; next unless uc $y eq $x; - print "# UPPER $x lc $y ", - $x =~ /$y/i ? 1 : 0, " ", - $y =~ /$x/i ? 1 : 0, "\n" if 0; + debug_more( "UPPER=", disp_chars(($x)), + "; lc=", disp_chars(($y)), "; ", + "; fc=", disp_chars((fc $x)), "; ", + disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", + $x =~ /$y/i ? 1 : 0, + "; ", + disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", + $y =~ /$x/i ? 1 : 0, + "\n"); # # If $x and $y contain regular expression characters # AND THEY lowercase (/i) to regular expression characters, @@ -1308,9 +2088,7 @@ foreach $Locale (@Locale) { print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n"; next; } - # With utf8 both will fail since the locale concept - # of upper/lower does not work well in Unicode. - push @f, $x unless $x =~ /$y/i == $y =~ /$x/i; + push @f, $x unless $x =~ /$y/i && $y =~ /$x/i; # fc is not a locale concept, so Perl uses lc for it. push @f, $x unless lc $x eq fc $x; @@ -1319,12 +2097,16 @@ foreach $Locale (@Locale) { use locale ':not_characters'; my $y = lc $x; next unless uc $y eq $x; - print "# UPPER $x lc $y ", - $x =~ /$y/i ? 1 : 0, " ", - $y =~ /$x/i ? 1 : 0, "\n" if 0; + debug_more( "UPPER=", disp_chars(($x)), + "; lc=", disp_chars(($y)), "; ", + "; fc=", disp_chars((fc $x)), "; ", + disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", + $x =~ /$y/i ? 1 : 0, + "; ", + disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", + $y =~ /$x/i ? 1 : 0, + "\n"); - # Here, we can fully test things, unlike plain 'use locale', - # because this form does work well with Unicode push @f, $x unless $x =~ /$y/i && $y =~ /$x/i; # The places where Unicode's lc is different from fc are @@ -1333,20 +2115,24 @@ foreach $Locale (@Locale) { } } - foreach my $x (sort keys %lower) { + foreach my $x (sort { ord $a <=> ord $b } keys %lower) { if (! $is_utf8_locale) { my $y = uc $x; next unless lc $y eq $x; - print "# lower $x uc $y ", - $x =~ /$y/i ? 1 : 0, " ", - $y =~ /$x/i ? 1 : 0, "\n" if 0; + debug_more( "lower=", disp_chars(($x)), + "; uc=", disp_chars(($y)), "; ", + "; fc=", disp_chars((fc $x)), "; ", + disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", + $x =~ /$y/i ? 1 : 0, + "; ", + disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", + $y =~ /$x/i ? 1 : 0, + "\n"); if ($x =~ $re || $y =~ $re) { # See above. print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n"; next; } - # With utf8 both will fail since the locale concept - # of upper/lower does not work well in Unicode. - push @f, $x unless $x =~ /$y/i == $y =~ /$x/i; + push @f, $x unless $x =~ /$y/i && $y =~ /$x/i; push @f, $x unless lc $x eq fc $x; } @@ -1354,18 +2140,22 @@ foreach $Locale (@Locale) { use locale ':not_characters'; my $y = uc $x; next unless lc $y eq $x; - print "# lower $x uc $y ", - $x =~ /$y/i ? 1 : 0, " ", - $y =~ /$x/i ? 1 : 0, "\n" if 0; + debug_more( "lower=", disp_chars(($x)), + "; uc=", disp_chars(($y)), "; ", + "; fc=", disp_chars((fc $x)), "; ", + disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", + $x =~ /$y/i ? 1 : 0, + "; ", + disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", + $y =~ /$x/i ? 1 : 0, + "\n"); push @f, $x unless $x =~ /$y/i && $y =~ /$x/i; push @f, $x unless lc $x eq fc $x; } } - tryneoalpha($Locale, $locales_test_number, @f == 0); - if (@f) { - print "# failed $locales_test_number locale '$Locale' characters @f\n" - } + report_multi_result($Locale, $locales_test_number, \@f); + $problematical_tests{$locales_test_number} = 1; } # [perl #109318] @@ -1373,6 +2163,7 @@ foreach $Locale (@Locale) { my @f = (); ++$locales_test_number; $test_names{$locales_test_number} = 'Verify atof with locale radix and negative exponent'; + $problematical_tests{$locales_test_number} = 1; my $radix = POSIX::localeconv()->{decimal_point}; my @nums = ( @@ -1395,7 +2186,7 @@ foreach $Locale (@Locale) { } } - tryneoalpha($Locale, $locales_test_number, @f == 0); + report_result($Locale, $locales_test_number, @f == 0); if (@f) { print "# failed $locales_test_number locale '$Locale' numbers @f\n" } @@ -1406,125 +2197,82 @@ my $final_locales_test_number = $locales_test_number; # Recount the errors. -foreach ($first_locales_test_number..$final_locales_test_number) { +foreach $test_num ($first_locales_test_number..$final_locales_test_number) { if (%setlocale_failed) { print "not "; } - elsif ($Problem{$_} || !defined $Okay{$_} || !@{$Okay{$_}}) { + elsif ($Problem{$test_num} || !defined $Okay{$test_num} || !@{$Okay{$test_num}}) { if (defined $not_necessarily_a_problem_test_number - && $_ == $not_necessarily_a_problem_test_number) + && $test_num == $not_necessarily_a_problem_test_number) { print "# The failure of test $not_necessarily_a_problem_test_number is not necessarily fatal.\n"; print "# It usually indicates a problem in the environment,\n"; print "# not in Perl itself.\n"; } - if ($Okay{$_} && ($_ >= $first_casing_test_number - && $_ <= $final_casing_test_number)) - { - my $percent_fail = int(.5 + (100 * scalar(keys $Problem{$_}) - / scalar(@{$Okay{$_}}))); - if ($percent_fail < $acceptable_fold_failure_percentage) { - $test_names{$_} .= 'TODO'; - print "# ", 100 - $percent_fail, "% of locales pass the following test, so it is likely that the failures\n"; - print "# are errors in the locale definitions. The test is marked TODO, as the\n"; - print "# problem is not likely to be Perl's\n"; + if ($Okay{$test_num} && grep { $_ == $test_num } keys %problematical_tests) { + no warnings 'experimental::autoderef'; + # Round to nearest .1% + my $percent_fail = (int(.5 + (1000 * scalar(keys $Problem{$test_num}) + / scalar(@Locale)))) + / 10; + if ($percent_fail < $acceptable_failure_percentage) { + if (! $debug) { + $test_names{$test_num} .= 'TODO'; + print "# ", 100 - $percent_fail, "% of locales pass the following test, so it is likely that the failures\n"; + print "# are errors in the locale definitions. The test is marked TODO, as the\n"; + print "# problem is not likely to be Perl's\n"; + } + } + if ($debug) { + print "# $percent_fail% of locales (", + scalar(keys $Problem{$test_num}), + " of ", + scalar(@Locale), + ") fail the above test (TODO cut-off is ", + $acceptable_failure_percentage, + "%)\n"; } } + print "#\n"; + if ($debug) { + print "# The code points that had this failure are given above. Look for lines\n"; + print "# that match 'failed $test_num'\n"; + } + else { + print "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n"; + print "# Then look at that output for lines that match 'failed $test_num'\n"; + } print "not "; } - print "ok $_"; - if (defined $test_names{$_}) { + print "ok $test_num"; + if (defined $test_names{$test_num}) { # If TODO is in the test name, make it thus - my $todo = $test_names{$_} =~ s/TODO\s*//; - print " $test_names{$_}"; + my $todo = $test_names{$test_num} =~ s/TODO\s*//; + print " $test_names{$test_num}"; print " # TODO" if $todo; } print "\n"; } -# Give final advice. - -my $didwarn = 0; - -foreach ($first_locales_test_number..$final_locales_test_number) { - if ($Problem{$_}) { - my @f = sort keys %{ $Problem{$_} }; - my $f = join(" ", @f); - $f =~ s/(.{50,60}) /$1\n#\t/g; - print - "#\n", - "# The locale ", (@f == 1 ? "definition" : "definitions"), "\n#\n", - "#\t", $f, "\n#\n", - "# on your system may have errors because the locale test $_\n", - "# failed in ", (@f == 1 ? "that locale" : "those locales"), - ".\n"; - print < 255 # that we use - # We test an ASCII character, which should change case and be tainted; + # We test an ASCII character, which should change case; # a Latin1 character, which shouldn't change case under this C locale, - # and is tainted. # an above-Latin1 character that when the case is changed would cross - # the 255/256 boundary, so doesn't change case and isn't tainted - # (the \x{149} is one of these, but changes into 2 characters, the + # the 255/256 boundary, so doesn't change case + # (the \x{149} is one of these, but changes into 2 characters, the # first one of which doesn't cross the boundary. # the final one in each list is an above-Latin1 character whose case - # does change, and shouldn't be tainted. The code below uses its - # position in its list as a marker to indicate that it, unlike the - # other code points above ASCII, has a successful case change + # does change. The code below uses its position in its list as a + # marker to indicate that it, unlike the other code points above + # ASCII, has a successful case change + # + # All casing operations under locale (but not :not_characters) should + # taint if ($function =~ /^u/) { @list = ("", "a", "\xe0", "\xff", "\x{fb00}", "\x{149}", "\x{101}"); $ascii_case_change_delta = -32; @@ -1622,9 +2372,9 @@ setlocale(LC_ALL, "C"); : "; not encoded in utf8)") . " should be \"$should_be\", got \"$changed\""); - # Tainting shouldn't happen for utf8 locales, empty - # strings, or those characters above 255. - (! $is_utf8_locale && length($char) > 0 && ord($char) < 256) + # Tainting shouldn't happen for use locale :not_character + # (a utf8 locale) + (! $is_utf8_locale) ? check_taint($changed) : check_taint_not($changed); @@ -1636,6 +2386,91 @@ setlocale(LC_ALL, "C"); } } +# Give final advice. + +my $didwarn = 0; + +foreach ($first_locales_test_number..$final_locales_test_number) { + if ($Problem{$_}) { + my @f = sort keys %{ $Problem{$_} }; + my $f = join(" ", @f); + $f =~ s/(.{50,60}) /$1\n#\t/g; + print + "#\n", + "# The locale ", (@f == 1 ? "definition" : "definitions"), "\n#\n", + "#\t", $f, "\n#\n", + "# on your system may have errors because the locale test $_\n", + "# \"$test_names{$_}\"\n", + "# failed in ", (@f == 1 ? "that locale" : "those locales"), + ".\n"; + print <