# http://markmail.org/message/5jwam4xsx4amsdnv. Also on AIX machines, many
# locales call a no-break space a graphic.
# (There aren't 1000 locales currently in existence, so 99.9 works)
-my $acceptable_fold_failure_percentage = ($^O =~ / ^ ( MSWin32 | AIX ) $ /ix)
- ? 99.9
- : 5;
+my $acceptable_failure_percentage = ($^O =~ / ^ ( MSWin32 | AIX ) $ /ix)
+ ? 99.9
+ : 5;
# The list of test numbers of the problematic tests.
my %problematical_tests;
sub check_taint ($;$) {
my $message_tail = $_[1] // "";
- $message_tail = ": $message_tail" if $message_tail;
+
+ # Extra blanks are so aligns with taint_not output
+ $message_tail = ": $message_tail" if $message_tail;
ok is_tainted($_[0]), "verify that is tainted$message_tail";
}
sub check_taint_not ($;$) {
my $message_tail = $_[1] // "";
- $message_tail = ": $message_tail" if $message_tail;
+ $message_tail = ": $message_tail" if $message_tail;
ok((not is_tainted($_[0])), "verify that isn't tainted$message_tail");
}
use locale; # engage locale and therefore locale taint.
-check_taint_not $a, "\t\$a";
-
-check_taint uc($a);
-check_taint "\U$a";
-check_taint ucfirst($a);
-check_taint "\u$a";
-check_taint lc($a);
-check_taint fc($a);
-check_taint "\L$a";
-check_taint "\F$a";
-check_taint lcfirst($a);
-check_taint "\l$a";
-
-check_taint_not sprintf('%e', 123.456);
-check_taint_not sprintf('%f', 123.456);
-check_taint_not sprintf('%g', 123.456);
-check_taint_not sprintf('%d', 123.456);
-check_taint_not sprintf('%x', 123.456);
+
+check_taint_not $a, '$a';
+
+check_taint uc($a), 'uc($a)';
+check_taint "\U$a", '"\U$a"';
+check_taint ucfirst($a), 'ucfirst($a)';
+check_taint "\u$a", '"\u$a"';
+check_taint lc($a), 'lc($a)';
+check_taint fc($a), 'fc($a)';
+check_taint "\L$a", '"\L$a"';
+check_taint "\F$a", '"\F$a"';
+check_taint lcfirst($a), 'lcfirst($a)';
+check_taint "\l$a", '"\l$a"';
+
+check_taint_not sprintf('%e', 123.456), "sprintf('%e', 123.456)";
+check_taint_not sprintf('%f', 123.456), "sprintf('%f', 123.456)";
+check_taint_not sprintf('%g', 123.456), "sprintf('%g', 123.456)";
+check_taint_not sprintf('%d', 123.456), "sprintf('%d', 123.456)";
+check_taint_not sprintf('%x', 123.456), "sprintf('%x', 123.456)";
$_ = $a; # untaint $_
$_ = uc($a); # taint $_
-check_taint $_, "\t\$_";
+check_taint $_, '$_ = uc($a)';
/(\w)/; # taint $&, $`, $', $+, $1.
-check_taint $&, "\t/(\\w)/ \$&";
+check_taint $&, "\$& from /(\\w)/";
check_taint $`, "\t\$`";
check_taint $', "\t\$'";
check_taint $+, "\t\$+";
check_taint_not $2, "\t\$2";
/(.)/; # untaint $&, $`, $', $+, $1.
-check_taint_not $&, "\t/(.)/ \$&";
+check_taint_not $&, "\$& from /(.)/";
check_taint_not $`, "\t\$`";
check_taint_not $', "\t\$'";
check_taint_not $+, "\t\$+";
check_taint_not $2, "\t\$2";
/(\W)/; # taint $&, $`, $', $+, $1.
-check_taint $&, "\t/(\\W)/ \$&";
+check_taint $&, "\$& from /(\\W)/";
check_taint $`, "\t\$`";
check_taint $', "\t\$'";
check_taint $+, "\t\$+";
check_taint_not $2, "\t\$2";
/(.)/; # untaint $&, $`, $', $+, $1.
-check_taint_not $&, "\t/(.)/ \$&";
+check_taint_not $&, "\$& from /(.)/";
check_taint_not $`, "\t\$`";
check_taint_not $', "\t\$'";
check_taint_not $+, "\t\$+";
check_taint_not $2, "\t\$2";
/(\s)/; # taint $&, $`, $', $+, $1.
-check_taint $&, "\t/(\\s)/ \$&";
+check_taint $&, "\$& from /(\\s)/";
check_taint $`, "\t\$`";
check_taint $', "\t\$'";
check_taint $+, "\t\$+";
check_taint_not $2, "\t\$2";
/(.)/; # untaint $&, $`, $', $+, $1.
-check_taint_not $&, "\t/(.)/ \$&";
+check_taint_not $&, "\$& from /(.)/";
/(\S)/; # taint $&, $`, $', $+, $1.
-check_taint $&, "\t/(\\S)/ \$&";
+check_taint $&, "\$& from /(\\S)/";
check_taint $`, "\t\$`";
check_taint $', "\t\$'";
check_taint $+, "\t\$+";
check_taint_not $2, "\t\$2";
/(.)/; # untaint $&, $`, $', $+, $1.
-check_taint_not $&, "\t/(.)/ \$&";
+check_taint_not $&, "\$& from /(.)/";
"a" =~ /(a)|(\w)/; # taint $&, $`, $', $+, $1.
-check_taint $&, "\t/(a)|(\\w)/ \$&";
+check_taint $&, "\$& from /(a)|(\\w)/";
check_taint $`, "\t\$`";
check_taint $', "\t\$'";
check_taint $+, "\t\$+";
check_taint $1, "\t\$1";
-ok($1 eq 'a', ("\t" x 4) . "\$1 is 'a'");
-ok(! defined $2, ("\t" x 4) . "\$2 is undefined");
+ok($1 eq 'a', ("\t" x 5) . "\$1 is 'a'");
+ok(! defined $2, ("\t" x 5) . "\$2 is undefined");
check_taint_not $2, "\t\$2";
check_taint_not $3, "\t\$3";
/(.)/; # untaint $&, $`, $', $+, $1.
-check_taint_not $&, "\t/(.)/ \$&";
+check_taint_not $&, "\$& from /(.)/";
"\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i; # no tainting because no locale dependence
-check_taint_not $&, "\t/(\\N{CYRILLIC CAPITAL LETTER A})/i \$&";
+check_taint_not $&, "\$& from /(\\N{CYRILLIC CAPITAL LETTER A})/i";
check_taint_not $`, "\t\$`";
check_taint_not $', "\t\$'";
check_taint_not $+, "\t\$+";
check_taint_not $1, "\t\$1";
-ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\$1 is 'small cyrillic a'");
+ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\t\$1 is 'small cyrillic a'");
check_taint_not $2, "\t\$2";
/(.)/; # untaint $&, $`, $', $+, $1.
-check_taint_not $&, "\t/./ \$&";
+check_taint_not $&, "\$& from /./";
/(.)/; # untaint $&, $`, $', $+, $1.
-check_taint_not $&, "\t/(.)/ \$&";
+check_taint_not $&, "\$& from /(.)/";
"a:" =~ /(.)\b(.)/; # taint $&, $`, $', $+, $1.
-check_taint $&, "\t/(.)\\b(.)/ \$&";
+check_taint $&, "\$& from /(.)\\b(.)/";
check_taint $`, "\t\$`";
check_taint $', "\t\$'";
check_taint $+, "\t\$+";
check_taint_not $3, "\t\$3";
/(.)/; # untaint $&, $`, $', $+, $1.
-check_taint_not $&, "\t/./ \$&";
+check_taint_not $&, "\$& from /./";
"aa" =~ /(.)\B(.)/; # taint $&, $`, $', $+, $1.
-check_taint $&, "\t/(.)\\B(.)/ \$&";
+check_taint $&, "\$& from /(.)\\B(.)/";
check_taint $`, "\t\$`";
check_taint $', "\t\$'";
check_taint $+, "\t\$+";
check_taint_not $3, "\t\$3";
/(.)/; # untaint $&, $`, $', $+, $1.
-check_taint_not $&, "\t/./ \$&";
+check_taint_not $&, "\$& from /./";
"aaa" =~ /(.).(\1)/i; # notaint because not locale dependent
-check_taint_not $&, "\t/(.).(\\1)/ \$&";
+check_taint_not $&, "\$ & from /(.).(\\1)/";
check_taint_not $`, "\t\$`";
check_taint_not $', "\t\$'";
check_taint_not $+, "\t\$+";
check_taint_not $1, "\t\$1";
check_taint_not $2, "\t\$2";
-check_taint_not $3, "\t\$3";
+check_taint_not $3, "\t\$3";
/(.)/; # untaint $&, $`, $', $+, $1.
-check_taint_not $&, "\t/./ \$&";
+check_taint_not $&, "\$ & from /./";
$_ = $a; # untaint $_
-check_taint_not $_, "\t\$_";
+check_taint_not $_, 'untainting $_ works';
/(b)/; # this must not taint
-check_taint_not $&, "\t/(b)/ \$&";
+check_taint_not $&, "\$ & from /(b)/";
check_taint_not $`, "\t\$`";
check_taint_not $', "\t\$'";
check_taint_not $+, "\t\$+";
$_ = $a; # untaint $_
-check_taint_not $_, "\t\$_";
+check_taint_not $_, 'untainting $_ works';
$b = uc($a); # taint $b
s/(.+)/$b/; # this must taint only the $_
-check_taint $_, "\t\$_";
+check_taint $_, '$_ (wasn\'t tainted) from s/(.+)/$b/ where $b is tainted';
check_taint_not $&, "\t\$&";
check_taint_not $`, "\t\$`";
check_taint_not $', "\t\$'";
$_ = $a; # untaint $_
s/(.+)/b/; # this must not taint
-check_taint_not $_, "\t\$_";
+check_taint_not $_, '$_ (wasn\'t tainted) from s/(.+)/b/';
check_taint_not $&, "\t\$&";
check_taint_not $`, "\t\$`";
check_taint_not $', "\t\$'";
$b = $a; # untaint $b
($b = $a) =~ s/\w/$&/;
-check_taint $b, "\t\$b"; # $b should be tainted.
-check_taint_not $a, "\t\$a"; # $a should be not.
+check_taint $b, '$b from ($b = $a) =~ s/\w/$&/'; # $b should be tainted.
+check_taint_not $a, '$a from ($b = $a) =~ s/\w/$&/'; # $a should be not.
$_ = $a; # untaint $_
s/(\w)/\l$1/; # this must taint
-check_taint $_, "\t\$_";
+check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\l$1/,'; # this must taint
check_taint $&, "\t\$&";
check_taint $`, "\t\$`";
check_taint $', "\t\$'";
$_ = $a; # untaint $_
s/(\w)/\L$1/; # this must taint
-check_taint $_, "\t\$_";
+check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\L$1/,';
check_taint $&, "\t\$&";
check_taint $`, "\t\$`";
check_taint $', "\t\$'";
$_ = $a; # untaint $_
s/(\w)/\u$1/; # this must taint
-check_taint $_, "\t\$_";
+check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\u$1/';
check_taint $&, "\t\$&";
check_taint $`, "\t\$`";
check_taint $', "\t\$'";
$_ = $a; # untaint $_
s/(\w)/\U$1/; # this must taint
-check_taint $_, "\t\$_";
+check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\U$1/';
check_taint $&, "\t\$&";
check_taint $`, "\t\$`";
check_taint $', "\t\$'";
# After all this tainting $a should be cool.
-check_taint_not $a, "\t\$a";
+check_taint_not $a, '$a still not tainted';
"a" =~ /([a-z])/;
check_taint_not $1, '"a" =~ /([a-z])/';
++$locales_test_number;
undef @f;
- $test_names{$locales_test_number} = 'Verify that [:digit:] is a subset of [:xdigit:]';
+ my @xdigit_digits; # :digit: & :xdigit:
+ $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains one or two blocks of 10 consecutive [:digit:] chars';
for (map { chr } 0..255) {
if ($is_utf8_locale) {
use locale ':not_characters';
- push @f, $_ if /[[:digit:]]/ and ! /[[:xdigit:]]/;
+ # For utf8 locales, we actually use a stricter test: that :digit:
+ # is a subset of :xdigit:, as we know that only 0-9 should match
+ push @f, $_ if /[[:digit:]]/ and ! /[[:xdigit:]]/;
+ }
+ else {
+ push @xdigit_digits, $_ if /[[:digit:]]/ and /[[:xdigit:]]/;
+ }
+ }
+ if (! $is_utf8_locale) {
+
+ # For non-utf8 locales, @xdigit_digits is a list of the characters
+ # that are both :xdigit: and :digit:. Because :digit: is stored in
+ # increasing code point order (unless the tests above failed),
+ # @xdigit_digits is as well. There should be exactly 10 or
+ # 20 of these.
+ if (@xdigit_digits != 10 && @xdigit_digits != 20) {
+ @f = @xdigit_digits;
}
else {
- push @f, $_ if /[[:digit:]]/ and ! /[[:xdigit:]]/;
+
+ # Look for contiguity in the series, adding any wrong ones to @f
+ my @temp = @xdigit_digits;
+ while (@temp > 1) {
+ push @f, $temp[1] if ($temp[0] != $temp[1] - 1)
+
+ # Skip this test for the 0th character of
+ # the second block of 10, as it won't be
+ # contiguous with the previous block
+ && (! defined $xdigit_digits[10]
+ || $temp[1] != $xdigit_digits[10]);
+ shift @temp;
+ }
}
}
+
report_multi_result($Locale, $locales_test_number, \@f);
++$locales_test_number;
$test_names{$locales_test_number} = 'Verify that any additional members of [:xdigit:], are in groups of 6 consecutive code points';
my $previous_ord;
my $count = 0;
- for (map { chr } 0..255) {
- next unless /[[:xdigit:]]/;
- next if /[[:digit:]]/;
- next if /[A-Fa-f]/;
+ for my $chr (map { chr } 0..255) {
+ next unless $chr =~ /[[:xdigit:]]/;
+ if ($is_utf8_locale) {
+ next if $chr =~ /[[:digit:]]/;
+ }
+ else {
+ next if grep { $chr eq $_ } @xdigit_digits;
+ }
+ next if $chr =~ /[A-Fa-f]/;
if (defined $previous_ord) {
if ($is_utf8_locale) {
use locale ':not_characters';
- push @f, $_ if ord $_ != $previous_ord + 1;
+ push @f, $chr if ord $chr != $previous_ord + 1;
}
else {
- push @f, $_ if ord $_ != $previous_ord + 1;
+ push @f, $chr if ord $chr != $previous_ord + 1;
}
}
$count++;
undef $previous_ord;
}
else {
- $previous_ord = ord $_;
+ $previous_ord = ord $chr;
}
}
report_multi_result($Locale, $locales_test_number, \@f);
$ok13 = $w == 0;
# Look for non-ASCII error messages, and verify that the first
- # such is in UTF-8 (the others almost certainly will be like the
- # first).
+ # such is NOT in UTF-8 (the others almost certainly will be like
+ # the first) See [perl #119499].
$ok14 = 1;
foreach my $err (keys %!) {
use Errno;
$! = eval "&Errno::$err"; # Convert to strerror() output
my $strerror = "$!";
if ("$strerror" =~ /\P{ASCII}/) {
- $ok14 = utf8::is_utf8($strerror);
+ $ok14 = ! utf8::is_utf8($strerror);
last;
}
}
$problematical_tests{$locales_test_number} = 1;
report_result($Locale, ++$locales_test_number, $ok14);
- $test_names{$locales_test_number} = 'Verify that non-ASCII UTF-8 error messages are in UTF-8';
+ $test_names{$locales_test_number} = 'Verify that non-ASCII UTF-8 error messages are NOT in UTF-8';
report_result($Locale, ++$locales_test_number, $ok15);
$test_names{$locales_test_number} = 'Verify that a number with a UTF-8 radix has a UTF-8 stringification';
my $percent_fail = (int(.5 + (1000 * scalar(keys $Problem{$test_num})
/ scalar(@Locale))))
/ 10;
- if (! $debug && $percent_fail < $acceptable_fold_failure_percentage)
+ if (! $debug && $percent_fail < $acceptable_failure_percentage)
{
$test_names{$test_num} .= 'TODO';
print "# ", 100 - $percent_fail, "% of locales pass the following test, so it is likely that the failures\n";
my $s = join(" ", @s);
$s =~ s/(.{50,60}) /$1\n#\t/g;
- warn
+ print
"# The following locales\n#\n",
"#\t", $s, "\n#\n",
"# tested okay.\n#\n",
} else {
- warn "# None of your locales were fully okay.\n";
+ print "# None of your locales were fully okay.\n";
}
if (@F) {
$details = "# For even more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=2.\n";
}
- warn
+ print
"# The following locales\n#\n",
"#\t", $F, "\n#\n",
"# had problems.\n#\n",
$details;
} else {
- warn "# None of your locales were broken.\n";
+ print "# None of your locales were broken.\n";
}
}