# To make a TODO test, add the string 'TODO' to its %test_names value
+my $is_ebcdic = ord("A") == 193;
+
+no warnings 'locale'; # We test even weird locales; and do some scary things
+ # in ok locales
+
binmode STDOUT, ':utf8';
binmode STDERR, ':utf8';
chdir 't' if -d 't';
@INC = '../lib';
unshift @INC, '.';
- require Config; import Config;
- if (!$Config{d_setlocale} || $Config{ccflags} =~ /\bD?NO_LOCALE\b/) {
+ require './loc_tools.pl';
+ unless (locales_enabled('LC_CTYPE')) {
print "1..0\n";
exit;
}
- require './loc_tools.pl';
$| = 1;
+ require Config; import Config;
}
use strict;
-use feature 'fc';
+use feature 'fc', 'postderef';
# =1 adds debugging output; =2 increases the verbosity somewhat
my $debug = $ENV{PERL_DEBUG_FULL_TEST} // 0;
# The list of test numbers of the problematic tests.
my %problematical_tests;
+# If any %problematical_tests fails in one of these locales, it is
+# considered a TODO.
+my %known_bad_locales = (
+ irix => qr/ ^ (?: cs | hu | sk ) $/x,
+ darwin => qr/ ^ lt_LT.ISO8859 /ix,
+ os390 => qr/ ^ italian /ix,
+ );
+
+# cygwin isn't returning proper radix length in this locale, but supposedly to
+# be fixed in later versions.
+if ($^O eq 'cygwin' && version->new(($Config{osvers} =~ /^(\d+(?:\.\d+)+)/)[0]) le v2.4.1) {
+ $known_bad_locales{'cygwin'} = qr/ ^ ps_AF /ix;
+}
use Dumpvalue;
sub debug {
return unless $debug;
my($mess) = join "", '# ', @_;
- chop $mess;
- print $dumper->stringify($mess,1), "\n";
+ chomp $mess;
+ print STDERR $dumper->stringify($mess,1), "\n";
}
sub debug_more {
}
sub debugf {
- printf @_ if $debug;
+ printf STDERR @_ if $debug;
}
$a = 'abc %9';
print "ok " . ++$test_num;
print " $message";
print "\n";
+ return ($result) ? 1 : 0;
}
# First we'll do a lot of taint checking for locales.
ok((not is_tainted($_[0])), "verify that isn't tainted$message_tail");
}
+foreach my $category (qw(ALL COLLATE CTYPE MESSAGES MONETARY NUMERIC TIME)) {
+ my $short_result = locales_enabled($category);
+ ok ($short_result == 0 || $short_result == 1,
+ "Verify locales_enabled('$category') returns 0 or 1");
+ debug("locales_enabled('$category') returned '$short_result'");
+ my $long_result = locales_enabled("LC_$category");
+ if (! ok ($long_result == $short_result,
+ " and locales_enabled('LC_$category') returns "
+ . "the same value")
+ ) {
+ debug("locales_enabled('LC_$category') returned $long_result");
+ }
+}
+
"\tb\t" =~ /^m?(\s)(.*)\1$/;
check_taint_not $&, "not tainted outside 'use locale'";
;
my %Problem;
my %Okay;
+my %Known_bad_locale; # Failed test for a locale known to be bad
my %Testing;
my @Added_alpha; # Alphas that aren't in the C locale.
my %test_names;
}
else {
$result .= " " unless $prev_was_punct;
- $result .= charnames::viacode(ord $char);
+ my $name = charnames::viacode(ord $char);
+ $result .= (defined $name) ? $name : ':unknown:';
$prev_was_punct = 0;
}
}
sub report_result {
my ($Locale, $i, $pass_fail, $message) = @_;
- $message //= "";
- $message = " ($message)" if $message;
- unless ($pass_fail) {
+ if ($pass_fail) {
+ push @{$Okay{$i}}, $Locale;
+ }
+ else {
+ $message //= "";
+ $message = " ($message)" if $message;
+ $Known_bad_locale{$i}{$Locale} = 1 if exists $known_bad_locales{$^O}
+ && $Locale =~ $known_bad_locales{$^O};
$Problem{$i}{$Locale} = 1;
debug "failed $i ($test_names{$i}) with locale '$Locale'$message\n";
- } else {
- push @{$Okay{$i}}, $Locale;
}
}
debug "is utf8 locale? = $is_utf8_locale\n";
- my $radix = localeconv()->{decimal_point};
- if ($radix !~ / ^ [[:ascii:]] + $/x) {
- use bytes;
- $radix = disp_chars(split "", $radix);
- }
- debug "radix = $radix\n";
+ debug "radix = " . disp_str(localeconv()->{decimal_point}) . "\n";
if (! $is_utf8_locale) {
use locale;
last;
}
}
+
+ use locale;
+
+ ++$locales_test_number;
+ $test_names{$locales_test_number}
+ = 'Skip in locales where \001 has primary sorting weight; '
+ . 'otherwise verify that \0 doesn\'t have primary sorting weight';
+ if ("a\001c" lt "ab") {
+ report_result($Locale, $locales_test_number, 1);
+ }
+ else {
+ my $ok = "ab" lt "a\0c";
+ report_result($Locale, $locales_test_number, $ok);
+ }
+
+ ++$locales_test_number;
+ $test_names{$locales_test_number}
+ = 'Verify that strings with embedded NUL collate';
+ my $ok = "a\0a\0a" lt "a\001a\001a";
+ report_result($Locale, $locales_test_number, $ok);
+
+ ++$locales_test_number;
+ $test_names{$locales_test_number}
+ = 'Verify that strings with embedded NUL and '
+ . 'extra trailing NUL collate';
+ $ok = "a\0a\0" lt "a\001a\001";
+ report_result($Locale, $locales_test_number, $ok);
+
+ ++$locales_test_number;
+ $test_names{$locales_test_number}
+ = "Skip in non-UTF-8 locales; otherwise verify that UTF8ness "
+ . "doesn't matter with collation";
+ if (! $is_utf8_locale) {
+ report_result($Locale, $locales_test_number, 1);
+ }
+ else {
+
+ # khw can't think of anything better. Start with a string that is
+ # higher than its UTF-8 representation in both EBCDIC and ASCII
+ my $string = chr utf8::unicode_to_native(0xff);
+ my $utf8_string = $string;
+ utf8::upgrade($utf8_string);
+
+ # 8 should be lt 9 in all locales (except ones that aren't
+ # ASCII-based, which might fail this)
+ $ok = ("a${string}8") lt ("a${utf8_string}9");
+ report_result($Locale, $locales_test_number, $ok);
+ }
+
+ ++$locales_test_number;
+ $test_names{$locales_test_number}
+ = "Skip in UTF-8 locales; otherwise verify that single byte "
+ . "collates before 0x100 and above";
+ if ($is_utf8_locale) {
+ report_result($Locale, $locales_test_number, 1);
+ }
+ else {
+ my $max_collating = chr 0; # Find byte that collates highest
+ for my $i (0 .. 255) {
+ my $char = chr $i;
+ $max_collating = $char if $char gt $max_collating;
+ }
+ $ok = $max_collating lt chr 0x100;
+ report_result($Locale, $locales_test_number, $ok);
+ }
+
+ ++$locales_test_number;
+ $test_names{$locales_test_number}
+ = "Skip in UTF-8 locales; otherwise verify that 0x100 and "
+ . "above collate in code point order";
+ if ($is_utf8_locale) {
+ report_result($Locale, $locales_test_number, 1);
+ }
+ else {
+ $ok = chr 0x100 lt chr 0x101;
+ report_result($Locale, $locales_test_number, $ok);
+ }
}
my $ok1;
# first). This is only done if the current locale has LC_MESSAGES
$ok14 = 1;
$ok14_5 = 1;
- if (setlocale(&POSIX::LC_MESSAGES, $Locale)) {
+ if ( locales_enabled('LC_MESSAGES')
+ && setlocale(&POSIX::LC_MESSAGES, $Locale))
+ {
foreach my $err (keys %!) {
use Errno;
$! = eval "&Errno::$err"; # Convert to strerror() output
}
$ok21 = 1;
- foreach my $err (keys %!) {
- no locale;
- use Errno;
- $! = eval "&Errno::$err"; # Convert to strerror() output
- my $strerror = "$!";
- if ("$strerror" =~ /\P{ASCII}/) {
- $ok21 = 0;
- last;
+ if (locales_enabled('LC_MESSAGES')) {
+ foreach my $err (keys %!) {
+ no locale;
+ use Errno;
+ $! = eval "&Errno::$err"; # Convert to strerror() output
+ my $strerror = "$!";
+ if ("$strerror" =~ /\P{ASCII}/) {
+ $ok21 = 0;
+ last;
+ }
}
}
my $first_c_test = $locales_test_number;
- report_result($Locale, ++$locales_test_number, $ok3);
- $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a constant';
- $problematical_tests{$locales_test_number} = 1;
+ $test_names{++$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a constant';
+ if ($Config{usequadmath}) {
+ print "# Skip: no locale radix with usequadmath ($test_names{$locales_test_number})\n";
+ report_result($Locale, $locales_test_number, 1);
+ } else {
+ report_result($Locale, $locales_test_number, $ok3);
+ $problematical_tests{$locales_test_number} = 1;
+ }
- report_result($Locale, ++$locales_test_number, $ok4);
- $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar';
- $problematical_tests{$locales_test_number} = 1;
+ $test_names{++$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar';
+ if ($Config{usequadmath}) {
+ print "# Skip: no locale radix with usequadmath ($test_names{$locales_test_number})\n";
+ report_result($Locale, $locales_test_number, 1);
+ } else {
+ report_result($Locale, $locales_test_number, $ok4);
+ $problematical_tests{$locales_test_number} = 1;
+ }
report_result($Locale, ++$locales_test_number, $ok5);
$test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar and an intervening sprintf';
report_result($Locale, ++$locales_test_number, $ok7);
$test_names{$locales_test_number} = 'Verify that "==" with a scalar still works in inner no locale';
- report_result($Locale, ++$locales_test_number, $ok8);
- $test_names{$locales_test_number} = 'Verify that "==" with a scalar and an intervening sprintf still works in inner no locale';
- $problematical_tests{$locales_test_number} = 1;
+ $test_names{++$locales_test_number} = 'Verify that "==" with a scalar and an intervening sprintf still works in inner no locale';
+ if ($Config{usequadmath}) {
+ print "# Skip: no locale radix with usequadmath ($test_names{$locales_test_number})\n";
+ report_result($Locale, $locales_test_number, 1);
+ } else {
+ report_result($Locale, $locales_test_number, $ok8);
+ $problematical_tests{$locales_test_number} = 1;
+ }
debug "$first_e_test..$locales_test_number: \$e = $e, no locale\n";
$test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar';
$problematical_tests{$locales_test_number} = 1;
- report_result($Locale, ++$locales_test_number, $ok11);
- $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar and an intervening sprintf';
- $problematical_tests{$locales_test_number} = 1;
+ $test_names{++$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar and an intervening sprintf';
+ if ($Config{usequadmath}) {
+ print "# Skip: no locale radix with usequadmath ($test_names{$locales_test_number})\n";
+ report_result($Locale, $locales_test_number, 1);
+ } else {
+ report_result($Locale, $locales_test_number, $ok11);
+ $problematical_tests{$locales_test_number} = 1;
+ }
report_result($Locale, ++$locales_test_number, $ok12);
$test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix can participate in an addition and function call as numeric';
report_result($Locale, ++$locales_test_number, $ok16);
$test_names{$locales_test_number} = 'Verify that a sprintf of a number with a UTF-8 radix yields UTF-8';
+ $problematical_tests{$locales_test_number} = 1;
report_result($Locale, ++$locales_test_number, $ok17);
$test_names{$locales_test_number} = 'Verify that a sprintf of a number outside locale scope uses a dot radix';
report_result($Locale, ++$locales_test_number, $ok18);
$test_names{$locales_test_number} = 'Verify that a sprintf of a number back within locale scope uses locale radix';
+ $problematical_tests{$locales_test_number} = 1;
report_result($Locale, ++$locales_test_number, $ok19);
$test_names{$locales_test_number} = 'Verify that strftime doesn\'t return "%p" in locales where %p is empty';
"; lc=", disp_chars(($y)), "; ",
"; fc=", disp_chars((fc $x)), "; ",
disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
- $x =~ /$y/i ? 1 : 0,
+ $x =~ /\Q$y/i ? 1 : 0,
"; ",
disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
- $y =~ /$x/i ? 1 : 0,
+ $y =~ /\Q$x/i ? 1 : 0,
"\n");
#
# If $x and $y contain regular expression characters
print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n";
next;
}
- push @f, $x unless $x =~ /$y/i && $y =~ /$x/i;
+ push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
# fc is not a locale concept, so Perl uses lc for it.
push @f, $x unless lc $x eq fc $x;
"; lc=", disp_chars(($y)), "; ",
"; fc=", disp_chars((fc $x)), "; ",
disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
- $x =~ /$y/i ? 1 : 0,
+ $x =~ /\Q$y/i ? 1 : 0,
"; ",
disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
- $y =~ /$x/i ? 1 : 0,
+ $y =~ /\Q$x/i ? 1 : 0,
"\n");
- push @f, $x unless $x =~ /$y/i && $y =~ /$x/i;
+ push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
# The places where Unicode's lc is different from fc are
# skipped here by virtue of the 'next unless uc...' line above
"; uc=", disp_chars(($y)), "; ",
"; fc=", disp_chars((fc $x)), "; ",
disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
- $x =~ /$y/i ? 1 : 0,
+ $x =~ /\Q$y/i ? 1 : 0,
"; ",
disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
- $y =~ /$x/i ? 1 : 0,
+ $y =~ /\Q$x/i ? 1 : 0,
"\n");
if ($x =~ $re || $y =~ $re) { # See above.
print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n";
next;
}
- push @f, $x unless $x =~ /$y/i && $y =~ /$x/i;
+ push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
push @f, $x unless lc $x eq fc $x;
}
"; uc=", disp_chars(($y)), "; ",
"; fc=", disp_chars((fc $x)), "; ",
disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
- $x =~ /$y/i ? 1 : 0,
+ $x =~ /\Q$y/i ? 1 : 0,
"; ",
disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
- $y =~ /$x/i ? 1 : 0,
+ $y =~ /\Q$x/i ? 1 : 0,
"\n");
- push @f, $x unless $x =~ /$y/i && $y =~ /$x/i;
+ push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
push @f, $x unless lc $x eq fc $x;
}
}
}
- report_result($Locale, $locales_test_number, @f == 0);
- if (@f) {
- print "# failed $locales_test_number locale '$Locale' numbers @f\n"
+ if ($Config{usequadmath}) {
+ print "# Skip: no locale radix with usequadmath ($Locale)\n";
+ report_result($Locale, $locales_test_number, 1);
+ } else {
+ report_result($Locale, $locales_test_number, @f == 0);
+ if (@f) {
+ print "# failed $locales_test_number locale '$Locale' numbers @f\n"
+ }
}
}
}
# Recount the errors.
+TEST_NUM:
foreach $test_num ($first_locales_test_number..$final_locales_test_number) {
if (%setlocale_failed) {
print "not ";
}
- elsif ($Problem{$test_num} || !defined $Okay{$test_num} || !@{$Okay{$test_num}}) {
+ elsif ($Problem{$test_num}
+ || ! defined $Okay{$test_num}
+ || ! @{$Okay{$test_num}})
+ {
if (defined $not_necessarily_a_problem_test_number
&& $test_num == $not_necessarily_a_problem_test_number)
{
print "# It usually indicates a problem in the environment,\n";
print "# not in Perl itself.\n";
}
- if ($Okay{$test_num} && grep { $_ == $test_num } keys %problematical_tests) {
- no warnings 'experimental::autoderef';
+
+ # If there are any locales that pass this test, or are known-bad, it
+ # may be that there are enough passes that we TODO the failure.
+ if (($Okay{$test_num} || $Known_bad_locale{$test_num})
+ && grep { $_ == $test_num } keys %problematical_tests)
+ {
+ no warnings 'experimental::postderef';
+
+ # Don't count the known-bad failures when calculating the
+ # percentage that fail.
+ my $known_failures = (exists $Known_bad_locale{$test_num})
+ ? scalar(keys $Known_bad_locale{$test_num}->%*)
+ : 0;
+ my $adjusted_failures = scalar(keys $Problem{$test_num}->%*)
+ - $known_failures;
+
+ # Specially handle failures where only known-bad locales fail.
+ # This makes the diagnositics clearer.
+ if ($adjusted_failures <= 0) {
+ print "not ok $test_num $test_names{$test_num} # TODO fails only on ",
+ "known bad locales: ",
+ join " ", keys $Known_bad_locale{$test_num}->%*, "\n";
+ next TEST_NUM;
+ }
+
# Round to nearest .1%
- my $percent_fail = (int(.5 + (1000 * scalar(keys $Problem{$test_num})
+ my $percent_fail = (int(.5 + (1000 * $adjusted_failures
/ scalar(@Locale))))
/ 10;
if ($percent_fail < $acceptable_failure_percentage) {
if (! $debug) {
$test_names{$test_num} .= 'TODO';
- print "# ", 100 - $percent_fail, "% of locales pass the following test, so it is likely that the failures\n";
+ print "# ", 100 - $percent_fail, "% of locales not known to be problematic on this platform\n";
+ print "# pass the following test, so it is likely that the failures\n";
print "# are errors in the locale definitions. The test is marked TODO, as the\n";
print "# problem is not likely to be Perl's\n";
}
}
- elsif ($debug) {
+ if ($debug) {
print "# $percent_fail% of locales (",
- scalar(keys $Problem{$test_num}),
+ scalar(keys $Problem{$test_num}->%*),
" of ",
scalar(@Locale),
- ") fail the following test\n";
+ ") fail the above test (TODO cut-off is ",
+ $acceptable_failure_percentage,
+ "%)\n";
}
}
print "#\n";
# All casing operations under locale (but not :not_characters) should
# taint
if ($function =~ /^u/) {
- @list = ("", "a", "\xe0", "\xff", "\x{fb00}", "\x{149}", "\x{101}");
- $ascii_case_change_delta = -32;
+ @list = ("", "a",
+ chr(utf8::unicode_to_native(0xe0)),
+ chr(utf8::unicode_to_native(0xff)),
+ "\x{fb00}", "\x{149}", "\x{101}");
+ $ascii_case_change_delta = ($is_ebcdic) ? +64 : -32;
$above_latin1_case_change_delta = -1;
}
else {
- @list = ("", "A", "\xC0", "\x{17F}", "\x{100}");
- $ascii_case_change_delta = +32;
+ @list = ("", "A",
+ chr(utf8::unicode_to_native(0xC0)),
+ "\x{17F}", "\x{100}");
+ $ascii_case_change_delta = ($is_ebcdic) ? -64 : +32;
$above_latin1_case_change_delta = +1;
}
foreach my $is_utf8_locale (0 .. 1) {
my $should_be;
my $changed;
if (! $is_utf8_locale) {
+ no warnings 'locale';
$should_be = ($j == $#list)
? chr(ord($char) + $above_latin1_case_change_delta)
- : (length $char == 0 || ord($char) > 127)
- ? $char
- : chr(ord($char) + $ascii_case_change_delta);
+ : (length $char == 0 || utf8::native_to_unicode(ord($char)) > 127)
+ ? $char
+ : chr(ord($char) + $ascii_case_change_delta);
# This monstrosity is in order to avoid using an eval,
# which might perturb the results
foreach ($first_locales_test_number..$final_locales_test_number) {
if ($Problem{$_}) {
my @f = sort keys %{ $Problem{$_} };
+
+ # Don't list the failures caused by known-bad locales.
+ if (exists $known_bad_locales{$^O}) {
+ @f = grep { $_ !~ $known_bad_locales{$^O} } @f;
+ next unless @f;
+ }
my $f = join(" ", @f);
$f =~ s/(.{50,60}) /$1\n#\t/g;
print
}
}
+if (exists $known_bad_locales{$^O} && ! %Known_bad_locale) {
+ $test_num++;
+ print "ok $test_num $^O no longer has known bad locales # TODO\n";
+}
+
print "1..$test_num\n";
# eof