This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
perlfunc: re-document old split() @_ side effect
[perl5.git] / lib / locale.t
CommitLineData
8ebc5c01 1#!./perl -wT
2
66cbab2c
KW
3# This tests plain 'use locale' and adorned 'use locale ":not_characters"'
4# Because these pragmas are compile time, and I (khw) am trying to test
5# without using 'eval' as much as possible, which might cloud the issue, the
6# crucial parts of the code are duplicated in a block for each pragma.
7
6c2e653d
KW
8# To make a TODO test, add the string 'TODO' to its %test_names value
9
9de25f67
KW
10my $is_ebcdic = ord("A") == 193;
11
02d357f9
KW
12no warnings 'locale'; # We test even weird locales; and do some scary things
13 # in ok locales
14
e3a2734b
KW
15binmode STDOUT, ':utf8';
16binmode STDERR, ':utf8';
17
8ebc5c01 18BEGIN {
19 chdir 't' if -d 't';
20822f61 20 @INC = '../lib';
f9cbebe1 21 unshift @INC, '.';
ef9d5242
KW
22 require './loc_tools.pl';
23 unless (locales_enabled('LC_CTYPE')) {
b002077a
CS
24 print "1..0\n";
25 exit;
26 }
2de3dbcc 27 $| = 1;
ef9d5242 28 require Config; import Config;
8ebc5c01 29}
30
31use strict;
26230909 32use feature 'fc', 'postderef';
8ebc5c01 33
92891c66 34# =1 adds debugging output; =2 increases the verbosity somewhat
d127e8bc 35our $debug = $ENV{PERL_DEBUG_FULL_TEST} // 0;
284102e8 36
6d5d702a
KW
37# Certain tests have been shown to be problematical for a few locales. Don't
38# fail them unless at least this percentage of the tested locales fail.
4874c8de 39# On AIX machines, many locales call a no-break space a graphic.
df33771d 40# (There aren't 1000 locales currently in existence, so 99.9 works)
4874c8de 41my $acceptable_failure_percentage = ($^O =~ / ^ ( AIX ) $ /ix)
31f658ea
KW
42 ? 99.9
43 : 5;
6d5d702a 44
7493b8f2 45# The list of test numbers of the problematic tests.
5b1f18a7 46my %problematical_tests;
7493b8f2 47
852b6749
KW
48# If any %problematical_tests fails in one of these locales, it is
49# considered a TODO.
50my %known_bad_locales = (
51 irix => qr/ ^ (?: cs | hu | sk ) $/x,
52 darwin => qr/ ^ lt_LT.ISO8859 /ix,
53 os390 => qr/ ^ italian /ix,
54 );
7493b8f2 55
dc51aebe
KW
56# cygwin isn't returning proper radix length in this locale, but supposedly to
57# be fixed in later versions.
58if ($^O eq 'cygwin' && version->new(($Config{osvers} =~ /^(\d+(?:\.\d+)+)/)[0]) le v2.4.1) {
59 $known_bad_locales{'cygwin'} = qr/ ^ ps_AF /ix;
60}
61
db4b7445
A
62use Dumpvalue;
63
64my $dumper = Dumpvalue->new(
65 tick => qq{"},
66 quoteHighBit => 0,
67 unctrl => "quote"
68 );
d127e8bc 69
6be75cd7 70sub debug {
db4b7445 71 return unless $debug;
46973bb2 72 my($mess) = join "", '# ', @_;
d8f8a481 73 chomp $mess;
bd083ee5 74 print STDERR $dumper->stringify($mess,1), "\n";
6be75cd7
JH
75}
76
d127e8bc
KW
77sub note {
78 local $debug = 1;
79 debug @_;
80}
81
92891c66
KW
82sub debug_more {
83 return unless $debug > 1;
84 return debug(@_);
85}
86
6be75cd7 87sub debugf {
bd083ee5 88 printf STDERR @_ if $debug;
6be75cd7
JH
89}
90
40205cab 91$a = 'abc %9';
8ebc5c01 92
c213d471
KW
93my $test_num = 0;
94
8ebc5c01 95sub ok {
c213d471 96 my ($result, $message) = @_;
e3a2734b 97 $message = "" unless defined $message;
8ebc5c01 98
99 print 'not ' unless ($result);
c213d471 100 print "ok " . ++$test_num;
e3a2734b
KW
101 print " $message";
102 print "\n";
cf82bb84 103 return ($result) ? 1 : 0;
8ebc5c01 104}
105
106# First we'll do a lot of taint checking for locales.
107# This is the easiest to test, actually, as any locale,
108# even the default locale will taint under 'use locale'.
109
110sub is_tainted { # hello, camel two.
9f1b1f2d 111 no warnings 'uninitialized' ;
8ebc5c01 112 my $dummy;
ba74571d 113 local $@;
8ebc5c01 114 not eval { $dummy = join("", @_), kill 0; 1 }
115}
116
a9b7c637
KW
117sub check_taint ($;$) {
118 my $message_tail = $_[1] // "";
2575405e
KW
119
120 # Extra blanks are so aligns with taint_not output
121 $message_tail = ": $message_tail" if $message_tail;
a9b7c637 122 ok is_tainted($_[0]), "verify that is tainted$message_tail";
8ebc5c01 123}
124
a9b7c637
KW
125sub check_taint_not ($;$) {
126 my $message_tail = $_[1] // "";
2575405e 127 $message_tail = ": $message_tail" if $message_tail;
a9b7c637 128 ok((not is_tainted($_[0])), "verify that isn't tainted$message_tail");
8ebc5c01 129}
130
308482c2
KW
131foreach my $category (qw(ALL COLLATE CTYPE MESSAGES MONETARY NUMERIC TIME)) {
132 my $short_result = locales_enabled($category);
133 ok ($short_result == 0 || $short_result == 1,
134 "Verify locales_enabled('$category') returns 0 or 1");
135 debug("locales_enabled('$category') returned '$short_result'");
136 my $long_result = locales_enabled("LC_$category");
137 if (! ok ($long_result == $short_result,
138 " and locales_enabled('LC_$category') returns "
139 . "the same value")
140 ) {
141 debug("locales_enabled('LC_$category') returned $long_result");
142 }
143}
144
bf3cd0e6
KW
145"\tb\t" =~ /^m?(\s)(.*)\1$/;
146check_taint_not $&, "not tainted outside 'use locale'";
147;
148
8ebc5c01 149use locale; # engage locale and therefore locale taint.
150
b6fb4de7
KW
151# BE SURE TO COPY ANYTHING YOU ADD to these tests to the block below for
152# ":notcharacters"
2575405e
KW
153
154check_taint_not $a, '$a';
155
156check_taint uc($a), 'uc($a)';
157check_taint "\U$a", '"\U$a"';
158check_taint ucfirst($a), 'ucfirst($a)';
159check_taint "\u$a", '"\u$a"';
160check_taint lc($a), 'lc($a)';
161check_taint fc($a), 'fc($a)';
162check_taint "\L$a", '"\L$a"';
163check_taint "\F$a", '"\F$a"';
164check_taint lcfirst($a), 'lcfirst($a)';
165check_taint "\l$a", '"\l$a"';
166
167check_taint_not sprintf('%e', 123.456), "sprintf('%e', 123.456)";
168check_taint_not sprintf('%f', 123.456), "sprintf('%f', 123.456)";
169check_taint_not sprintf('%g', 123.456), "sprintf('%g', 123.456)";
170check_taint_not sprintf('%d', 123.456), "sprintf('%d', 123.456)";
171check_taint_not sprintf('%x', 123.456), "sprintf('%x', 123.456)";
8ebc5c01 172
173$_ = $a; # untaint $_
174
175$_ = uc($a); # taint $_
176
2575405e 177check_taint $_, '$_ = uc($a)';
8ebc5c01 178
179/(\w)/; # taint $&, $`, $', $+, $1.
2575405e 180check_taint $&, "\$& from /(\\w)/";
fdf73a7f
KW
181check_taint $`, "\t\$`";
182check_taint $', "\t\$'";
183check_taint $+, "\t\$+";
184check_taint $1, "\t\$1";
185check_taint_not $2, "\t\$2";
8ebc5c01 186
187/(.)/; # untaint $&, $`, $', $+, $1.
2575405e 188check_taint_not $&, "\$& from /(.)/";
fdf73a7f
KW
189check_taint_not $`, "\t\$`";
190check_taint_not $', "\t\$'";
191check_taint_not $+, "\t\$+";
192check_taint_not $1, "\t\$1";
193check_taint_not $2, "\t\$2";
8ebc5c01 194
195/(\W)/; # taint $&, $`, $', $+, $1.
2575405e 196check_taint $&, "\$& from /(\\W)/";
fdf73a7f
KW
197check_taint $`, "\t\$`";
198check_taint $', "\t\$'";
199check_taint $+, "\t\$+";
200check_taint $1, "\t\$1";
201check_taint_not $2, "\t\$2";
8ebc5c01 202
9486279c 203/(.)/; # untaint $&, $`, $', $+, $1.
2575405e 204check_taint_not $&, "\$& from /(.)/";
fdf73a7f
KW
205check_taint_not $`, "\t\$`";
206check_taint_not $', "\t\$'";
207check_taint_not $+, "\t\$+";
208check_taint_not $1, "\t\$1";
209check_taint_not $2, "\t\$2";
9486279c 210
8ebc5c01 211/(\s)/; # taint $&, $`, $', $+, $1.
2575405e 212check_taint $&, "\$& from /(\\s)/";
fdf73a7f
KW
213check_taint $`, "\t\$`";
214check_taint $', "\t\$'";
215check_taint $+, "\t\$+";
216check_taint $1, "\t\$1";
217check_taint_not $2, "\t\$2";
8ebc5c01 218
9486279c 219/(.)/; # untaint $&, $`, $', $+, $1.
2575405e 220check_taint_not $&, "\$& from /(.)/";
9486279c 221
8ebc5c01 222/(\S)/; # taint $&, $`, $', $+, $1.
2575405e 223check_taint $&, "\$& from /(\\S)/";
fdf73a7f
KW
224check_taint $`, "\t\$`";
225check_taint $', "\t\$'";
226check_taint $+, "\t\$+";
227check_taint $1, "\t\$1";
228check_taint_not $2, "\t\$2";
8ebc5c01 229
63baef57 230/(.)/; # untaint $&, $`, $', $+, $1.
2575405e 231check_taint_not $&, "\$& from /(.)/";
63baef57 232
40205cab
KW
233"0" =~ /(\d)/; # taint $&, $`, $', $+, $1.
234check_taint $&, "\$& from /(\\d)/";
235check_taint $`, "\t\$`";
236check_taint $', "\t\$'";
237check_taint $+, "\t\$+";
238check_taint $1, "\t\$1";
239check_taint_not $2, "\t\$2";
240
241/(.)/; # untaint $&, $`, $', $+, $1.
242check_taint_not $&, "\$& from /(.)/";
243
244/(\D)/; # taint $&, $`, $', $+, $1.
245check_taint $&, "\$& from /(\\D)/";
246check_taint $`, "\t\$`";
247check_taint $', "\t\$'";
248check_taint $+, "\t\$+";
249check_taint $1, "\t\$1";
250check_taint_not $2, "\t\$2";
251
252/(.)/; # untaint $&, $`, $', $+, $1.
253check_taint_not $&, "\$& from /(.)/";
254
255/([[:alnum:]])/; # taint $&, $`, $', $+, $1.
256check_taint $&, "\$& from /([[:alnum:]])/";
257check_taint $`, "\t\$`";
258check_taint $', "\t\$'";
259check_taint $+, "\t\$+";
260check_taint $1, "\t\$1";
261check_taint_not $2, "\t\$2";
262
263/(.)/; # untaint $&, $`, $', $+, $1.
264check_taint_not $&, "\$& from /(.)/";
265
266/([[:^alnum:]])/; # taint $&, $`, $', $+, $1.
267check_taint $&, "\$& from /([[:^alnum:]])/";
268check_taint $`, "\t\$`";
269check_taint $', "\t\$'";
270check_taint $+, "\t\$+";
271check_taint $1, "\t\$1";
272check_taint_not $2, "\t\$2";
273
63baef57 274"a" =~ /(a)|(\w)/; # taint $&, $`, $', $+, $1.
2575405e 275check_taint $&, "\$& from /(a)|(\\w)/";
63baef57
KW
276check_taint $`, "\t\$`";
277check_taint $', "\t\$'";
278check_taint $+, "\t\$+";
279check_taint $1, "\t\$1";
2575405e
KW
280ok($1 eq 'a', ("\t" x 5) . "\$1 is 'a'");
281ok(! defined $2, ("\t" x 5) . "\$2 is undefined");
63baef57
KW
282check_taint_not $2, "\t\$2";
283check_taint_not $3, "\t\$3";
284
285/(.)/; # untaint $&, $`, $', $+, $1.
2575405e 286check_taint_not $&, "\$& from /(.)/";
63baef57
KW
287
288"\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i; # no tainting because no locale dependence
2575405e 289check_taint_not $&, "\$& from /(\\N{CYRILLIC CAPITAL LETTER A})/i";
63baef57
KW
290check_taint_not $`, "\t\$`";
291check_taint_not $', "\t\$'";
292check_taint_not $+, "\t\$+";
293check_taint_not $1, "\t\$1";
2575405e 294ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\t\$1 is 'small cyrillic a'");
63baef57
KW
295check_taint_not $2, "\t\$2";
296
297/(.)/; # untaint $&, $`, $', $+, $1.
2575405e 298check_taint_not $&, "\$& from /./";
63baef57 299
40205cab
KW
300"(\N{KELVIN SIGN})" =~ /(\N{KELVIN SIGN})/i; # taints because depends on locale
301check_taint $&, "\$& from /(\\N{KELVIN SIGN})/i";
302check_taint $`, "\t\$`";
303check_taint $', "\t\$'";
304check_taint $+, "\t\$+";
305check_taint $1, "\t\$1";
306check_taint_not $2, "\t\$2";
307
63baef57 308/(.)/; # untaint $&, $`, $', $+, $1.
2575405e 309check_taint_not $&, "\$& from /(.)/";
63baef57
KW
310
311"a:" =~ /(.)\b(.)/; # taint $&, $`, $', $+, $1.
2575405e 312check_taint $&, "\$& from /(.)\\b(.)/";
63baef57
KW
313check_taint $`, "\t\$`";
314check_taint $', "\t\$'";
315check_taint $+, "\t\$+";
316check_taint $1, "\t\$1";
317check_taint $2, "\t\$2";
318check_taint_not $3, "\t\$3";
319
320/(.)/; # untaint $&, $`, $', $+, $1.
2575405e 321check_taint_not $&, "\$& from /./";
63baef57
KW
322
323"aa" =~ /(.)\B(.)/; # taint $&, $`, $', $+, $1.
2575405e 324check_taint $&, "\$& from /(.)\\B(.)/";
63baef57
KW
325check_taint $`, "\t\$`";
326check_taint $', "\t\$'";
327check_taint $+, "\t\$+";
328check_taint $1, "\t\$1";
329check_taint $2, "\t\$2";
330check_taint_not $3, "\t\$3";
331
332/(.)/; # untaint $&, $`, $', $+, $1.
2575405e 333check_taint_not $&, "\$& from /./";
63baef57
KW
334
335"aaa" =~ /(.).(\1)/i; # notaint because not locale dependent
2575405e 336check_taint_not $&, "\$ & from /(.).(\\1)/";
63baef57
KW
337check_taint_not $`, "\t\$`";
338check_taint_not $', "\t\$'";
339check_taint_not $+, "\t\$+";
340check_taint_not $1, "\t\$1";
341check_taint_not $2, "\t\$2";
2575405e 342check_taint_not $3, "\t\$3";
63baef57
KW
343
344/(.)/; # untaint $&, $`, $', $+, $1.
2575405e 345check_taint_not $&, "\$ & from /./";
63baef57 346
8ebc5c01 347$_ = $a; # untaint $_
348
2575405e 349check_taint_not $_, 'untainting $_ works';
8ebc5c01 350
351/(b)/; # this must not taint
2575405e 352check_taint_not $&, "\$ & from /(b)/";
fdf73a7f
KW
353check_taint_not $`, "\t\$`";
354check_taint_not $', "\t\$'";
355check_taint_not $+, "\t\$+";
356check_taint_not $1, "\t\$1";
357check_taint_not $2, "\t\$2";
8ebc5c01 358
359$_ = $a; # untaint $_
360
2575405e 361check_taint_not $_, 'untainting $_ works';
8ebc5c01 362
363$b = uc($a); # taint $b
364s/(.+)/$b/; # this must taint only the $_
365
2575405e 366check_taint $_, '$_ (wasn\'t tainted) from s/(.+)/$b/ where $b is tainted';
fdf73a7f
KW
367check_taint_not $&, "\t\$&";
368check_taint_not $`, "\t\$`";
369check_taint_not $', "\t\$'";
370check_taint_not $+, "\t\$+";
371check_taint_not $1, "\t\$1";
372check_taint_not $2, "\t\$2";
8ebc5c01 373
374$_ = $a; # untaint $_
375
376s/(.+)/b/; # this must not taint
2575405e 377check_taint_not $_, '$_ (wasn\'t tainted) from s/(.+)/b/';
fdf73a7f
KW
378check_taint_not $&, "\t\$&";
379check_taint_not $`, "\t\$`";
380check_taint_not $', "\t\$'";
381check_taint_not $+, "\t\$+";
382check_taint_not $1, "\t\$1";
383check_taint_not $2, "\t\$2";
8ebc5c01 384
385$b = $a; # untaint $b
386
387($b = $a) =~ s/\w/$&/;
2575405e
KW
388check_taint $b, '$b from ($b = $a) =~ s/\w/$&/'; # $b should be tainted.
389check_taint_not $a, '$a from ($b = $a) =~ s/\w/$&/'; # $a should be not.
8ebc5c01 390
391$_ = $a; # untaint $_
392
393s/(\w)/\l$1/; # this must taint
2575405e 394check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\l$1/,'; # this must taint
fdf73a7f
KW
395check_taint $&, "\t\$&";
396check_taint $`, "\t\$`";
397check_taint $', "\t\$'";
398check_taint $+, "\t\$+";
399check_taint $1, "\t\$1";
400check_taint_not $2, "\t\$2";
8ebc5c01 401
402$_ = $a; # untaint $_
403
404s/(\w)/\L$1/; # this must taint
2575405e 405check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\L$1/,';
fdf73a7f
KW
406check_taint $&, "\t\$&";
407check_taint $`, "\t\$`";
408check_taint $', "\t\$'";
409check_taint $+, "\t\$+";
410check_taint $1, "\t\$1";
411check_taint_not $2, "\t\$2";
8ebc5c01 412
413$_ = $a; # untaint $_
414
415s/(\w)/\u$1/; # this must taint
2575405e 416check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\u$1/';
fdf73a7f
KW
417check_taint $&, "\t\$&";
418check_taint $`, "\t\$`";
419check_taint $', "\t\$'";
420check_taint $+, "\t\$+";
421check_taint $1, "\t\$1";
422check_taint_not $2, "\t\$2";
8ebc5c01 423
424$_ = $a; # untaint $_
425
426s/(\w)/\U$1/; # this must taint
2575405e 427check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\U$1/';
fdf73a7f
KW
428check_taint $&, "\t\$&";
429check_taint $`, "\t\$`";
430check_taint $', "\t\$'";
431check_taint $+, "\t\$+";
432check_taint $1, "\t\$1";
433check_taint_not $2, "\t\$2";
8ebc5c01 434
faab3281
KW
435# After all this tainting $a should be cool.
436
2575405e 437check_taint_not $a, '$a still not tainted';
faab3281 438
b99851e1
KW
439"a" =~ /([a-z])/;
440check_taint_not $1, '"a" =~ /([a-z])/';
441"foo.bar_baz" =~ /^(.*)[._](.*?)$/; # Bug 120675
442check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/';
443
7a9d1c02 444# BE SURE TO COPY ANYTHING YOU ADD to the block below
8ebc5c01 445
66cbab2c
KW
446{ # This is just the previous tests copied here with a different
447 # compile-time pragma.
448
449 use locale ':not_characters'; # engage restricted locale with different
450 # tainting rules
b6fb4de7
KW
451 check_taint_not $a, '$a';
452
453 check_taint_not uc($a), 'uc($a)';
454 check_taint_not "\U$a", '"\U$a"';
455 check_taint_not ucfirst($a), 'ucfirst($a)';
456 check_taint_not "\u$a", '"\u$a"';
457 check_taint_not lc($a), 'lc($a)';
458 check_taint_not fc($a), 'fc($a)';
459 check_taint_not "\L$a", '"\L$a"';
460 check_taint_not "\F$a", '"\F$a"';
461 check_taint_not lcfirst($a), 'lcfirst($a)';
462 check_taint_not "\l$a", '"\l$a"';
463
464 check_taint_not sprintf('%e', 123.456), "sprintf('%e', 123.456)";
465 check_taint_not sprintf('%f', 123.456), "sprintf('%f', 123.456)";
466 check_taint_not sprintf('%g', 123.456), "sprintf('%g', 123.456)";
467 check_taint_not sprintf('%d', 123.456), "sprintf('%d', 123.456)";
468 check_taint_not sprintf('%x', 123.456), "sprintf('%x', 123.456)";
66cbab2c
KW
469
470 $_ = $a; # untaint $_
471
b6fb4de7 472 $_ = uc($a);
66cbab2c 473
b6fb4de7 474 check_taint_not $_, '$_ = uc($a)';
66cbab2c 475
b6fb4de7
KW
476 /(\w)/;
477 check_taint_not $&, "\$& from /(\\w)/";
478 check_taint_not $`, "\t\$`";
479 check_taint_not $', "\t\$'";
480 check_taint_not $+, "\t\$+";
481 check_taint_not $1, "\t\$1";
482 check_taint_not $2, "\t\$2";
66cbab2c
KW
483
484 /(.)/; # untaint $&, $`, $', $+, $1.
b6fb4de7
KW
485 check_taint_not $&, "\$& from /(.)/";
486 check_taint_not $`, "\t\$`";
487 check_taint_not $', "\t\$'";
488 check_taint_not $+, "\t\$+";
489 check_taint_not $1, "\t\$1";
490 check_taint_not $2, "\t\$2";
491
492 /(\W)/;
493 check_taint_not $&, "\$& from /(\\W)/";
494 check_taint_not $`, "\t\$`";
495 check_taint_not $', "\t\$'";
496 check_taint_not $+, "\t\$+";
497 check_taint_not $1, "\t\$1";
498 check_taint_not $2, "\t\$2";
66cbab2c 499
b6fb4de7
KW
500 /(.)/; # untaint $&, $`, $', $+, $1.
501 check_taint_not $&, "\$& from /(.)/";
502 check_taint_not $`, "\t\$`";
503 check_taint_not $', "\t\$'";
504 check_taint_not $+, "\t\$+";
505 check_taint_not $1, "\t\$1";
506 check_taint_not $2, "\t\$2";
507
508 /(\s)/;
509 check_taint_not $&, "\$& from /(\\s)/";
510 check_taint_not $`, "\t\$`";
511 check_taint_not $', "\t\$'";
512 check_taint_not $+, "\t\$+";
513 check_taint_not $1, "\t\$1";
514 check_taint_not $2, "\t\$2";
66cbab2c 515
b6fb4de7
KW
516 /(.)/; # untaint $&, $`, $', $+, $1.
517 check_taint_not $&, "\$& from /(.)/";
66cbab2c 518
b6fb4de7
KW
519 /(\S)/;
520 check_taint_not $&, "\$& from /(\\S)/";
521 check_taint_not $`, "\t\$`";
522 check_taint_not $', "\t\$'";
523 check_taint_not $+, "\t\$+";
524 check_taint_not $1, "\t\$1";
525 check_taint_not $2, "\t\$2";
66cbab2c 526
b6fb4de7
KW
527 /(.)/; # untaint $&, $`, $', $+, $1.
528 check_taint_not $&, "\$& from /(.)/";
529
530 "0" =~ /(\d)/;
531 check_taint_not $&, "\$& from /(\\d)/";
532 check_taint_not $`, "\t\$`";
533 check_taint_not $', "\t\$'";
534 check_taint_not $+, "\t\$+";
535 check_taint_not $1, "\t\$1";
536 check_taint_not $2, "\t\$2";
537
538 /(.)/; # untaint $&, $`, $', $+, $1.
539 check_taint_not $&, "\$& from /(.)/";
66cbab2c 540
b6fb4de7
KW
541 /(\D)/;
542 check_taint_not $&, "\$& from /(\\D)/";
543 check_taint_not $`, "\t\$`";
544 check_taint_not $', "\t\$'";
545 check_taint_not $+, "\t\$+";
546 check_taint_not $1, "\t\$1";
547 check_taint_not $2, "\t\$2";
66cbab2c 548
b6fb4de7
KW
549 /(.)/; # untaint $&, $`, $', $+, $1.
550 check_taint_not $&, "\$& from /(.)/";
551
552 /([[:alnum:]])/;
553 check_taint_not $&, "\$& from /([[:alnum:]])/";
554 check_taint_not $`, "\t\$`";
555 check_taint_not $', "\t\$'";
556 check_taint_not $+, "\t\$+";
557 check_taint_not $1, "\t\$1";
558 check_taint_not $2, "\t\$2";
559
560 /(.)/; # untaint $&, $`, $', $+, $1.
561 check_taint_not $&, "\$& from /(.)/";
562
563 /([[:^alnum:]])/;
564 check_taint_not $&, "\$& from /([[:^alnum:]])/";
565 check_taint_not $`, "\t\$`";
566 check_taint_not $', "\t\$'";
567 check_taint_not $+, "\t\$+";
568 check_taint_not $1, "\t\$1";
569 check_taint_not $2, "\t\$2";
570
571 "a" =~ /(a)|(\w)/;
572 check_taint_not $&, "\$& from /(a)|(\\w)/";
573 check_taint_not $`, "\t\$`";
574 check_taint_not $', "\t\$'";
575 check_taint_not $+, "\t\$+";
576 check_taint_not $1, "\t\$1";
577 ok($1 eq 'a', ("\t" x 5) . "\$1 is 'a'");
578 ok(! defined $2, ("\t" x 5) . "\$2 is undefined");
579 check_taint_not $2, "\t\$2";
580 check_taint_not $3, "\t\$3";
581
582 /(.)/; # untaint $&, $`, $', $+, $1.
583 check_taint_not $&, "\$& from /(.)/";
584
585 "\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i;
586 check_taint_not $&, "\$& from /(\\N{CYRILLIC CAPITAL LETTER A})/i";
587 check_taint_not $`, "\t\$`";
588 check_taint_not $', "\t\$'";
589 check_taint_not $+, "\t\$+";
590 check_taint_not $1, "\t\$1";
591 ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\t\$1 is 'small cyrillic a'");
592 check_taint_not $2, "\t\$2";
593
594 /(.)/; # untaint $&, $`, $', $+, $1.
595 check_taint_not $&, "\$& from /./";
66cbab2c 596
b6fb4de7
KW
597 "(\N{KELVIN SIGN})" =~ /(\N{KELVIN SIGN})/i;
598 check_taint_not $&, "\$& from /(\\N{KELVIN SIGN})/i";
599 check_taint_not $`, "\t\$`";
600 check_taint_not $', "\t\$'";
601 check_taint_not $+, "\t\$+";
602 check_taint_not $1, "\t\$1";
603 check_taint_not $2, "\t\$2";
604
605 /(.)/; # untaint $&, $`, $', $+, $1.
606 check_taint_not $&, "\$& from /(.)/";
607
608 "a:" =~ /(.)\b(.)/;
609 check_taint_not $&, "\$& from /(.)\\b(.)/";
610 check_taint_not $`, "\t\$`";
611 check_taint_not $', "\t\$'";
612 check_taint_not $+, "\t\$+";
613 check_taint_not $1, "\t\$1";
614 check_taint_not $2, "\t\$2";
615 check_taint_not $3, "\t\$3";
616
617 /(.)/; # untaint $&, $`, $', $+, $1.
618 check_taint_not $&, "\$& from /./";
619
620 "aa" =~ /(.)\B(.)/;
621 check_taint_not $&, "\$& from /(.)\\B(.)/";
622 check_taint_not $`, "\t\$`";
623 check_taint_not $', "\t\$'";
624 check_taint_not $+, "\t\$+";
625 check_taint_not $1, "\t\$1";
626 check_taint_not $2, "\t\$2";
627 check_taint_not $3, "\t\$3";
628
629 /(.)/; # untaint $&, $`, $', $+, $1.
630 check_taint_not $&, "\$& from /./";
631
632 "aaa" =~ /(.).(\1)/i; # notaint because not locale dependent
633 check_taint_not $&, "\$ & from /(.).(\\1)/";
634 check_taint_not $`, "\t\$`";
635 check_taint_not $', "\t\$'";
636 check_taint_not $+, "\t\$+";
637 check_taint_not $1, "\t\$1";
638 check_taint_not $2, "\t\$2";
639 check_taint_not $3, "\t\$3";
640
641 /(.)/; # untaint $&, $`, $', $+, $1.
642 check_taint_not $&, "\$ & from /./";
643
644 $_ = $a; # untaint $_
645
646 check_taint_not $_, 'untainting $_ works';
647
648 /(b)/;
649 check_taint_not $&, "\$ & from /(b)/";
650 check_taint_not $`, "\t\$`";
651 check_taint_not $', "\t\$'";
652 check_taint_not $+, "\t\$+";
653 check_taint_not $1, "\t\$1";
654 check_taint_not $2, "\t\$2";
66cbab2c
KW
655
656 $_ = $a; # untaint $_
657
b6fb4de7
KW
658 check_taint_not $_, 'untainting $_ works';
659
660 s/(.+)/b/;
661 check_taint_not $_, '$_ (wasn\'t tainted) from s/(.+)/b/';
662 check_taint_not $&, "\t\$&";
663 check_taint_not $`, "\t\$`";
664 check_taint_not $', "\t\$'";
665 check_taint_not $+, "\t\$+";
666 check_taint_not $1, "\t\$1";
667 check_taint_not $2, "\t\$2";
66cbab2c
KW
668
669 $b = $a; # untaint $b
670
671 ($b = $a) =~ s/\w/$&/;
b6fb4de7
KW
672 check_taint_not $b, '$b from ($b = $a) =~ s/\w/$&/';
673 check_taint_not $a, '$a from ($b = $a) =~ s/\w/$&/';
66cbab2c
KW
674
675 $_ = $a; # untaint $_
676
b6fb4de7
KW
677 s/(\w)/\l$1/;
678 check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\l$1/,'; # this must taint
679 check_taint_not $&, "\t\$&";
680 check_taint_not $`, "\t\$`";
681 check_taint_not $', "\t\$'";
682 check_taint_not $+, "\t\$+";
683 check_taint_not $1, "\t\$1";
684 check_taint_not $2, "\t\$2";
66cbab2c
KW
685
686 $_ = $a; # untaint $_
687
b6fb4de7
KW
688 s/(\w)/\L$1/;
689 check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\L$1/,';
690 check_taint_not $&, "\t\$&";
691 check_taint_not $`, "\t\$`";
692 check_taint_not $', "\t\$'";
693 check_taint_not $+, "\t\$+";
694 check_taint_not $1, "\t\$1";
695 check_taint_not $2, "\t\$2";
66cbab2c
KW
696
697 $_ = $a; # untaint $_
698
b6fb4de7
KW
699 s/(\w)/\u$1/;
700 check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\u$1/';
701 check_taint_not $&, "\t\$&";
702 check_taint_not $`, "\t\$`";
703 check_taint_not $', "\t\$'";
704 check_taint_not $+, "\t\$+";
705 check_taint_not $1, "\t\$1";
706 check_taint_not $2, "\t\$2";
66cbab2c
KW
707
708 $_ = $a; # untaint $_
709
b6fb4de7
KW
710 s/(\w)/\U$1/;
711 check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\U$1/';
712 check_taint_not $&, "\t\$&";
713 check_taint_not $`, "\t\$`";
714 check_taint_not $', "\t\$'";
715 check_taint_not $+, "\t\$+";
716 check_taint_not $1, "\t\$1";
717 check_taint_not $2, "\t\$2";
66cbab2c
KW
718
719 # After all this tainting $a should be cool.
720
b6fb4de7 721 check_taint_not $a, '$a still not tainted';
7a9d1c02
KW
722
723 "a" =~ /([a-z])/;
724 check_taint_not $1, '"a" =~ /([a-z])/';
725 "foo.bar_baz" =~ /^(.*)[._](.*?)$/; # Bug 120675
726 check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/';
b6fb4de7 727
66cbab2c
KW
728}
729
730# Here are in scope of 'use locale'
731
8ebc5c01 732# I think we've seen quite enough of taint.
733# Let us do some *real* locale work now,
284102e8 734# unless setlocale() is missing (i.e. minitest).
8ebc5c01 735
6cf0b567 736# The test number before our first setlocale()
66330f13 737my $final_without_setlocale = $test_num;
6cf0b567 738
284102e8
JH
739# Find locales.
740
46973bb2 741debug "Scanning for locales...\n";
6be75cd7 742
9b0711ee
KW
743require POSIX; import POSIX ':locale_h';
744
3d9e170f 745my @Locale = find_locales([ &POSIX::LC_CTYPE, &POSIX::LC_NUMERIC, &POSIX::LC_ALL ]);
4599a1de 746
46973bb2 747debug "Locales =\n";
887ef7ed 748for ( @Locale ) {
46973bb2 749 debug "$_\n";
887ef7ed 750}
8ebc5c01 751
d369fd5b
KW
752unless (@Locale) {
753 print "1..$test_num\n";
754 exit;
755}
756
d369fd5b
KW
757
758setlocale(&POSIX::LC_ALL, "C");
759
73fc293b
KW
760my %posixes;
761
284102e8 762my %Problem;
2a680da6 763my %Okay;
852b6749 764my %Known_bad_locale; # Failed test for a locale known to be bad
2a680da6 765my %Testing;
30032ef4 766my @Added_alpha; # Alphas that aren't in the C locale.
c08acc4c 767my %test_names;
284102e8 768
705af3af 769sub disp_chars {
019bf7dd
KW
770 # This returns a display string denoting the input parameter @_, each
771 # entry of which is a single character in the range 0-255. The first part
772 # of the output is a string of the characters in @_ that are ASCII
773 # graphics, and hence unambiguously displayable. They are given by code
774 # point order. The second part is the remaining code points, the ordinals
775 # of which are each displayed as 2-digit hex. Blanks are inserted so as
776 # to keep anything from the first part looking like a 2-digit hex number.
777
778 no locale;
779 my @chars = sort { ord $a <=> ord $b } @_;
780 my $output = "";
019bf7dd
KW
781 my $range_start;
782 my $start_class;
783 push @chars, chr(258); # This sentinel simplifies the loop termination
784 # logic
785 foreach my $i (0 .. @chars - 1) {
786 my $char = $chars[$i];
787 my $range_end;
788 my $class;
789
790 # We avoid using [:posix:] classes, as these are being tested in this
791 # file. Each equivalence class below is for things that can appear in
792 # a range; those that can't be in a range have class -1. 0 for those
793 # which should be output in hex; and >0 for the other ranges
794 if ($char =~ /[A-Z]/) {
795 $class = 2;
796 }
797 elsif ($char =~ /[a-z]/) {
798 $class = 3;
799 }
800 elsif ($char =~ /[0-9]/) {
801 $class = 4;
802 }
78fbfaf5
KW
803 # Uncomment to get literal punctuation displayed instead of hex
804 #elsif ($char =~ /[[\]!"#\$\%&\'()*+,.\/:\\;<=>?\@\^_`{|}~-]/) {
805 # $class = -1; # Punct never appears in a range
806 #}
019bf7dd
KW
807 else {
808 $class = 0; # Output in hex
809 }
810
811 if (! defined $range_start) {
812 if ($class < 0) {
78fbfaf5 813 $output .= " " . $char;
019bf7dd
KW
814 }
815 else {
816 $range_start = ord $char;
817 $start_class = $class;
818 }
819 } # A range ends if not consecutive, or the class-type changes
820 elsif (ord $char != ($range_end = ord($chars[$i-1])) + 1
821 || $class != $start_class)
822 {
823
824 # Here, the current character is not in the range. This means the
825 # previous character must have been. Output the range up through
826 # that one.
827 my $range_length = $range_end - $range_start + 1;
828 if ($start_class > 0) {
829 $output .= " " . chr($range_start);
830 $output .= "-" . chr($range_end) if $range_length > 1;
831 }
832 else {
78fbfaf5
KW
833 $output .= sprintf(" %02X", $range_start);
834 $output .= sprintf("-%02X", $range_end) if $range_length > 1;
019bf7dd
KW
835 }
836
837 # Handle the new current character, as potentially beginning a new
838 # range
839 undef $range_start;
840 redo;
841 }
842 }
843
844 $output =~ s/^ //;
78fbfaf5 845 return $output;
019bf7dd
KW
846}
847
010b53a6
KW
848sub disp_str ($) {
849 my $string = shift;
850
851 # Displays the string unambiguously. ASCII printables are always output
852 # as-is, though perhaps separated by blanks from other characters. If
853 # entirely printable ASCII, just returns the string. Otherwise if valid
854 # UTF-8 it uses the character names for non-printable-ASCII. Otherwise it
855 # outputs hex for each non-ASCII-printable byte.
856
857 return $string if $string =~ / ^ [[:print:]]* $/xa;
858
859 my $result = "";
860 my $prev_was_punct = 1; # Beginning is considered punct
861 if (utf8::valid($string) && utf8::is_utf8($string)) {
862 use charnames ();
863 foreach my $char (split "", $string) {
864
865 # Keep punctuation adjacent to other characters; otherwise
866 # separate them with a blank
867 if ($char =~ /[[:punct:]]/a) {
868 $result .= $char;
869 $prev_was_punct = 1;
870 }
871 elsif ($char =~ /[[:print:]]/a) {
872 $result .= " " unless $prev_was_punct;
873 $result .= $char;
874 $prev_was_punct = 0;
875 }
876 else {
877 $result .= " " unless $prev_was_punct;
e82d2a11
KW
878 my $name = charnames::viacode(ord $char);
879 $result .= (defined $name) ? $name : ':unknown:';
010b53a6
KW
880 $prev_was_punct = 0;
881 }
882 }
883 }
884 else {
885 use bytes;
886 foreach my $char (split "", $string) {
887 if ($char =~ /[[:punct:]]/a) {
888 $result .= $char;
889 $prev_was_punct = 1;
890 }
891 elsif ($char =~ /[[:print:]]/a) {
892 $result .= " " unless $prev_was_punct;
893 $result .= $char;
894 $prev_was_punct = 0;
895 }
896 else {
897 $result .= " " unless $prev_was_punct;
898 $result .= sprintf("%02X", ord $char);
899 $prev_was_punct = 0;
900 }
901 }
902 }
903
904 return $result;
905}
906
30032ef4
KW
907sub report_result {
908 my ($Locale, $i, $pass_fail, $message) = @_;
852b6749
KW
909 if ($pass_fail) {
910 push @{$Okay{$i}}, $Locale;
911 }
912 else {
f17bc913
KW
913 $message //= "";
914 $message = " ($message)" if $message;
852b6749
KW
915 $Known_bad_locale{$i}{$Locale} = 1 if exists $known_bad_locales{$^O}
916 && $Locale =~ $known_bad_locales{$^O};
2a680da6 917 $Problem{$i}{$Locale} = 1;
46973bb2 918 debug "failed $i ($test_names{$i}) with locale '$Locale'$message\n";
2a680da6
JH
919 }
920}
921
7c844d17
KW
922sub report_multi_result {
923 my ($Locale, $i, $results_ref) = @_;
924
925 # $results_ref points to an array, each element of which is a character that was
926 # in error for this test numbered '$i'. If empty, the test passed
927
928 my $message = "";
929 if (@$results_ref) {
705af3af 930 $message = join " ", "for", disp_chars(@$results_ref);
7c844d17
KW
931 }
932 report_result($Locale, $i, @$results_ref == 0, $message);
933}
934
c4093d7d
KW
935my $first_locales_test_number = $final_without_setlocale + 1;
936my $locales_test_number;
937my $not_necessarily_a_problem_test_number;
6d5d702a 938my $first_casing_test_number;
c4093d7d
KW
939my %setlocale_failed; # List of locales that setlocale() didn't work on
940
73fc293b 941foreach my $Locale (@Locale) {
c4093d7d 942 $locales_test_number = $first_locales_test_number - 1;
46973bb2
KW
943 debug "\n";
944 debug "Locale = $Locale\n";
284102e8 945
a810e350 946 unless (setlocale(&POSIX::LC_ALL, $Locale)) {
c4093d7d 947 $setlocale_failed{$Locale} = $Locale;
284102e8 948 next;
8ebc5c01 949 }
8ebc5c01 950
31f05a37
KW
951 # We test UTF-8 locales only under ':not_characters'; It is easier to
952 # test them in other test files than here. Non- UTF-8 locales are tested
953 # only under plain 'use locale', as otherwise we would have to convert
954 # everything in them to Unicode.
66cbab2c 955
95eaa1bf
KW
956 my %UPPER = (); # All alpha X for which uc(X) == X and lc(X) != X
957 my %lower = (); # All alpha X for which lc(X) == X and uc(X) != X
958 my %BoThCaSe = (); # All alpha X for which uc(X) == lc(X) == X
66cbab2c 959
ab8b8bcc 960 my $is_utf8_locale = is_locale_utf8($Locale);
b9df08e4 961
46973bb2 962 debug "is utf8 locale? = $is_utf8_locale\n";
b9df08e4 963
68886934 964 debug "radix = " . disp_str(localeconv()->{decimal_point}) . "\n";
b9df08e4 965
66cbab2c
KW
966 if (! $is_utf8_locale) {
967 use locale;
fb844330
KW
968 @{$posixes{'word'}} = grep /\w/, map { chr } 0..255;
969 @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255;
970 @{$posixes{'space'}} = grep /\s/, map { chr } 0..255;
971 @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255;
972 @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255;
973 @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255;
974 @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255;
975 @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255;
976 @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255;
977 @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255;
978 @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255;
98ed56e7 979 @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255;
fb844330
KW
980 @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255;
981 @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255;
389cb6df 982 @{$posixes{'cased'}} = grep /[[:upper:][:lower:]]/i, map {chr } 0..255;
e5272a46 983
71e5cbb3
KW
984 # Sieve the uppercase and the lowercase.
985
fb844330 986 for (@{$posixes{'word'}}) {
71e5cbb3
KW
987 if (/[^\d_]/) { # skip digits and the _
988 if (uc($_) eq $_) {
989 $UPPER{$_} = $_;
990 }
991 if (lc($_) eq $_) {
992 $lower{$_} = $_;
993 }
994 }
995 }
66cbab2c
KW
996 }
997 else {
998 use locale ':not_characters';
fb844330
KW
999 @{$posixes{'word'}} = grep /\w/, map { chr } 0..255;
1000 @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255;
1001 @{$posixes{'space'}} = grep /\s/, map { chr } 0..255;
1002 @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255;
1003 @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255;
1004 @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255;
1005 @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255;
1006 @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255;
1007 @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255;
1008 @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255;
1009 @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255;
98ed56e7 1010 @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255;
fb844330
KW
1011 @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255;
1012 @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255;
389cb6df 1013 @{$posixes{'cased'}} = grep /[[:upper:][:lower:]]/i, map {chr } 0..255;
fb844330 1014 for (@{$posixes{'word'}}) {
66cbab2c
KW
1015 if (/[^\d_]/) { # skip digits and the _
1016 if (uc($_) eq $_) {
1017 $UPPER{$_} = $_;
1018 }
1019 if (lc($_) eq $_) {
1020 $lower{$_} = $_;
1021 }
1022 }
1023 }
1024 }
a160ac48 1025
fb844330
KW
1026 # Ordered, where possible, in groups of "this is a subset of the next
1027 # one"
46973bb2
KW
1028 debug ":upper: = ", disp_chars(@{$posixes{'upper'}}), "\n";
1029 debug ":lower: = ", disp_chars(@{$posixes{'lower'}}), "\n";
1030 debug ":cased: = ", disp_chars(@{$posixes{'cased'}}), "\n";
1031 debug ":alpha: = ", disp_chars(@{$posixes{'alpha'}}), "\n";
1032 debug ":alnum: = ", disp_chars(@{$posixes{'alnum'}}), "\n";
8eb6215e 1033 debug ' \w = ', disp_chars(@{$posixes{'word'}}), "\n";
46973bb2
KW
1034 debug ":graph: = ", disp_chars(@{$posixes{'graph'}}), "\n";
1035 debug ":print: = ", disp_chars(@{$posixes{'print'}}), "\n";
8eb6215e 1036 debug ' \d = ', disp_chars(@{$posixes{'digit'}}), "\n";
46973bb2
KW
1037 debug ":xdigit: = ", disp_chars(@{$posixes{'xdigit'}}), "\n";
1038 debug ":blank: = ", disp_chars(@{$posixes{'blank'}}), "\n";
8eb6215e 1039 debug ' \s = ', disp_chars(@{$posixes{'space'}}), "\n";
46973bb2
KW
1040 debug ":punct: = ", disp_chars(@{$posixes{'punct'}}), "\n";
1041 debug ":cntrl: = ", disp_chars(@{$posixes{'cntrl'}}), "\n";
1042 debug ":ascii: = ", disp_chars(@{$posixes{'ascii'}}), "\n";
a160ac48 1043
284102e8 1044 foreach (keys %UPPER) {
a160ac48 1045
097ee67d 1046 $BoThCaSe{$_}++ if exists $lower{$_};
284102e8
JH
1047 }
1048 foreach (keys %lower) {
097ee67d 1049 $BoThCaSe{$_}++ if exists $UPPER{$_};
284102e8 1050 }
097ee67d 1051 foreach (keys %BoThCaSe) {
284102e8
JH
1052 delete $UPPER{$_};
1053 delete $lower{$_};
1054 }
1055
e5cc0528
KW
1056 my %Unassigned;
1057 foreach my $ord ( 0 .. 255 ) {
1058 $Unassigned{chr $ord} = 1;
1059 }
1060 foreach my $class (keys %posixes) {
1061 foreach my $char (@{$posixes{$class}}) {
1062 delete $Unassigned{$char};
1063 }
1064 }
1065
46973bb2
KW
1066 debug "UPPER = ", disp_chars(sort { ord $a <=> ord $b } keys %UPPER), "\n";
1067 debug "lower = ", disp_chars(sort { ord $a <=> ord $b } keys %lower), "\n";
1068 debug "BoThCaSe = ", disp_chars(sort { ord $a <=> ord $b } keys %BoThCaSe), "\n";
1069 debug "Unassigned = ", disp_chars(sort { ord $a <=> ord $b } keys %Unassigned), "\n";
284102e8 1070
baa71cfd 1071 my @failures;
3da38613 1072 my @fold_failures;
b9df08e4 1073 foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) {
baa71cfd 1074 my $ok;
3da38613 1075 my $fold_ok;
baa71cfd
KW
1076 if ($is_utf8_locale) {
1077 use locale ':not_characters';
1078 $ok = $x =~ /[[:upper:]]/;
3da38613 1079 $fold_ok = $x =~ /[[:lower:]]/i;
baa71cfd
KW
1080 }
1081 else {
1082 use locale;
1083 $ok = $x =~ /[[:upper:]]/;
3da38613 1084 $fold_ok = $x =~ /[[:lower:]]/i;
baa71cfd
KW
1085 }
1086 push @failures, $x unless $ok;
3da38613 1087 push @fold_failures, $x unless $fold_ok;
baa71cfd 1088 }
baa71cfd 1089 $locales_test_number++;
6d5d702a 1090 $first_casing_test_number = $locales_test_number;
95eaa1bf 1091 $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/ matches all alpha X for which uc(X) == X and lc(X) != X';
7c844d17 1092 report_multi_result($Locale, $locales_test_number, \@failures);
6d5d702a 1093
3da38613 1094 $locales_test_number++;
6d5d702a 1095
95eaa1bf 1096 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i matches all alpha X for which uc(X) == X and lc(X) != X';
7c844d17 1097 report_multi_result($Locale, $locales_test_number, \@fold_failures);
baa71cfd 1098
baa71cfd 1099 undef @failures;
3da38613 1100 undef @fold_failures;
baa71cfd 1101
b9df08e4 1102 foreach my $x (sort { ord $a <=> ord $b } keys %lower) {
baa71cfd 1103 my $ok;
3da38613 1104 my $fold_ok;
baa71cfd
KW
1105 if ($is_utf8_locale) {
1106 use locale ':not_characters';
1107 $ok = $x =~ /[[:lower:]]/;
3da38613 1108 $fold_ok = $x =~ /[[:upper:]]/i;
baa71cfd
KW
1109 }
1110 else {
1111 use locale;
1112 $ok = $x =~ /[[:lower:]]/;
3da38613 1113 $fold_ok = $x =~ /[[:upper:]]/i;
baa71cfd
KW
1114 }
1115 push @failures, $x unless $ok;
3da38613 1116 push @fold_failures, $x unless $fold_ok;
baa71cfd
KW
1117 }
1118
1119 $locales_test_number++;
95eaa1bf 1120 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/ matches all alpha X for which lc(X) == X and uc(X) != X';
7c844d17
KW
1121 report_multi_result($Locale, $locales_test_number, \@failures);
1122
3da38613 1123 $locales_test_number++;
95eaa1bf 1124 $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/i matches all alpha X for which lc(X) == X and uc(X) != X';
7c844d17 1125 report_multi_result($Locale, $locales_test_number, \@fold_failures);
baa71cfd 1126
9445c837
KW
1127 { # Find the alphabetic characters that are not considered alphabetics
1128 # in the default (C) locale.
8ebc5c01 1129
284102e8 1130 no locale;
71e5cbb3 1131
30032ef4 1132 @Added_alpha = ();
5e7a1028 1133 for (keys %UPPER, keys %lower, keys %BoThCaSe) {
30032ef4 1134 push(@Added_alpha, $_) if (/\W/);
284102e8 1135 }
8ebc5c01 1136 }
8ebc5c01 1137
b9df08e4 1138 @Added_alpha = sort { ord $a <=> ord $b } @Added_alpha;
8ebc5c01 1139
46973bb2 1140 debug "Added_alpha = ", disp_chars(@Added_alpha), "\n";
8ebc5c01 1141
db31898d 1142 # Cross-check the whole 8-bit character set.
8ebc5c01 1143
db31898d
KW
1144 ++$locales_test_number;
1145 my @f;
1146 $test_names{$locales_test_number} = 'Verify that \w and [:word:] are identical';
1147 for (map { chr } 0..255) {
1148 if ($is_utf8_locale) {
1149 use locale ':not_characters';
1150 push @f, $_ unless /[[:word:]]/ == /\w/;
1151 }
1152 else {
1153 push @f, $_ unless /[[:word:]]/ == /\w/;
1154 }
1155 }
1156 report_multi_result($Locale, $locales_test_number, \@f);
8ebc5c01 1157
db31898d
KW
1158 ++$locales_test_number;
1159 undef @f;
1160 $test_names{$locales_test_number} = 'Verify that \d and [:digit:] are identical';
1161 for (map { chr } 0..255) {
1162 if ($is_utf8_locale) {
1163 use locale ':not_characters';
1164 push @f, $_ unless /[[:digit:]]/ == /\d/;
1165 }
1166 else {
1167 push @f, $_ unless /[[:digit:]]/ == /\d/;
1168 }
1169 }
1170 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1171
db31898d
KW
1172 ++$locales_test_number;
1173 undef @f;
1174 $test_names{$locales_test_number} = 'Verify that \s and [:space:] are identical';
1175 for (map { chr } 0..255) {
1176 if ($is_utf8_locale) {
1177 use locale ':not_characters';
1178 push @f, $_ unless /[[:space:]]/ == /\s/;
1179 }
1180 else {
1181 push @f, $_ unless /[[:space:]]/ == /\s/;
1182 }
1183 }
1184 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1185
db31898d
KW
1186 ++$locales_test_number;
1187 undef @f;
1188 $test_names{$locales_test_number} = 'Verify that [:posix:] and [:^posix:] are mutually exclusive';
1189 for (map { chr } 0..255) {
1190 if ($is_utf8_locale) {
1191 use locale ':not_characters';
1192 push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) ||
1193 (/[[:alnum:]]/ xor /[[:^alnum:]]/) ||
1194 (/[[:ascii:]]/ xor /[[:^ascii:]]/) ||
1195 (/[[:blank:]]/ xor /[[:^blank:]]/) ||
1196 (/[[:cntrl:]]/ xor /[[:^cntrl:]]/) ||
1197 (/[[:digit:]]/ xor /[[:^digit:]]/) ||
1198 (/[[:graph:]]/ xor /[[:^graph:]]/) ||
1199 (/[[:lower:]]/ xor /[[:^lower:]]/) ||
1200 (/[[:print:]]/ xor /[[:^print:]]/) ||
1201 (/[[:space:]]/ xor /[[:^space:]]/) ||
1202 (/[[:upper:]]/ xor /[[:^upper:]]/) ||
1203 (/[[:word:]]/ xor /[[:^word:]]/) ||
1204 (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) ||
1205
1206 # effectively is what [:cased:] would be if it existed.
389cb6df 1207 (/[[:upper:][:lower:]]/i xor /[^[:upper:][:lower:]]/i);
db31898d
KW
1208 }
1209 else {
1210 push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) ||
1211 (/[[:alnum:]]/ xor /[[:^alnum:]]/) ||
1212 (/[[:ascii:]]/ xor /[[:^ascii:]]/) ||
1213 (/[[:blank:]]/ xor /[[:^blank:]]/) ||
1214 (/[[:cntrl:]]/ xor /[[:^cntrl:]]/) ||
1215 (/[[:digit:]]/ xor /[[:^digit:]]/) ||
1216 (/[[:graph:]]/ xor /[[:^graph:]]/) ||
1217 (/[[:lower:]]/ xor /[[:^lower:]]/) ||
1218 (/[[:print:]]/ xor /[[:^print:]]/) ||
1219 (/[[:space:]]/ xor /[[:^space:]]/) ||
1220 (/[[:upper:]]/ xor /[[:^upper:]]/) ||
1221 (/[[:word:]]/ xor /[[:^word:]]/) ||
1222 (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) ||
389cb6df 1223 (/[[:upper:][:lower:]]/i xor /[^[:upper:][:lower:]]/i);
db31898d
KW
1224 }
1225 }
1226 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1227
db31898d
KW
1228 # The rules for the relationships are given in:
1229 # http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
32e8aa3f 1230
660c7bcc
KW
1231
1232 ++$locales_test_number;
1233 undef @f;
1234 $test_names{$locales_test_number} = 'Verify that [:lower:] contains at least a-z';
1235 for ('a' .. 'z') {
1236 if ($is_utf8_locale) {
1237 use locale ':not_characters';
1238 push @f, $_ unless /[[:lower:]]/;
1239 }
1240 else {
1241 push @f, $_ unless /[[:lower:]]/;
1242 }
1243 }
1244 report_multi_result($Locale, $locales_test_number, \@f);
1245
db31898d
KW
1246 ++$locales_test_number;
1247 undef @f;
1248 $test_names{$locales_test_number} = 'Verify that [:lower:] is a subset of [:alpha:]';
1249 for (map { chr } 0..255) {
1250 if ($is_utf8_locale) {
1251 use locale ':not_characters';
1252 push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/;
1253 }
1254 else {
1255 push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/;
1256 }
1257 }
1258 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1259
db31898d
KW
1260 ++$locales_test_number;
1261 undef @f;
660c7bcc
KW
1262 $test_names{$locales_test_number} = 'Verify that [:upper:] contains at least A-Z';
1263 for ('A' .. 'Z') {
1264 if ($is_utf8_locale) {
1265 use locale ':not_characters';
1266 push @f, $_ unless /[[:upper:]]/;
1267 }
1268 else {
1269 push @f, $_ unless /[[:upper:]]/;
1270 }
1271 }
1272 report_multi_result($Locale, $locales_test_number, \@f);
1273
1274 ++$locales_test_number;
1275 undef @f;
db31898d
KW
1276 $test_names{$locales_test_number} = 'Verify that [:upper:] is a subset of [:alpha:]';
1277 for (map { chr } 0..255) {
1278 if ($is_utf8_locale) {
1279 use locale ':not_characters';
1280 push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/;
1281 }
1282 else {
1283 push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/;
1284 }
1285 }
1286 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1287
db31898d
KW
1288 ++$locales_test_number;
1289 undef @f;
1290 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i is a subset of [:alpha:]';
1291 for (map { chr } 0..255) {
1292 if ($is_utf8_locale) {
1293 use locale ':not_characters';
1294 push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/;
1295 }
1296 else {
1297 push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/;
1298 }
1299 }
1300 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1301
db31898d
KW
1302 ++$locales_test_number;
1303 undef @f;
1304 $test_names{$locales_test_number} = 'Verify that [:alpha:] is a subset of [:alnum:]';
1305 for (map { chr } 0..255) {
1306 if ($is_utf8_locale) {
1307 use locale ':not_characters';
1308 push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/;
1309 }
1310 else {
1311 push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/;
1312 }
1313 }
1314 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1315
db31898d
KW
1316 ++$locales_test_number;
1317 undef @f;
660c7bcc
KW
1318 $test_names{$locales_test_number} = 'Verify that [:digit:] contains at least 0-9';
1319 for ('0' .. '9') {
1320 if ($is_utf8_locale) {
1321 use locale ':not_characters';
1322 push @f, $_ unless /[[:digit:]]/;
1323 }
1324 else {
1325 push @f, $_ unless /[[:digit:]]/;
1326 }
1327 }
1328 report_multi_result($Locale, $locales_test_number, \@f);
1329
1330 ++$locales_test_number;
1331 undef @f;
db31898d
KW
1332 $test_names{$locales_test_number} = 'Verify that [:digit:] is a subset of [:alnum:]';
1333 for (map { chr } 0..255) {
1334 if ($is_utf8_locale) {
1335 use locale ':not_characters';
1336 push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/;
1337 }
1338 else {
1339 push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/;
1340 }
1341 }
1342 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1343
db31898d
KW
1344 ++$locales_test_number;
1345 undef @f;
aac995d2 1346 $test_names{$locales_test_number} = 'Verify that [:digit:] matches either 10 or 20 code points';
fb844330 1347 report_result($Locale, $locales_test_number, @{$posixes{'digit'}} == 10 || @{$posixes{'digit'}} == 20);
aac995d2
KW
1348
1349 ++$locales_test_number;
1350 undef @f;
660c7bcc
KW
1351 $test_names{$locales_test_number} = 'Verify that if there is a second set of digits in [:digit:], they are consecutive';
1352 if (@{$posixes{'digit'}} == 20) {
1353 my $previous_ord;
fc81f5f2 1354 for (map { chr } 0..255) {
660c7bcc
KW
1355 next unless /[[:digit:]]/;
1356 next if /[0-9]/;
1357 if (defined $previous_ord) {
1358 if ($is_utf8_locale) {
1359 use locale ':not_characters';
1360 push @f, $_ if ord $_ != $previous_ord + 1;
1361 }
1362 else {
1363 push @f, $_ if ord $_ != $previous_ord + 1;
1364 }
fc81f5f2 1365 }
660c7bcc 1366 $previous_ord = ord $_;
db31898d
KW
1367 }
1368 }
1369 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1370
db31898d
KW
1371 ++$locales_test_number;
1372 undef @f;
d458c02c
KW
1373 my @xdigit_digits; # :digit: & :xdigit:
1374 $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains one or two blocks of 10 consecutive [:digit:] chars';
db31898d
KW
1375 for (map { chr } 0..255) {
1376 if ($is_utf8_locale) {
1377 use locale ':not_characters';
d458c02c
KW
1378 # For utf8 locales, we actually use a stricter test: that :digit:
1379 # is a subset of :xdigit:, as we know that only 0-9 should match
1380 push @f, $_ if /[[:digit:]]/ and ! /[[:xdigit:]]/;
db31898d
KW
1381 }
1382 else {
d458c02c 1383 push @xdigit_digits, $_ if /[[:digit:]]/ and /[[:xdigit:]]/;
db31898d
KW
1384 }
1385 }
d458c02c
KW
1386 if (! $is_utf8_locale) {
1387
1388 # For non-utf8 locales, @xdigit_digits is a list of the characters
1389 # that are both :xdigit: and :digit:. Because :digit: is stored in
1390 # increasing code point order (unless the tests above failed),
1391 # @xdigit_digits is as well. There should be exactly 10 or
1392 # 20 of these.
1393 if (@xdigit_digits != 10 && @xdigit_digits != 20) {
1394 @f = @xdigit_digits;
1395 }
1396 else {
1397
1398 # Look for contiguity in the series, adding any wrong ones to @f
1399 my @temp = @xdigit_digits;
1400 while (@temp > 1) {
1401 push @f, $temp[1] if ($temp[0] != $temp[1] - 1)
1402
1403 # Skip this test for the 0th character of
1404 # the second block of 10, as it won't be
1405 # contiguous with the previous block
1406 && (! defined $xdigit_digits[10]
1407 || $temp[1] != $xdigit_digits[10]);
1408 shift @temp;
1409 }
1410 }
1411 }
1412
db31898d 1413 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1414
660c7bcc
KW
1415 ++$locales_test_number;
1416 undef @f;
1417 $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains at least A-F, a-f';
1418 for ('A' .. 'F', 'a' .. 'f') {
1419 if ($is_utf8_locale) {
1420 use locale ':not_characters';
1421 push @f, $_ unless /[[:xdigit:]]/;
1422 }
1423 else {
1424 push @f, $_ unless /[[:xdigit:]]/;
1425 }
1426 }
1427 report_multi_result($Locale, $locales_test_number, \@f);
1428
1429 ++$locales_test_number;
1430 undef @f;
1431 $test_names{$locales_test_number} = 'Verify that any additional members of [:xdigit:], are in groups of 6 consecutive code points';
1432 my $previous_ord;
1433 my $count = 0;
d458c02c
KW
1434 for my $chr (map { chr } 0..255) {
1435 next unless $chr =~ /[[:xdigit:]]/;
1436 if ($is_utf8_locale) {
1437 next if $chr =~ /[[:digit:]]/;
1438 }
1439 else {
1440 next if grep { $chr eq $_ } @xdigit_digits;
1441 }
1442 next if $chr =~ /[A-Fa-f]/;
660c7bcc
KW
1443 if (defined $previous_ord) {
1444 if ($is_utf8_locale) {
1445 use locale ':not_characters';
d458c02c 1446 push @f, $chr if ord $chr != $previous_ord + 1;
660c7bcc
KW
1447 }
1448 else {
d458c02c 1449 push @f, $chr if ord $chr != $previous_ord + 1;
660c7bcc
KW
1450 }
1451 }
1452 $count++;
1453 if ($count == 6) {
1454 undef $previous_ord;
1455 }
1456 else {
d458c02c 1457 $previous_ord = ord $chr;
660c7bcc
KW
1458 }
1459 }
1460 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1461
db31898d
KW
1462 ++$locales_test_number;
1463 undef @f;
1464 $test_names{$locales_test_number} = 'Verify that [:xdigit:] is a subset of [:graph:]';
1465 for (map { chr } 0..255) {
1466 if ($is_utf8_locale) {
1467 use locale ':not_characters';
1468 push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/;
1469 }
1470 else {
1471 push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/;
1472 }
1473 }
1474 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1475
660c7bcc
KW
1476 # Note that xdigit doesn't have to be a subset of alnum
1477
db31898d
KW
1478 ++$locales_test_number;
1479 undef @f;
1480 $test_names{$locales_test_number} = 'Verify that [:punct:] is a subset of [:graph:]';
1481 for (map { chr } 0..255) {
1482 if ($is_utf8_locale) {
1483 use locale ':not_characters';
1484 push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/;
1485 }
1486 else {
1487 push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/;
1488 }
1489 }
1490 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1491
db31898d
KW
1492 ++$locales_test_number;
1493 undef @f;
660c7bcc
KW
1494 $test_names{$locales_test_number} = 'Verify that the space character is not in [:graph:]';
1495 if ($is_utf8_locale) {
1496 use locale ':not_characters';
1497 push @f, " " if " " =~ /[[:graph:]]/;
1498 }
1499 else {
1500 push @f, " " if " " =~ /[[:graph:]]/;
1501 }
1502 report_multi_result($Locale, $locales_test_number, \@f);
1503
1504 ++$locales_test_number;
1505 undef @f;
1506 $test_names{$locales_test_number} = 'Verify that [:space:] contains at least [\f\n\r\t\cK ]';
1507 for (' ', "\f", "\n", "\r", "\t", "\cK") {
1508 if ($is_utf8_locale) {
1509 use locale ':not_characters';
1510 push @f, $_ unless /[[:space:]]/;
1511 }
1512 else {
1513 push @f, $_ unless /[[:space:]]/;
1514 }
1515 }
1516 report_multi_result($Locale, $locales_test_number, \@f);
1517
1518 ++$locales_test_number;
1519 undef @f;
1520 $test_names{$locales_test_number} = 'Verify that [:blank:] contains at least [\t ]';
1521 for (' ', "\t") {
1522 if ($is_utf8_locale) {
1523 use locale ':not_characters';
1524 push @f, $_ unless /[[:blank:]]/;
1525 }
1526 else {
1527 push @f, $_ unless /[[:blank:]]/;
1528 }
1529 }
1530 report_multi_result($Locale, $locales_test_number, \@f);
1531
1532 ++$locales_test_number;
1533 undef @f;
db31898d
KW
1534 $test_names{$locales_test_number} = 'Verify that [:blank:] is a subset of [:space:]';
1535 for (map { chr } 0..255) {
1536 if ($is_utf8_locale) {
1537 use locale ':not_characters';
1538 push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/;
1539 }
1540 else {
1541 push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/;
1542 }
1543 }
1544 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1545
db31898d
KW
1546 ++$locales_test_number;
1547 undef @f;
1548 $test_names{$locales_test_number} = 'Verify that [:graph:] is a subset of [:print:]';
1549 for (map { chr } 0..255) {
1550 if ($is_utf8_locale) {
1551 use locale ':not_characters';
1552 push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/;
1553 }
1554 else {
1555 push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/;
1556 }
1557 }
1558 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1559
db31898d
KW
1560 ++$locales_test_number;
1561 undef @f;
660c7bcc
KW
1562 $test_names{$locales_test_number} = 'Verify that the space character is in [:print:]';
1563 if ($is_utf8_locale) {
1564 use locale ':not_characters';
1565 push @f, " " if " " !~ /[[:print:]]/;
1566 }
1567 else {
1568 push @f, " " if " " !~ /[[:print:]]/;
1569 }
1570 report_multi_result($Locale, $locales_test_number, \@f);
1571
1572 ++$locales_test_number;
1573 undef @f;
db31898d
KW
1574 $test_names{$locales_test_number} = 'Verify that isn\'t both [:cntrl:] and [:print:]';
1575 for (map { chr } 0..255) {
1576 if ($is_utf8_locale) {
1577 use locale ':not_characters';
1578 push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/);
1579 }
1580 else {
1581 push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/);
1582 }
1583 }
1584 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1585
db31898d
KW
1586 ++$locales_test_number;
1587 undef @f;
660c7bcc
KW
1588 $test_names{$locales_test_number} = 'Verify that isn\'t both [:alpha:] and [:digit:]';
1589 for (map { chr } 0..255) {
1590 if ($is_utf8_locale) {
1591 use locale ':not_characters';
1592 push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/;
1593 }
1594 else {
1595 push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/;
1596 }
1597 }
1598 report_multi_result($Locale, $locales_test_number, \@f);
1599
1600 ++$locales_test_number;
1601 undef @f;
db31898d
KW
1602 $test_names{$locales_test_number} = 'Verify that isn\'t both [:alnum:] and [:punct:]';
1603 for (map { chr } 0..255) {
1604 if ($is_utf8_locale) {
1605 use locale ':not_characters';
1606 push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/;
1607 }
1608 else {
1609 push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/;
1610 }
1611 }
1612 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1613
db31898d
KW
1614 ++$locales_test_number;
1615 undef @f;
1616 $test_names{$locales_test_number} = 'Verify that isn\'t both [:xdigit:] and [:punct:]';
1617 for (map { chr } 0..255) {
1618 if ($is_utf8_locale) {
1619 use locale ':not_characters';
1620 push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/);
1621 }
1622 else {
1623 push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/);
1624 }
1625 }
1626 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1627
db31898d
KW
1628 ++$locales_test_number;
1629 undef @f;
1630 $test_names{$locales_test_number} = 'Verify that isn\'t both [:graph:] and [:space:]';
1631 for (map { chr } 0..255) {
1632 if ($is_utf8_locale) {
1633 use locale ':not_characters';
1634 push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/);
1635 }
1636 else {
1637 push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/);
1638 }
1639 }
1640 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1641
7493b8f2 1642 foreach ($first_casing_test_number..$locales_test_number) {
5b1f18a7 1643 $problematical_tests{$_} = 1;
7493b8f2
KW
1644 }
1645
32e8aa3f 1646
db31898d
KW
1647 # Test for read-only scalars' locale vs non-locale comparisons.
1648
1649 {
1650 no locale;
1651 my $ok;
1652 $a = "qwerty";
1653 if ($is_utf8_locale) {
1654 use locale ':not_characters';
1655 $ok = ($a cmp "qwerty") == 0;
1656 }
1657 else {
1658 use locale;
1659 $ok = ($a cmp "qwerty") == 0;
1660 }
1661 report_result($Locale, ++$locales_test_number, $ok);
1662 $test_names{$locales_test_number} = 'Verify that cmp works with a read-only scalar; no- vs locale';
1663 }
8ebc5c01 1664
db31898d
KW
1665 {
1666 my ($from, $to, $lesser, $greater,
1667 @test, %test, $test, $yes, $no, $sign);
284102e8 1668
db31898d
KW
1669 ++$locales_test_number;
1670 $test_names{$locales_test_number} = 'Verify that "le", "ne", etc work';
1671 $not_necessarily_a_problem_test_number = $locales_test_number;
1672 for (0..9) {
1673 # Select a slice.
fb844330
KW
1674 $from = int(($_*@{$posixes{'word'}})/10);
1675 $to = $from + int(@{$posixes{'word'}}/10);
1676 $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}});
1677 $lesser = join('', @{$posixes{'word'}}[$from..$to]);
db31898d
KW
1678 # Select a slice one character on.
1679 $from++; $to++;
fb844330
KW
1680 $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}});
1681 $greater = join('', @{$posixes{'word'}}[$from..$to]);
66cbab2c
KW
1682 if ($is_utf8_locale) {
1683 use locale ':not_characters';
db31898d
KW
1684 ($yes, $no, $sign) = ($lesser lt $greater
1685 ? (" ", "not ", 1)
1686 : ("not ", " ", -1));
66cbab2c
KW
1687 }
1688 else {
1689 use locale;
db31898d
KW
1690 ($yes, $no, $sign) = ($lesser lt $greater
1691 ? (" ", "not ", 1)
1692 : ("not ", " ", -1));
66cbab2c 1693 }
db31898d
KW
1694 # all these tests should FAIL (return 0). Exact lt or gt cannot
1695 # be tested because in some locales, say, eacute and E may test
1696 # equal.
1697 @test =
1698 (
1699 $no.' ($lesser le $greater)', # 1
1700 'not ($lesser ne $greater)', # 2
1701 ' ($lesser eq $greater)', # 3
1702 $yes.' ($lesser ge $greater)', # 4
1703 $yes.' ($lesser ge $greater)', # 5
1704 $yes.' ($greater le $lesser )', # 7
1705 'not ($greater ne $lesser )', # 8
1706 ' ($greater eq $lesser )', # 9
1707 $no.' ($greater ge $lesser )', # 10
1708 'not (($lesser cmp $greater) == -($sign))' # 11
1709 );
1710 @test{@test} = 0 x @test;
1711 $test = 0;
1712 for my $ti (@test) {
66cbab2c
KW
1713 if ($is_utf8_locale) {
1714 use locale ':not_characters';
db31898d 1715 $test{$ti} = eval $ti;
66cbab2c
KW
1716 }
1717 else {
db31898d
KW
1718 # Already in 'use locale';
1719 $test{$ti} = eval $ti;
66cbab2c 1720 }
db31898d
KW
1721 $test ||= $test{$ti}
1722 }
1723 report_result($Locale, $locales_test_number, $test == 0);
1724 if ($test) {
46973bb2
KW
1725 debug "lesser = '$lesser'\n";
1726 debug "greater = '$greater'\n";
1727 debug "lesser cmp greater = ",
db31898d 1728 $lesser cmp $greater, "\n";
46973bb2 1729 debug "greater cmp lesser = ",
db31898d 1730 $greater cmp $lesser, "\n";
46973bb2 1731 debug "(greater) from = $from, to = $to\n";
db31898d
KW
1732 for my $ti (@test) {
1733 debugf("# %-40s %-4s", $ti,
1734 $test{$ti} ? 'FAIL' : 'ok');
1735 if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) {
1736 debugf("(%s == %4d)", $1, eval $1);
66cbab2c 1737 }
46973bb2 1738 debugf("\n#");
db31898d
KW
1739 }
1740
1741 last;
1742 }
1743 }
4e615abd
KW
1744
1745 use locale;
1746
620f73f4 1747 my @sorted_controls = sort @{$posixes{'cntrl'}};
635ddc7f
KW
1748 my $output = "";
1749 for my $control (@sorted_controls) {
1750 $output .= " " . disp_chars($control);
1751 }
1752 debug "sorted :cntrl: = $output\n";
620f73f4
KW
1753
1754 ++$locales_test_number;
1755 $test_names{$locales_test_number}
1756 = 'Verify that \0 sorts before any other control';
1757 my $ok = $sorted_controls[0] eq "\0";
1758 report_result($Locale, $locales_test_number, $ok);
1759 shift @sorted_controls;
1760 my $lowest_control = $sorted_controls[0];
1761
4e615abd
KW
1762 ++$locales_test_number;
1763 $test_names{$locales_test_number}
620f73f4 1764 = 'Skip in locales where all controls have primary sorting weight; '
4e615abd 1765 . 'otherwise verify that \0 doesn\'t have primary sorting weight';
620f73f4 1766 if ("a${lowest_control}c" lt "ab") {
4e615abd
KW
1767 report_result($Locale, $locales_test_number, 1);
1768 }
1769 else {
1770 my $ok = "ab" lt "a\0c";
1771 report_result($Locale, $locales_test_number, $ok);
1772 }
6696cfa7
KW
1773
1774 ++$locales_test_number;
1775 $test_names{$locales_test_number}
a4a439fb 1776 = 'Verify that strings with embedded NUL collate';
620f73f4 1777 $ok = "a\0a\0a" lt "a${lowest_control}a${lowest_control}a";
6696cfa7
KW
1778 report_result($Locale, $locales_test_number, $ok);
1779
1780 ++$locales_test_number;
1781 $test_names{$locales_test_number}
a4a439fb 1782 = 'Verify that strings with embedded NUL and '
6696cfa7 1783 . 'extra trailing NUL collate';
620f73f4 1784 $ok = "a\0a\0" lt "a${lowest_control}a${lowest_control}";
6696cfa7 1785 report_result($Locale, $locales_test_number, $ok);
a4a439fb
KW
1786
1787 ++$locales_test_number;
1788 $test_names{$locales_test_number}
e1c30f0c
KW
1789 = 'Verify that empty strings collate';
1790 $ok = "" le "";
1791 report_result($Locale, $locales_test_number, $ok);
1792
1793 ++$locales_test_number;
1794 $test_names{$locales_test_number}
a4a439fb
KW
1795 = "Skip in non-UTF-8 locales; otherwise verify that UTF8ness "
1796 . "doesn't matter with collation";
1797 if (! $is_utf8_locale) {
1798 report_result($Locale, $locales_test_number, 1);
1799 }
1800 else {
1801
1802 # khw can't think of anything better. Start with a string that is
1803 # higher than its UTF-8 representation in both EBCDIC and ASCII
1804 my $string = chr utf8::unicode_to_native(0xff);
1805 my $utf8_string = $string;
1806 utf8::upgrade($utf8_string);
1807
1808 # 8 should be lt 9 in all locales (except ones that aren't
1809 # ASCII-based, which might fail this)
1810 $ok = ("a${string}8") lt ("a${utf8_string}9");
1811 report_result($Locale, $locales_test_number, $ok);
1812 }
1813
1814 ++$locales_test_number;
1815 $test_names{$locales_test_number}
1816 = "Skip in UTF-8 locales; otherwise verify that single byte "
1817 . "collates before 0x100 and above";
1818 if ($is_utf8_locale) {
1819 report_result($Locale, $locales_test_number, 1);
1820 }
1821 else {
1822 my $max_collating = chr 0; # Find byte that collates highest
1823 for my $i (0 .. 255) {
1824 my $char = chr $i;
1825 $max_collating = $char if $char gt $max_collating;
1826 }
1827 $ok = $max_collating lt chr 0x100;
1828 report_result($Locale, $locales_test_number, $ok);
1829 }
1830
1831 ++$locales_test_number;
1832 $test_names{$locales_test_number}
1833 = "Skip in UTF-8 locales; otherwise verify that 0x100 and "
1834 . "above collate in code point order";
1835 if ($is_utf8_locale) {
1836 report_result($Locale, $locales_test_number, 1);
1837 }
1838 else {
1839 $ok = chr 0x100 lt chr 0x101;
1840 report_result($Locale, $locales_test_number, $ok);
1841 }
db31898d 1842 }
c4093d7d 1843
66cbab2c
KW
1844 my $ok1;
1845 my $ok2;
1846 my $ok3;
1847 my $ok4;
1848 my $ok5;
1849 my $ok6;
1850 my $ok7;
1851 my $ok8;
1852 my $ok9;
1853 my $ok10;
1854 my $ok11;
1855 my $ok12;
1856 my $ok13;
1500bd91 1857 my $ok14;
2c6ee1a7 1858 my $ok14_5;
28acfe03
KW
1859 my $ok15;
1860 my $ok16;
e46375fa
KW
1861 my $ok17;
1862 my $ok18;
f406a445 1863 my $ok19;
9717af6d 1864 my $ok20;
8ee4b769 1865 my $ok21;
66cbab2c
KW
1866
1867 my $c;
1868 my $d;
1869 my $e;
1870 my $f;
1871 my $g;
e46375fa
KW
1872 my $h;
1873 my $i;
1874 my $j;
66cbab2c
KW
1875
1876 if (! $is_utf8_locale) {
71e5cbb3 1877 use locale;
6be75cd7 1878
71e5cbb3 1879 my ($x, $y) = (1.23, 1.23);
6be75cd7 1880
71e5cbb3
KW
1881 $a = "$x";
1882 printf ''; # printf used to reset locale to "C"
1883 $b = "$y";
1884 $ok1 = $a eq $b;
6be75cd7 1885
71e5cbb3
KW
1886 $c = "$x";
1887 my $z = sprintf ''; # sprintf used to reset locale to "C"
1888 $d = "$y";
1889 $ok2 = $c eq $d;
1890 {
66cbab2c 1891
71e5cbb3
KW
1892 use warnings;
1893 my $w = 0;
1894 local $SIG{__WARN__} =
1895 sub {
1896 print "# @_\n";
1897 $w++;
1898 };
6be75cd7 1899
71e5cbb3
KW
1900 # The == (among other ops) used to warn for locales
1901 # that had something else than "." as the radix character.
6be75cd7 1902
71e5cbb3
KW
1903 $ok3 = $c == 1.23;
1904 $ok4 = $c == $x;
1905 $ok5 = $c == $d;
1906 {
1907 no locale;
66cbab2c 1908
b79536ea 1909 $e = "$x";
71e5cbb3
KW
1910
1911 $ok6 = $e == 1.23;
1912 $ok7 = $e == $x;
1913 $ok8 = $e == $c;
1914 }
66cbab2c 1915
71e5cbb3
KW
1916 $f = "1.23";
1917 $g = 2.34;
e46375fa
KW
1918 $h = 1.5;
1919 $i = 1.25;
1920 $j = "$h:$i";
66cbab2c 1921
71e5cbb3
KW
1922 $ok9 = $f == 1.23;
1923 $ok10 = $f == $x;
1924 $ok11 = $f == $c;
1925 $ok12 = abs(($f + $g) - 3.57) < 0.01;
1926 $ok13 = $w == 0;
2c6ee1a7 1927 $ok14 = $ok14_5 = $ok15 = $ok16 = 1; # Skip for non-utf8 locales
71e5cbb3 1928 }
e46375fa
KW
1929 {
1930 no locale;
1931 $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i);
1932 }
1933 $ok18 = $j eq sprintf("%g:%g", $h, $i);
66cbab2c
KW
1934 }
1935 else {
1936 use locale ':not_characters';
1937
1938 my ($x, $y) = (1.23, 1.23);
1939 $a = "$x";
1940 printf ''; # printf used to reset locale to "C"
1941 $b = "$y";
1942 $ok1 = $a eq $b;
1943
1944 $c = "$x";
1945 my $z = sprintf ''; # sprintf used to reset locale to "C"
1946 $d = "$y";
1947 $ok2 = $c eq $d;
1948 {
1949 use warnings;
1950 my $w = 0;
1951 local $SIG{__WARN__} =
1952 sub {
1953 print "# @_\n";
1954 $w++;
1955 };
1956 $ok3 = $c == 1.23;
1957 $ok4 = $c == $x;
1958 $ok5 = $c == $d;
1959 {
1960 no locale;
b79536ea 1961 $e = "$x";
66cbab2c
KW
1962
1963 $ok6 = $e == 1.23;
1964 $ok7 = $e == $x;
1965 $ok8 = $e == $c;
1966 }
1967
1968 $f = "1.23";
1969 $g = 2.34;
e46375fa
KW
1970 $h = 1.5;
1971 $i = 1.25;
1972 $j = "$h:$i";
66cbab2c
KW
1973
1974 $ok9 = $f == 1.23;
1975 $ok10 = $f == $x;
1976 $ok11 = $f == $c;
1977 $ok12 = abs(($f + $g) - 3.57) < 0.01;
1978 $ok13 = $w == 0;
1500bd91
KW
1979
1980 # Look for non-ASCII error messages, and verify that the first
5320b60d 1981 # such is in UTF-8 (the others almost certainly will be like the
5af2752a 1982 # first). This is only done if the current locale has LC_MESSAGES
1500bd91 1983 $ok14 = 1;
2c6ee1a7 1984 $ok14_5 = 1;
ef9d5242
KW
1985 if ( locales_enabled('LC_MESSAGES')
1986 && setlocale(&POSIX::LC_MESSAGES, $Locale))
1987 {
9bc33472
KW
1988 foreach my $err (keys %!) {
1989 use Errno;
1990 $! = eval "&Errno::$err"; # Convert to strerror() output
594feacd 1991 my $errnum = 0+$!;
9bc33472
KW
1992 my $strerror = "$!";
1993 if ("$strerror" =~ /\P{ASCII}/) {
1994 $ok14 = utf8::is_utf8($strerror);
1995 no locale;
1996 $ok14_5 = "$!" !~ /\P{ASCII}/;
594feacd
KW
1997 debug( disp_str(
1998 "non-ASCII \$! for error $errnum='$strerror'"))
1999 if ! $ok14_5;
9bc33472
KW
2000 last;
2001 }
1500bd91
KW
2002 }
2003 }
28acfe03
KW
2004
2005 # Similarly, we verify that a non-ASCII radix is in UTF-8. This
2006 # also catches if there is a disparity between sprintf and
2007 # stringification.
2008
2009 my $string_g = "$g";
2ef12107 2010 my $sprintf_g = sprintf("%g", $g);
28acfe03 2011
2ef12107
KW
2012 $ok15 = $string_g =~ / ^ \p{ASCII}+ $ /x || utf8::is_utf8($string_g);
2013 $ok16 = $sprintf_g eq $string_g;
66cbab2c 2014 }
e46375fa
KW
2015 {
2016 no locale;
2017 $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i);
2018 }
2019 $ok18 = $j eq sprintf("%g:%g", $h, $i);
57ba90dd
KW
2020 }
2021
d6bfff6d
KW
2022 $ok19 = $ok20 = 1;
2023 if (setlocale(&POSIX::LC_TIME, $Locale)) { # These tests aren't affected by
2024 # :not_characters
57ba90dd
KW
2025 my @times = CORE::localtime();
2026
2027 use locale;
2028 $ok19 = POSIX::strftime("%p", @times) ne "%p"; # [perl #119425]
dc5a2383
KW
2029 my $date = POSIX::strftime("'%A' '%B' '%Z' '%p'", @times);
2030 debug("'Day' 'Month' 'TZ' 'am/pm' = ", disp_str($date));
57ba90dd
KW
2031
2032 # If there is any non-ascii, it better be UTF-8 in a UTF-8 locale, and
2033 # not UTF-8 if the locale isn't UTF-8.
2034 $ok20 = $date =~ / ^ \p{ASCII}+ $ /x
2035 || $is_utf8_locale == utf8::is_utf8($date);
66cbab2c
KW
2036 }
2037
8ee4b769 2038 $ok21 = 1;
ef9d5242 2039 if (locales_enabled('LC_MESSAGES')) {
a2917a7e
KW
2040 foreach my $err (keys %!) {
2041 no locale;
2042 use Errno;
2043 $! = eval "&Errno::$err"; # Convert to strerror() output
2044 my $strerror = "$!";
c9dda6da 2045 if ($strerror =~ /\P{ASCII}/) {
a2917a7e 2046 $ok21 = 0;
c9dda6da 2047 debug(disp_str("non-ASCII strerror=$strerror"));
a2917a7e 2048 last;
ef9d5242 2049 }
8ee4b769
KW
2050 }
2051 }
2052
30032ef4 2053 report_result($Locale, ++$locales_test_number, $ok1);
66cbab2c
KW
2054 $test_names{$locales_test_number} = 'Verify that an intervening printf doesn\'t change assignment results';
2055 my $first_a_test = $locales_test_number;
2056
46973bb2 2057 debug "$first_a_test..$locales_test_number: \$a = $a, \$b = $b, Locale = $Locale\n";
66cbab2c 2058
30032ef4 2059 report_result($Locale, ++$locales_test_number, $ok2);
66cbab2c
KW
2060 $test_names{$locales_test_number} = 'Verify that an intervening sprintf doesn\'t change assignment results';
2061
2062 my $first_c_test = $locales_test_number;
2063
435847ca
JH
2064 $test_names{++$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a constant';
2065 if ($Config{usequadmath}) {
2066 print "# Skip: no locale radix with usequadmath ($test_names{$locales_test_number})\n";
2067 report_result($Locale, $locales_test_number, 1);
2068 } else {
2069 report_result($Locale, $locales_test_number, $ok3);
2070 $problematical_tests{$locales_test_number} = 1;
2071 }
6be75cd7 2072
435847ca
JH
2073 $test_names{++$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar';
2074 if ($Config{usequadmath}) {
2075 print "# Skip: no locale radix with usequadmath ($test_names{$locales_test_number})\n";
2076 report_result($Locale, $locales_test_number, 1);
2077 } else {
2078 report_result($Locale, $locales_test_number, $ok4);
2079 $problematical_tests{$locales_test_number} = 1;
2080 }
66cbab2c 2081
30032ef4 2082 report_result($Locale, ++$locales_test_number, $ok5);
71e5cbb3 2083 $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar and an intervening sprintf';
b057411d 2084 $problematical_tests{$locales_test_number} = 1;
66cbab2c 2085
46973bb2 2086 debug "$first_c_test..$locales_test_number: \$c = $c, \$d = $d, Locale = $Locale\n";
66cbab2c 2087
30032ef4 2088 report_result($Locale, ++$locales_test_number, $ok6);
b79536ea 2089 $test_names{$locales_test_number} = 'Verify that can assign stringified under inner no-locale block';
71e5cbb3 2090 my $first_e_test = $locales_test_number;
6be75cd7 2091
30032ef4 2092 report_result($Locale, ++$locales_test_number, $ok7);
71e5cbb3 2093 $test_names{$locales_test_number} = 'Verify that "==" with a scalar still works in inner no locale';
66cbab2c 2094
435847ca
JH
2095 $test_names{++$locales_test_number} = 'Verify that "==" with a scalar and an intervening sprintf still works in inner no locale';
2096 if ($Config{usequadmath}) {
2097 print "# Skip: no locale radix with usequadmath ($test_names{$locales_test_number})\n";
2098 report_result($Locale, $locales_test_number, 1);
2099 } else {
2100 report_result($Locale, $locales_test_number, $ok8);
2101 $problematical_tests{$locales_test_number} = 1;
2102 }
c4093d7d 2103
46973bb2 2104 debug "$first_e_test..$locales_test_number: \$e = $e, no locale\n";
2a680da6 2105
30032ef4 2106 report_result($Locale, ++$locales_test_number, $ok9);
71e5cbb3 2107 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a constant';
b057411d 2108 $problematical_tests{$locales_test_number} = 1;
71e5cbb3 2109 my $first_f_test = $locales_test_number;
6be75cd7 2110
30032ef4 2111 report_result($Locale, ++$locales_test_number, $ok10);
71e5cbb3 2112 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar';
b057411d 2113 $problematical_tests{$locales_test_number} = 1;
66cbab2c 2114
435847ca
JH
2115 $test_names{++$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar and an intervening sprintf';
2116 if ($Config{usequadmath}) {
2117 print "# Skip: no locale radix with usequadmath ($test_names{$locales_test_number})\n";
2118 report_result($Locale, $locales_test_number, 1);
2119 } else {
2120 report_result($Locale, $locales_test_number, $ok11);
2121 $problematical_tests{$locales_test_number} = 1;
2122 }
906f284f 2123
30032ef4 2124 report_result($Locale, ++$locales_test_number, $ok12);
71e5cbb3 2125 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix can participate in an addition and function call as numeric';
b057411d 2126 $problematical_tests{$locales_test_number} = 1;
c4093d7d 2127
30032ef4 2128 report_result($Locale, ++$locales_test_number, $ok13);
71e5cbb3 2129 $test_names{$locales_test_number} = 'Verify that don\'t get warning under "==" even if radix is not a dot';
b057411d 2130 $problematical_tests{$locales_test_number} = 1;
c4093d7d 2131
30032ef4 2132 report_result($Locale, ++$locales_test_number, $ok14);
5320b60d 2133 $test_names{$locales_test_number} = 'Verify that non-ASCII UTF-8 error messages are in UTF-8';
1500bd91 2134
2c6ee1a7
KW
2135 report_result($Locale, ++$locales_test_number, $ok14_5);
2136 $test_names{$locales_test_number} = '... and are ASCII outside "use locale"';
2137
30032ef4 2138 report_result($Locale, ++$locales_test_number, $ok15);
28acfe03
KW
2139 $test_names{$locales_test_number} = 'Verify that a number with a UTF-8 radix has a UTF-8 stringification';
2140
30032ef4 2141 report_result($Locale, ++$locales_test_number, $ok16);
28acfe03 2142 $test_names{$locales_test_number} = 'Verify that a sprintf of a number with a UTF-8 radix yields UTF-8';
dc51aebe 2143 $problematical_tests{$locales_test_number} = 1;
28acfe03 2144
e46375fa
KW
2145 report_result($Locale, ++$locales_test_number, $ok17);
2146 $test_names{$locales_test_number} = 'Verify that a sprintf of a number outside locale scope uses a dot radix';
2147
2148 report_result($Locale, ++$locales_test_number, $ok18);
2149 $test_names{$locales_test_number} = 'Verify that a sprintf of a number back within locale scope uses locale radix';
dc51aebe 2150 $problematical_tests{$locales_test_number} = 1;
e46375fa 2151
f406a445
KW
2152 report_result($Locale, ++$locales_test_number, $ok19);
2153 $test_names{$locales_test_number} = 'Verify that strftime doesn\'t return "%p" in locales where %p is empty';
2154
9717af6d
KW
2155 report_result($Locale, ++$locales_test_number, $ok20);
2156 $test_names{$locales_test_number} = 'Verify that strftime returns date with UTF-8 flag appropriately set';
6a78954a
KW
2157 $problematical_tests{$locales_test_number} = 1; # This is broken in
2158 # OS X 10.9.3
9717af6d 2159
8ee4b769
KW
2160 report_result($Locale, ++$locales_test_number, $ok21);
2161 $test_names{$locales_test_number} = '"$!" is ASCII only outside of locale scope';
2162
46973bb2 2163 debug "$first_f_test..$locales_test_number: \$f = $f, \$g = $g, back to locale = $Locale\n";
906f284f 2164
26d80d95 2165 # Does taking lc separately differ from taking
126fe723 2166 # the lc "in-line"? (This was the bug 19990704.002 (#965), change #3568.)
26d80d95 2167 # The bug was in the caching of the 'o'-magic.
66cbab2c 2168 if (! $is_utf8_locale) {
2a680da6 2169 use locale;
6be75cd7 2170
2a680da6
JH
2171 sub lcA {
2172 my $lc0 = lc $_[0];
2173 my $lc1 = lc $_[1];
2174 return $lc0 cmp $lc1;
2175 }
6be75cd7 2176
2a680da6
JH
2177 sub lcB {
2178 return lc($_[0]) cmp lc($_[1]);
2179 }
6be75cd7 2180
2a680da6
JH
2181 my $x = "ab";
2182 my $y = "aa";
2183 my $z = "AB";
6be75cd7 2184
30032ef4 2185 report_result($Locale, ++$locales_test_number,
2a680da6
JH
2186 lcA($x, $y) == 1 && lcB($x, $y) == 1 ||
2187 lcA($x, $z) == 0 && lcB($x, $z) == 0);
6be75cd7 2188 }
66cbab2c
KW
2189 else {
2190 use locale ':not_characters';
2191
2192 sub lcC {
2193 my $lc0 = lc $_[0];
2194 my $lc1 = lc $_[1];
2195 return $lc0 cmp $lc1;
2196 }
2197
2198 sub lcD {
2199 return lc($_[0]) cmp lc($_[1]);
2200 }
2201
2202 my $x = "ab";
2203 my $y = "aa";
2204 my $z = "AB";
2205
30032ef4 2206 report_result($Locale, ++$locales_test_number,
66cbab2c
KW
2207 lcC($x, $y) == 1 && lcD($x, $y) == 1 ||
2208 lcC($x, $z) == 0 && lcD($x, $z) == 0);
2209 }
2210 $test_names{$locales_test_number} = 'Verify "lc(foo) cmp lc(bar)" is the same as using intermediaries for the cmp';
d8093b23 2211
26d80d95
LC
2212 # Does lc of an UPPER (if different from the UPPER) match
2213 # case-insensitively the UPPER, and does the UPPER match
2214 # case-insensitively the lc of the UPPER. And vice versa.
3ba0e062 2215 {
ef4a39e5
JH
2216 use locale;
2217 no utf8;
2218 my $re = qr/[\[\(\{\*\+\?\|\^\$\\]/;
2219
2220 my @f = ();
c4093d7d 2221 ++$locales_test_number;
c08acc4c 2222 $test_names{$locales_test_number} = 'Verify case insensitive matching works';
b9df08e4 2223 foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) {
66cbab2c 2224 if (! $is_utf8_locale) {
71e5cbb3
KW
2225 my $y = lc $x;
2226 next unless uc $y eq $x;
46973bb2 2227 debug_more( "UPPER=", disp_chars(($x)),
92891c66 2228 "; lc=", disp_chars(($y)), "; ",
4eac893c 2229 "; fc=", disp_chars((fc $x)), "; ",
92891c66 2230 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
4d2de105 2231 $x =~ /\Q$y/i ? 1 : 0,
92891c66
KW
2232 "; ",
2233 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
4d2de105 2234 $y =~ /\Q$x/i ? 1 : 0,
92891c66 2235 "\n");
71e5cbb3
KW
2236 #
2237 # If $x and $y contain regular expression characters
2238 # AND THEY lowercase (/i) to regular expression characters,
2239 # regcomp() will be mightily confused. No, the \Q doesn't
2240 # help here (maybe regex engine internal lowercasing
2241 # is done after the \Q?) An example of this happening is
2242 # the bg_BG (Bulgarian) locale under EBCDIC (OS/390 USS):
2243 # the chr(173) (the "[") is the lowercase of the chr(235).
2244 #
2245 # Similarly losing EBCDIC locales include cs_cz, cs_CZ,
2246 # el_gr, el_GR, en_us.IBM-037 (!), en_US.IBM-037 (!),
2247 # et_ee, et_EE, hr_hr, hr_HR, hu_hu, hu_HU, lt_LT,
2248 # mk_mk, mk_MK, nl_nl.IBM-037, nl_NL.IBM-037,
2249 # pl_pl, pl_PL, ro_ro, ro_RO, ru_ru, ru_RU,
2250 # sk_sk, sk_SK, sl_si, sl_SI, tr_tr, tr_TR.
2251 #
2252 # Similar things can happen even under (bastardised)
2253 # non-EBCDIC locales: in many European countries before the
2254 # advent of ISO 8859-x nationally customised versions of
2255 # ISO 646 were devised, reusing certain punctuation
2256 # characters for modified characters needed by the
2257 # country/language. For example, the "|" might have
2258 # stood for U+00F6 or LATIN SMALL LETTER O WITH DIAERESIS.
2259 #
2260 if ($x =~ $re || $y =~ $re) {
2261 print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n";
2262 next;
2263 }
4d2de105 2264 push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
26c1569f
KW
2265
2266 # fc is not a locale concept, so Perl uses lc for it.
2267 push @f, $x unless lc $x eq fc $x;
66cbab2c
KW
2268 }
2269 else {
2270 use locale ':not_characters';
2271 my $y = lc $x;
2272 next unless uc $y eq $x;
46973bb2 2273 debug_more( "UPPER=", disp_chars(($x)),
92891c66 2274 "; lc=", disp_chars(($y)), "; ",
4eac893c 2275 "; fc=", disp_chars((fc $x)), "; ",
92891c66 2276 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
4d2de105 2277 $x =~ /\Q$y/i ? 1 : 0,
92891c66
KW
2278 "; ",
2279 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
4d2de105 2280 $y =~ /\Q$x/i ? 1 : 0,
92891c66 2281 "\n");
66cbab2c 2282
4d2de105 2283 push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
26c1569f
KW
2284
2285 # The places where Unicode's lc is different from fc are
2286 # skipped here by virtue of the 'next unless uc...' line above
2287 push @f, $x unless lc $x eq fc $x;
66cbab2c 2288 }
c00ff1c7 2289 }
ef4a39e5 2290
b9df08e4 2291 foreach my $x (sort { ord $a <=> ord $b } keys %lower) {
66cbab2c 2292 if (! $is_utf8_locale) {
71e5cbb3
KW
2293 my $y = uc $x;
2294 next unless lc $y eq $x;
46973bb2 2295 debug_more( "lower=", disp_chars(($x)),
92891c66 2296 "; uc=", disp_chars(($y)), "; ",
4eac893c 2297 "; fc=", disp_chars((fc $x)), "; ",
92891c66 2298 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
4d2de105 2299 $x =~ /\Q$y/i ? 1 : 0,
92891c66
KW
2300 "; ",
2301 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
4d2de105 2302 $y =~ /\Q$x/i ? 1 : 0,
92891c66 2303 "\n");
71e5cbb3
KW
2304 if ($x =~ $re || $y =~ $re) { # See above.
2305 print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n";
2306 next;
2307 }
4d2de105 2308 push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
26c1569f
KW
2309
2310 push @f, $x unless lc $x eq fc $x;
66cbab2c
KW
2311 }
2312 else {
2313 use locale ':not_characters';
2314 my $y = uc $x;
2315 next unless lc $y eq $x;
46973bb2 2316 debug_more( "lower=", disp_chars(($x)),
92891c66 2317 "; uc=", disp_chars(($y)), "; ",
4eac893c 2318 "; fc=", disp_chars((fc $x)), "; ",
92891c66 2319 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
4d2de105 2320 $x =~ /\Q$y/i ? 1 : 0,
92891c66
KW
2321 "; ",
2322 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
4d2de105 2323 $y =~ /\Q$x/i ? 1 : 0,
92891c66 2324 "\n");
4d2de105 2325 push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
26c1569f
KW
2326
2327 push @f, $x unless lc $x eq fc $x;
66cbab2c 2328 }
c00ff1c7 2329 }
7c844d17 2330 report_multi_result($Locale, $locales_test_number, \@f);
5b1f18a7 2331 $problematical_tests{$locales_test_number} = 1;
d8093b23 2332 }
78787052
JL
2333
2334 # [perl #109318]
2335 {
2336 my @f = ();
2337 ++$locales_test_number;
2338 $test_names{$locales_test_number} = 'Verify atof with locale radix and negative exponent';
b057411d 2339 $problematical_tests{$locales_test_number} = 1;
78787052
JL
2340
2341 my $radix = POSIX::localeconv()->{decimal_point};
2342 my @nums = (
2343 "3.14e+9", "3${radix}14e+9", "3.14e-9", "3${radix}14e-9",
2344 "-3.14e+9", "-3${radix}14e+9", "-3.14e-9", "-3${radix}14e-9",
2345 );
2346
2347 if (! $is_utf8_locale) {
2348 use locale;
2349 for my $num (@nums) {
2350 push @f, $num
2351 unless sprintf("%g", $num) =~ /3.+14/;
2352 }
2353 }
2354 else {
2355 use locale ':not_characters';
2356 for my $num (@nums) {
2357 push @f, $num
2358 unless sprintf("%g", $num) =~ /3.+14/;
2359 }
2360 }
2361
435847ca
JH
2362 if ($Config{usequadmath}) {
2363 print "# Skip: no locale radix with usequadmath ($Locale)\n";
2364 report_result($Locale, $locales_test_number, 1);
2365 } else {
2366 report_result($Locale, $locales_test_number, @f == 0);
2367 if (@f) {
2368 print "# failed $locales_test_number locale '$Locale' numbers @f\n"
2369 }
78787052
JL
2370 }
2371 }
8ebc5c01 2372}
284102e8 2373
c4093d7d 2374my $final_locales_test_number = $locales_test_number;
6cf0b567 2375
2a680da6
JH
2376# Recount the errors.
2377
852b6749 2378TEST_NUM:
0d3899b1 2379foreach $test_num ($first_locales_test_number..$final_locales_test_number) {
c4093d7d
KW
2380 if (%setlocale_failed) {
2381 print "not ";
2382 }
852b6749
KW
2383 elsif ($Problem{$test_num}
2384 || ! defined $Okay{$test_num}
2385 || ! @{$Okay{$test_num}})
2386 {
c4093d7d 2387 if (defined $not_necessarily_a_problem_test_number
0d3899b1 2388 && $test_num == $not_necessarily_a_problem_test_number)
c4093d7d
KW
2389 {
2390 print "# The failure of test $not_necessarily_a_problem_test_number is not necessarily fatal.\n";
b4e009be 2391 print "# It usually indicates a problem in the environment,\n";
284102e8
JH
2392 print "# not in Perl itself.\n";
2393 }
852b6749
KW
2394
2395 # If there are any locales that pass this test, or are known-bad, it
2396 # may be that there are enough passes that we TODO the failure.
2397 if (($Okay{$test_num} || $Known_bad_locale{$test_num})
2398 && grep { $_ == $test_num } keys %problematical_tests)
2399 {
26230909 2400 no warnings 'experimental::postderef';
852b6749
KW
2401
2402 # Don't count the known-bad failures when calculating the
2403 # percentage that fail.
2404 my $known_failures = (exists $Known_bad_locale{$test_num})
26230909 2405 ? scalar(keys $Known_bad_locale{$test_num}->%*)
852b6749 2406 : 0;
26230909 2407 my $adjusted_failures = scalar(keys $Problem{$test_num}->%*)
852b6749
KW
2408 - $known_failures;
2409
2410 # Specially handle failures where only known-bad locales fail.
2411 # This makes the diagnositics clearer.
2412 if ($adjusted_failures <= 0) {
2413 print "not ok $test_num $test_names{$test_num} # TODO fails only on ",
2414 "known bad locales: ",
26230909 2415 join " ", keys $Known_bad_locale{$test_num}->%*, "\n";
852b6749
KW
2416 next TEST_NUM;
2417 }
2418
0a974e2d 2419 # Round to nearest .1%
852b6749 2420 my $percent_fail = (int(.5 + (1000 * $adjusted_failures
0a974e2d
KW
2421 / scalar(@Locale))))
2422 / 10;
a20d09fc
KW
2423 if ($percent_fail < $acceptable_failure_percentage) {
2424 if (! $debug) {
2425 $test_names{$test_num} .= 'TODO';
852b6749
KW
2426 print "# ", 100 - $percent_fail, "% of locales not known to be problematic on this platform\n";
2427 print "# pass the following test, so it is likely that the failures\n";
a20d09fc
KW
2428 print "# are errors in the locale definitions. The test is marked TODO, as the\n";
2429 print "# problem is not likely to be Perl's\n";
2430 }
2431 }
30962f68 2432 if ($debug) {
a20d09fc 2433 print "# $percent_fail% of locales (",
26230909 2434 scalar(keys $Problem{$test_num}->%*),
a20d09fc
KW
2435 " of ",
2436 scalar(@Locale),
30962f68
KW
2437 ") fail the above test (TODO cut-off is ",
2438 $acceptable_failure_percentage,
2439 "%)\n";
6d5d702a
KW
2440 }
2441 }
f5627fc1
KW
2442 print "#\n";
2443 if ($debug) {
2444 print "# The code points that had this failure are given above. Look for lines\n";
0d3899b1 2445 print "# that match 'failed $test_num'\n";
f5627fc1
KW
2446 }
2447 else {
2448 print "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n";
0d3899b1 2449 print "# Then look at that output for lines that match 'failed $test_num'\n";
108a305e 2450 }
284102e8 2451 print "not ";
8ebc5c01 2452 }
0d3899b1
KW
2453 print "ok $test_num";
2454 if (defined $test_names{$test_num}) {
6c2e653d 2455 # If TODO is in the test name, make it thus
0d3899b1
KW
2456 my $todo = $test_names{$test_num} =~ s/TODO\s*//;
2457 print " $test_names{$test_num}";
6c2e653d
KW
2458 print " # TODO" if $todo;
2459 }
c4093d7d 2460 print "\n";
8ebc5c01 2461}
fb73857a 2462
c4093d7d 2463$test_num = $final_locales_test_number;
c213d471 2464
4bc67723 2465unless ( $^O =~ m!^(dragonfly|openbsd|bitrig|mirbsd)$! ) {
083ace8f 2466 # perl #115808
fbd840df
KW
2467 use warnings;
2468 my $warned = 0;
2469 local $SIG{__WARN__} = sub {
2470 $warned = $_[0] =~ /uninitialized/;
2471 };
2472 my $z = "y" . setlocale(&POSIX::LC_ALL, "xyzzy");
2473 ok($warned, "variable set to setlocale(BAD LOCALE) is considered uninitialized");
2474}
2475
094a2f8c 2476# Test that tainting and case changing works on utf8 strings. These tests are
1f5852c9
KW
2477# placed last to avoid disturbing the hard-coded test numbers that existed at
2478# the time these were added above this in this file.
0099bb8d
KW
2479# This also tests that locale overrides unicode_strings in the same scope for
2480# non-utf8 strings.
a810e350 2481setlocale(&POSIX::LC_ALL, "C");
094a2f8c
KW
2482{
2483 use locale;
0099bb8d 2484 use feature 'unicode_strings';
094a2f8c 2485
26c1569f 2486 foreach my $function ("uc", "ucfirst", "lc", "lcfirst", "fc") {
094a2f8c
KW
2487 my @list; # List of code points to test for $function
2488
2489 # Used to calculate the changed case for ASCII characters by using the
2490 # ord, instead of using one of the functions under test.
2491 my $ascii_case_change_delta;
2492 my $above_latin1_case_change_delta; # Same for the specific ords > 255
2493 # that we use
2494
445bf929 2495 # We test an ASCII character, which should change case;
094a2f8c 2496 # a Latin1 character, which shouldn't change case under this C locale,
094a2f8c 2497 # an above-Latin1 character that when the case is changed would cross
445bf929
KW
2498 # the 255/256 boundary, so doesn't change case
2499 # (the \x{149} is one of these, but changes into 2 characters, the
094a2f8c
KW
2500 # first one of which doesn't cross the boundary.
2501 # the final one in each list is an above-Latin1 character whose case
445bf929
KW
2502 # does change. The code below uses its position in its list as a
2503 # marker to indicate that it, unlike the other code points above
2504 # ASCII, has a successful case change
2505 #
2506 # All casing operations under locale (but not :not_characters) should
2507 # taint
094a2f8c 2508 if ($function =~ /^u/) {
9de25f67
KW
2509 @list = ("", "a",
2510 chr(utf8::unicode_to_native(0xe0)),
2511 chr(utf8::unicode_to_native(0xff)),
2512 "\x{fb00}", "\x{149}", "\x{101}");
2513 $ascii_case_change_delta = ($is_ebcdic) ? +64 : -32;
094a2f8c
KW
2514 $above_latin1_case_change_delta = -1;
2515 }
2516 else {
9de25f67
KW
2517 @list = ("", "A",
2518 chr(utf8::unicode_to_native(0xC0)),
2519 "\x{17F}", "\x{100}");
2520 $ascii_case_change_delta = ($is_ebcdic) ? -64 : +32;
094a2f8c
KW
2521 $above_latin1_case_change_delta = +1;
2522 }
66cbab2c 2523 foreach my $is_utf8_locale (0 .. 1) {
71e5cbb3
KW
2524 foreach my $j (0 .. $#list) {
2525 my $char = $list[$j];
0099bb8d
KW
2526
2527 for my $encoded_in_utf8 (0 .. 1) {
faf0c248
KW
2528 my $should_be;
2529 my $changed;
2530 if (! $is_utf8_locale) {
ab0b796c 2531 no warnings 'locale';
faf0c248
KW
2532 $should_be = ($j == $#list)
2533 ? chr(ord($char) + $above_latin1_case_change_delta)
9de25f67
KW
2534 : (length $char == 0 || utf8::native_to_unicode(ord($char)) > 127)
2535 ? $char
2536 : chr(ord($char) + $ascii_case_change_delta);
faf0c248
KW
2537
2538 # This monstrosity is in order to avoid using an eval,
2539 # which might perturb the results
2540 $changed = ($function eq "uc")
2541 ? uc($char)
2542 : ($function eq "ucfirst")
2543 ? ucfirst($char)
2544 : ($function eq "lc")
2545 ? lc($char)
2546 : ($function eq "lcfirst")
2547 ? lcfirst($char)
26c1569f
KW
2548 : ($function eq "fc")
2549 ? fc($char)
faf0c248
KW
2550 : die("Unexpected function \"$function\"");
2551 }
2552 else {
2553 {
2554 no locale;
71e5cbb3 2555
faf0c248
KW
2556 # For utf8-locales the case changing functions
2557 # should work just like they do outside of locale.
2558 # Can use eval here because not testing it when
2559 # not in locale.
2560 $should_be = eval "$function('$char')";
2561 die "Unexpected eval error $@ from 'eval \"$function('$char')\"'" if $@;
71e5cbb3 2562
faf0c248
KW
2563 }
2564 use locale ':not_characters';
2565 $changed = ($function eq "uc")
2566 ? uc($char)
2567 : ($function eq "ucfirst")
2568 ? ucfirst($char)
2569 : ($function eq "lc")
2570 ? lc($char)
2571 : ($function eq "lcfirst")
2572 ? lcfirst($char)
26c1569f
KW
2573 : ($function eq "fc")
2574 ? fc($char)
faf0c248 2575 : die("Unexpected function \"$function\"");
71e5cbb3 2576 }
faf0c248
KW
2577 ok($changed eq $should_be,
2578 "$function(\"$char\") in C locale "
2579 . (($is_utf8_locale)
2580 ? "(use locale ':not_characters'"
2581 : "(use locale")
2582 . (($encoded_in_utf8)
2583 ? "; encoded in utf8)"
2584 : "; not encoded in utf8)")
2585 . " should be \"$should_be\", got \"$changed\"");
2586
445bf929
KW
2587 # Tainting shouldn't happen for use locale :not_character
2588 # (a utf8 locale)
2589 (! $is_utf8_locale)
faf0c248
KW
2590 ? check_taint($changed)
2591 : check_taint_not($changed);
2592
2593 # Use UTF-8 next time through the loop
2594 utf8::upgrade($char);
0099bb8d 2595 }
66cbab2c 2596 }
094a2f8c
KW
2597 }
2598 }
2599}
2600
1bfe8fea
KW
2601# Give final advice.
2602
2603my $didwarn = 0;
2604
2605foreach ($first_locales_test_number..$final_locales_test_number) {
2606 if ($Problem{$_}) {
2607 my @f = sort keys %{ $Problem{$_} };
852b6749
KW
2608
2609 # Don't list the failures caused by known-bad locales.
2610 if (exists $known_bad_locales{$^O}) {
2611 @f = grep { $_ !~ $known_bad_locales{$^O} } @f;
2612 next unless @f;
2613 }
1bfe8fea
KW
2614 my $f = join(" ", @f);
2615 $f =~ s/(.{50,60}) /$1\n#\t/g;
2616 print
2617 "#\n",
2618 "# The locale ", (@f == 1 ? "definition" : "definitions"), "\n#\n",
2619 "#\t", $f, "\n#\n",
2620 "# on your system may have errors because the locale test $_\n",
2621 "# \"$test_names{$_}\"\n",
2622 "# failed in ", (@f == 1 ? "that locale" : "those locales"),
2623 ".\n";
2624 print <<EOW;
2625#
2626# If your users are not using these locales you are safe for the moment,
98393d94 2627# but please report this failure first to perlbug\@perl.org using the
1bfe8fea
KW
2628# perlbug script (as described in the INSTALL file) so that the exact
2629# details of the failures can be sorted out first and then your operating
2630# system supplier can be alerted about these anomalies.
2631#
2632EOW
2633 $didwarn = 1;
2634 }
2635}
2636
2637# Tell which locales were okay and which were not.
2638
2639if ($didwarn) {
2640 my (@s, @F);
2641
2642 foreach my $l (@Locale) {
2643 my $p = 0;
2644 if ($setlocale_failed{$l}) {
2645 $p++;
2646 }
2647 else {
2648 foreach my $t
2649 ($first_locales_test_number..$final_locales_test_number)
2650 {
2651 $p++ if $Problem{$t}{$l};
2652 }
2653 }
2654 push @s, $l if $p == 0;
2655 push @F, $l unless $p == 0;
2656 }
2657
2658 if (@s) {
2659 my $s = join(" ", @s);
2660 $s =~ s/(.{50,60}) /$1\n#\t/g;
2661
a5cf558a 2662 print
1bfe8fea
KW
2663 "# The following locales\n#\n",
2664 "#\t", $s, "\n#\n",
2665 "# tested okay.\n#\n",
2666 } else {
a5cf558a 2667 print "# None of your locales were fully okay.\n";
1bfe8fea
KW
2668 }
2669
2670 if (@F) {
2671 my $F = join(" ", @F);
2672 $F =~ s/(.{50,60}) /$1\n#\t/g;
2673
c7e7f6b7
KW
2674 my $details = "";
2675 unless ($debug) {
2676 $details = "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n";
2677 }
2678 elsif ($debug == 1) {
2679 $details = "# For even more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=2.\n";
2680 }
2681
a5cf558a 2682 print
1bfe8fea
KW
2683 "# The following locales\n#\n",
2684 "#\t", $F, "\n#\n",
2685 "# had problems.\n#\n",
c7e7f6b7 2686 $details;
1bfe8fea 2687 } else {
a5cf558a 2688 print "# None of your locales were broken.\n";
1bfe8fea
KW
2689 }
2690}
2691
852b6749
KW
2692if (exists $known_bad_locales{$^O} && ! %Known_bad_locale) {
2693 $test_num++;
2694 print "ok $test_num $^O no longer has known bad locales # TODO\n";
2695}
2696
fdf053ee 2697print "1..$test_num\n";
906f284f 2698
90248788 2699# eof