3 # This tests plain 'use locale' and adorned 'use locale ":not_characters"'
4 # Because these pragmas are compile time, and I (khw) am trying to test
5 # without using 'eval' as much as possible, which might cloud the issue, the
6 # crucial parts of the code are duplicated in a block for each pragma.
8 # Unfortunately, many systems have defective locale definitions. This test
9 # file looks for both perl bugs and bugs in the system's locale definitions.
10 # It can be difficult to tease apart which is which. For the latter, there
11 # are tests that are based on the POSIX standard. A character isn't supposed
12 # to be both a space and graphic, for example. Another example is if a
13 # character is the uppercase of another, that other should be the lowercase of
14 # the first. Including tests for these allows you to test for defective
15 # locales, as described in perllocale. The way this file distinguishes
16 # between defective locales, and perl bugs is to see what percentage of
17 # locales fail a given test. If it's a lot, then it's more likely to be a
18 # perl bug; only a few, those particular locales are likely defective. In
19 # that case the failing tests are marked TODO. (They should be reported to
20 # the vendor, however; but it's not perl's problem.) In some cases, this
21 # script has caused tickets to be filed against perl which turn out to be the
22 # platform's bug, but a higher percentage of locales are failing than the
23 # built-in cut-off point. For those platforms, code has been added to
24 # increase the cut-off, so those platforms don't trigger failing test reports.
25 # Ideally, the platforms would get fixed and that code would be changed to
26 # only kick-in when run on versions that are earlier than the fixed one. But,
27 # this rarely happens in practice.
29 # To make a TODO test, add the string 'TODO' to its %test_names value
31 my $is_ebcdic = ord("A") == 193;
34 no warnings 'locale'; # We test even weird locales; and do some scary things
37 binmode STDOUT, ':utf8';
38 binmode STDERR, ':utf8';
44 require './loc_tools.pl';
45 unless (locales_enabled('LC_CTYPE')) {
50 require Config; import Config;
56 # =1 adds debugging output; =2 increases the verbosity somewhat
57 our $debug = $ENV{PERL_DEBUG_FULL_TEST} // 0;
59 # Certain tests have been shown to be problematical for a few locales. Don't
60 # fail them unless at least this percentage of the tested locales fail.
61 # On AIX machines, many locales call a no-break space a graphic.
62 # (There aren't 1000 locales currently in existence, so 99.9 works)
63 # EBCDIC os390 has more locales fail than normal, because it has locales that
64 # move various critical characters like '['.
65 my $acceptable_failure_percentage = ($os =~ / ^ ( aix ) $ /x)
67 : ($os =~ / ^ ( os390 ) $ /x)
71 # The list of test numbers of the problematic tests.
72 my %problematical_tests;
74 # If any %problematical_tests fails in one of these locales, it is
76 my %known_bad_locales = (
77 irix => qr/ ^ (?: cs | hu | sk ) $/x,
78 darwin => qr/ ^ lt_LT.ISO8859 /ix,
79 os390 => qr/ ^ italian /ix,
80 netbsd => qr/\bISO8859-2\b/i,
82 # This may be the same bug as the cygwin below; it's
83 # generating malformed UTF-8 on the radix being
85 solaris => qr/ ^ ( ar_ | pa_ ) /x,
88 # cygwin isn't returning proper radix length in this locale, but supposedly to
89 # be fixed in later versions.
90 if ($os eq 'cygwin' && version->new(($Config{osvers} =~ /^(\d+(?:\.\d+)+)/)[0]) le v2.4.1) {
91 $known_bad_locales{'cygwin'} = qr/ ^ ps_AF /ix;
96 my $dumper = Dumpvalue->new(
103 return unless $debug;
104 my($mess) = join "", '# ', @_;
106 print STDERR $dumper->stringify($mess,1), "\n";
115 return unless $debug > 1;
120 printf STDERR @_ if $debug;
128 my ($result, $message) = @_;
129 $message = "" unless defined $message;
131 print 'not ' unless ($result);
132 print "ok " . ++$test_num;
135 return ($result) ? 1 : 0;
139 return ok 1, "skipped: " . shift;
146 # First we'll do a lot of taint checking for locales.
147 # This is the easiest to test, actually, as any locale,
148 # even the default locale will taint under 'use locale'.
150 sub is_tainted { # hello, camel two.
151 no warnings 'uninitialized' ;
154 not eval { $dummy = join("", @_), kill 0; 1 }
157 sub check_taint ($;$) {
158 my $message_tail = $_[1] // "";
160 # Extra blanks are so aligns with taint_not output
161 $message_tail = ": $message_tail" if $message_tail;
162 ok is_tainted($_[0]), "verify that is tainted$message_tail";
165 sub check_taint_not ($;$) {
166 my $message_tail = $_[1] // "";
167 $message_tail = ": $message_tail" if $message_tail;
168 ok((not is_tainted($_[0])), "verify that isn't tainted$message_tail");
171 foreach my $category (qw(ALL COLLATE CTYPE MESSAGES MONETARY NUMERIC TIME)) {
172 my $short_result = locales_enabled($category);
173 ok ($short_result == 0 || $short_result == 1,
174 "Verify locales_enabled('$category') returns 0 or 1");
175 debug("locales_enabled('$category') returned '$short_result'");
176 my $long_result = locales_enabled("LC_$category");
177 if (! ok ($long_result == $short_result,
178 " and locales_enabled('LC_$category') returns "
181 debug("locales_enabled('LC_$category') returned $long_result");
185 "\tb\t" =~ /^m?(\s)(.*)\1$/;
186 check_taint_not $&, "not tainted outside 'use locale'";
189 use locale; # engage locale and therefore locale taint.
191 # BE SURE TO COPY ANYTHING YOU ADD to these tests to the block below for
194 check_taint_not $a, '$a';
196 check_taint uc($a), 'uc($a)';
197 check_taint "\U$a", '"\U$a"';
198 check_taint ucfirst($a), 'ucfirst($a)';
199 check_taint "\u$a", '"\u$a"';
200 check_taint lc($a), 'lc($a)';
201 check_taint fc($a), 'fc($a)';
202 check_taint "\L$a", '"\L$a"';
203 check_taint "\F$a", '"\F$a"';
204 check_taint lcfirst($a), 'lcfirst($a)';
205 check_taint "\l$a", '"\l$a"';
207 check_taint_not sprintf('%e', 123.456), "sprintf('%e', 123.456)";
208 check_taint_not sprintf('%f', 123.456), "sprintf('%f', 123.456)";
209 check_taint_not sprintf('%g', 123.456), "sprintf('%g', 123.456)";
210 check_taint_not sprintf('%d', 123.456), "sprintf('%d', 123.456)";
211 check_taint_not sprintf('%x', 123.456), "sprintf('%x', 123.456)";
213 $_ = $a; # untaint $_
215 $_ = uc($a); # taint $_
217 check_taint $_, '$_ = uc($a)';
219 /(\w)/; # taint $&, $`, $', $+, $1.
220 check_taint $&, "\$& from /(\\w)/";
221 check_taint $`, "\t\$`";
222 check_taint $', "\t\$'";
223 check_taint $+, "\t\$+";
224 check_taint $1, "\t\$1";
225 check_taint_not $2, "\t\$2";
227 /(.)/; # untaint $&, $`, $', $+, $1.
228 check_taint_not $&, "\$& from /(.)/";
229 check_taint_not $`, "\t\$`";
230 check_taint_not $', "\t\$'";
231 check_taint_not $+, "\t\$+";
232 check_taint_not $1, "\t\$1";
233 check_taint_not $2, "\t\$2";
235 /(\W)/; # taint $&, $`, $', $+, $1.
236 check_taint $&, "\$& from /(\\W)/";
237 check_taint $`, "\t\$`";
238 check_taint $', "\t\$'";
239 check_taint $+, "\t\$+";
240 check_taint $1, "\t\$1";
241 check_taint_not $2, "\t\$2";
243 /(.)/; # untaint $&, $`, $', $+, $1.
244 check_taint_not $&, "\$& from /(.)/";
245 check_taint_not $`, "\t\$`";
246 check_taint_not $', "\t\$'";
247 check_taint_not $+, "\t\$+";
248 check_taint_not $1, "\t\$1";
249 check_taint_not $2, "\t\$2";
251 /(\s)/; # taint $&, $`, $', $+, $1.
252 check_taint $&, "\$& from /(\\s)/";
253 check_taint $`, "\t\$`";
254 check_taint $', "\t\$'";
255 check_taint $+, "\t\$+";
256 check_taint $1, "\t\$1";
257 check_taint_not $2, "\t\$2";
259 /(.)/; # untaint $&, $`, $', $+, $1.
260 check_taint_not $&, "\$& from /(.)/";
262 /(\S)/; # taint $&, $`, $', $+, $1.
263 check_taint $&, "\$& from /(\\S)/";
264 check_taint $`, "\t\$`";
265 check_taint $', "\t\$'";
266 check_taint $+, "\t\$+";
267 check_taint $1, "\t\$1";
268 check_taint_not $2, "\t\$2";
270 /(.)/; # untaint $&, $`, $', $+, $1.
271 check_taint_not $&, "\$& from /(.)/";
273 "0" =~ /(\d)/; # taint $&, $`, $', $+, $1.
274 check_taint $&, "\$& from /(\\d)/";
275 check_taint $`, "\t\$`";
276 check_taint $', "\t\$'";
277 check_taint $+, "\t\$+";
278 check_taint $1, "\t\$1";
279 check_taint_not $2, "\t\$2";
281 /(.)/; # untaint $&, $`, $', $+, $1.
282 check_taint_not $&, "\$& from /(.)/";
284 /(\D)/; # taint $&, $`, $', $+, $1.
285 check_taint $&, "\$& from /(\\D)/";
286 check_taint $`, "\t\$`";
287 check_taint $', "\t\$'";
288 check_taint $+, "\t\$+";
289 check_taint $1, "\t\$1";
290 check_taint_not $2, "\t\$2";
292 /(.)/; # untaint $&, $`, $', $+, $1.
293 check_taint_not $&, "\$& from /(.)/";
295 /([[:alnum:]])/; # taint $&, $`, $', $+, $1.
296 check_taint $&, "\$& from /([[:alnum:]])/";
297 check_taint $`, "\t\$`";
298 check_taint $', "\t\$'";
299 check_taint $+, "\t\$+";
300 check_taint $1, "\t\$1";
301 check_taint_not $2, "\t\$2";
303 /(.)/; # untaint $&, $`, $', $+, $1.
304 check_taint_not $&, "\$& from /(.)/";
306 /([[:^alnum:]])/; # taint $&, $`, $', $+, $1.
307 check_taint $&, "\$& from /([[:^alnum:]])/";
308 check_taint $`, "\t\$`";
309 check_taint $', "\t\$'";
310 check_taint $+, "\t\$+";
311 check_taint $1, "\t\$1";
312 check_taint_not $2, "\t\$2";
314 "a" =~ /(a)|(\w)/; # taint $&, $`, $', $+, $1.
315 check_taint $&, "\$& from /(a)|(\\w)/";
316 check_taint $`, "\t\$`";
317 check_taint $', "\t\$'";
318 check_taint $+, "\t\$+";
319 check_taint $1, "\t\$1";
320 ok($1 eq 'a', ("\t" x 5) . "\$1 is 'a'");
321 ok(! defined $2, ("\t" x 5) . "\$2 is undefined");
322 check_taint_not $2, "\t\$2";
323 check_taint_not $3, "\t\$3";
325 /(.)/; # untaint $&, $`, $', $+, $1.
326 check_taint_not $&, "\$& from /(.)/";
328 "\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i; # no tainting because no locale dependence
329 check_taint_not $&, "\$& from /(\\N{CYRILLIC CAPITAL LETTER A})/i";
330 check_taint_not $`, "\t\$`";
331 check_taint_not $', "\t\$'";
332 check_taint_not $+, "\t\$+";
333 check_taint_not $1, "\t\$1";
334 ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\t\$1 is 'small cyrillic a'");
335 check_taint_not $2, "\t\$2";
337 /(.)/; # untaint $&, $`, $', $+, $1.
338 check_taint_not $&, "\$& from /./";
340 "(\N{KELVIN SIGN})" =~ /(\N{KELVIN SIGN})/i; # taints because depends on locale
341 check_taint $&, "\$& from /(\\N{KELVIN SIGN})/i";
342 check_taint $`, "\t\$`";
343 check_taint $', "\t\$'";
344 check_taint $+, "\t\$+";
345 check_taint $1, "\t\$1";
346 check_taint_not $2, "\t\$2";
348 /(.)/; # untaint $&, $`, $', $+, $1.
349 check_taint_not $&, "\$& from /(.)/";
351 "a:" =~ /(.)\b(.)/; # taint $&, $`, $', $+, $1.
352 check_taint $&, "\$& from /(.)\\b(.)/";
353 check_taint $`, "\t\$`";
354 check_taint $', "\t\$'";
355 check_taint $+, "\t\$+";
356 check_taint $1, "\t\$1";
357 check_taint $2, "\t\$2";
358 check_taint_not $3, "\t\$3";
360 /(.)/; # untaint $&, $`, $', $+, $1.
361 check_taint_not $&, "\$& from /./";
363 "aa" =~ /(.)\B(.)/; # taint $&, $`, $', $+, $1.
364 check_taint $&, "\$& from /(.)\\B(.)/";
365 check_taint $`, "\t\$`";
366 check_taint $', "\t\$'";
367 check_taint $+, "\t\$+";
368 check_taint $1, "\t\$1";
369 check_taint $2, "\t\$2";
370 check_taint_not $3, "\t\$3";
372 /(.)/; # untaint $&, $`, $', $+, $1.
373 check_taint_not $&, "\$& from /./";
375 "aaa" =~ /(.).(\1)/i; # notaint because not locale dependent
376 check_taint_not $&, "\$ & from /(.).(\\1)/";
377 check_taint_not $`, "\t\$`";
378 check_taint_not $', "\t\$'";
379 check_taint_not $+, "\t\$+";
380 check_taint_not $1, "\t\$1";
381 check_taint_not $2, "\t\$2";
382 check_taint_not $3, "\t\$3";
384 /(.)/; # untaint $&, $`, $', $+, $1.
385 check_taint_not $&, "\$ & from /./";
387 $_ = $a; # untaint $_
389 check_taint_not $_, 'untainting $_ works';
391 /(b)/; # this must not taint
392 check_taint_not $&, "\$ & from /(b)/";
393 check_taint_not $`, "\t\$`";
394 check_taint_not $', "\t\$'";
395 check_taint_not $+, "\t\$+";
396 check_taint_not $1, "\t\$1";
397 check_taint_not $2, "\t\$2";
399 $_ = $a; # untaint $_
401 check_taint_not $_, 'untainting $_ works';
403 $b = uc($a); # taint $b
404 s/(.+)/$b/; # this must taint only the $_
406 check_taint $_, '$_ (wasn\'t tainted) from s/(.+)/$b/ where $b is tainted';
407 check_taint_not $&, "\t\$&";
408 check_taint_not $`, "\t\$`";
409 check_taint_not $', "\t\$'";
410 check_taint_not $+, "\t\$+";
411 check_taint_not $1, "\t\$1";
412 check_taint_not $2, "\t\$2";
414 $_ = $a; # untaint $_
416 s/(.+)/b/; # this must not taint
417 check_taint_not $_, '$_ (wasn\'t tainted) from s/(.+)/b/';
418 check_taint_not $&, "\t\$&";
419 check_taint_not $`, "\t\$`";
420 check_taint_not $', "\t\$'";
421 check_taint_not $+, "\t\$+";
422 check_taint_not $1, "\t\$1";
423 check_taint_not $2, "\t\$2";
425 $b = $a; # untaint $b
427 ($b = $a) =~ s/\w/$&/;
428 check_taint $b, '$b from ($b = $a) =~ s/\w/$&/'; # $b should be tainted.
429 check_taint_not $a, '$a from ($b = $a) =~ s/\w/$&/'; # $a should be not.
431 $_ = $a; # untaint $_
433 s/(\w)/\l$1/; # this must taint
434 check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\l$1/,'; # this must taint
435 check_taint $&, "\t\$&";
436 check_taint $`, "\t\$`";
437 check_taint $', "\t\$'";
438 check_taint $+, "\t\$+";
439 check_taint $1, "\t\$1";
440 check_taint_not $2, "\t\$2";
442 $_ = $a; # untaint $_
444 s/(\w)/\L$1/; # this must taint
445 check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\L$1/,';
446 check_taint $&, "\t\$&";
447 check_taint $`, "\t\$`";
448 check_taint $', "\t\$'";
449 check_taint $+, "\t\$+";
450 check_taint $1, "\t\$1";
451 check_taint_not $2, "\t\$2";
453 $_ = $a; # untaint $_
455 s/(\w)/\u$1/; # this must taint
456 check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\u$1/';
457 check_taint $&, "\t\$&";
458 check_taint $`, "\t\$`";
459 check_taint $', "\t\$'";
460 check_taint $+, "\t\$+";
461 check_taint $1, "\t\$1";
462 check_taint_not $2, "\t\$2";
464 $_ = $a; # untaint $_
466 s/(\w)/\U$1/; # this must taint
467 check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\U$1/';
468 check_taint $&, "\t\$&";
469 check_taint $`, "\t\$`";
470 check_taint $', "\t\$'";
471 check_taint $+, "\t\$+";
472 check_taint $1, "\t\$1";
473 check_taint_not $2, "\t\$2";
475 # After all this tainting $a should be cool.
477 check_taint_not $a, '$a still not tainted';
480 check_taint_not $1, '"a" =~ /([a-z])/';
481 "foo.bar_baz" =~ /^(.*)[._](.*?)$/; # Bug 120675
482 check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/';
484 # BE SURE TO COPY ANYTHING YOU ADD to the block below
486 { # This is just the previous tests copied here with a different
487 # compile-time pragma.
489 use locale ':not_characters'; # engage restricted locale with different
491 check_taint_not $a, '$a';
493 check_taint_not uc($a), 'uc($a)';
494 check_taint_not "\U$a", '"\U$a"';
495 check_taint_not ucfirst($a), 'ucfirst($a)';
496 check_taint_not "\u$a", '"\u$a"';
497 check_taint_not lc($a), 'lc($a)';
498 check_taint_not fc($a), 'fc($a)';
499 check_taint_not "\L$a", '"\L$a"';
500 check_taint_not "\F$a", '"\F$a"';
501 check_taint_not lcfirst($a), 'lcfirst($a)';
502 check_taint_not "\l$a", '"\l$a"';
504 check_taint_not sprintf('%e', 123.456), "sprintf('%e', 123.456)";
505 check_taint_not sprintf('%f', 123.456), "sprintf('%f', 123.456)";
506 check_taint_not sprintf('%g', 123.456), "sprintf('%g', 123.456)";
507 check_taint_not sprintf('%d', 123.456), "sprintf('%d', 123.456)";
508 check_taint_not sprintf('%x', 123.456), "sprintf('%x', 123.456)";
510 $_ = $a; # untaint $_
514 check_taint_not $_, '$_ = uc($a)';
517 check_taint_not $&, "\$& from /(\\w)/";
518 check_taint_not $`, "\t\$`";
519 check_taint_not $', "\t\$'";
520 check_taint_not $+, "\t\$+";
521 check_taint_not $1, "\t\$1";
522 check_taint_not $2, "\t\$2";
524 /(.)/; # untaint $&, $`, $', $+, $1.
525 check_taint_not $&, "\$& from /(.)/";
526 check_taint_not $`, "\t\$`";
527 check_taint_not $', "\t\$'";
528 check_taint_not $+, "\t\$+";
529 check_taint_not $1, "\t\$1";
530 check_taint_not $2, "\t\$2";
533 check_taint_not $&, "\$& from /(\\W)/";
534 check_taint_not $`, "\t\$`";
535 check_taint_not $', "\t\$'";
536 check_taint_not $+, "\t\$+";
537 check_taint_not $1, "\t\$1";
538 check_taint_not $2, "\t\$2";
540 /(.)/; # untaint $&, $`, $', $+, $1.
541 check_taint_not $&, "\$& from /(.)/";
542 check_taint_not $`, "\t\$`";
543 check_taint_not $', "\t\$'";
544 check_taint_not $+, "\t\$+";
545 check_taint_not $1, "\t\$1";
546 check_taint_not $2, "\t\$2";
549 check_taint_not $&, "\$& from /(\\s)/";
550 check_taint_not $`, "\t\$`";
551 check_taint_not $', "\t\$'";
552 check_taint_not $+, "\t\$+";
553 check_taint_not $1, "\t\$1";
554 check_taint_not $2, "\t\$2";
556 /(.)/; # untaint $&, $`, $', $+, $1.
557 check_taint_not $&, "\$& from /(.)/";
560 check_taint_not $&, "\$& from /(\\S)/";
561 check_taint_not $`, "\t\$`";
562 check_taint_not $', "\t\$'";
563 check_taint_not $+, "\t\$+";
564 check_taint_not $1, "\t\$1";
565 check_taint_not $2, "\t\$2";
567 /(.)/; # untaint $&, $`, $', $+, $1.
568 check_taint_not $&, "\$& from /(.)/";
571 check_taint_not $&, "\$& from /(\\d)/";
572 check_taint_not $`, "\t\$`";
573 check_taint_not $', "\t\$'";
574 check_taint_not $+, "\t\$+";
575 check_taint_not $1, "\t\$1";
576 check_taint_not $2, "\t\$2";
578 /(.)/; # untaint $&, $`, $', $+, $1.
579 check_taint_not $&, "\$& from /(.)/";
582 check_taint_not $&, "\$& from /(\\D)/";
583 check_taint_not $`, "\t\$`";
584 check_taint_not $', "\t\$'";
585 check_taint_not $+, "\t\$+";
586 check_taint_not $1, "\t\$1";
587 check_taint_not $2, "\t\$2";
589 /(.)/; # untaint $&, $`, $', $+, $1.
590 check_taint_not $&, "\$& from /(.)/";
593 check_taint_not $&, "\$& from /([[:alnum:]])/";
594 check_taint_not $`, "\t\$`";
595 check_taint_not $', "\t\$'";
596 check_taint_not $+, "\t\$+";
597 check_taint_not $1, "\t\$1";
598 check_taint_not $2, "\t\$2";
600 /(.)/; # untaint $&, $`, $', $+, $1.
601 check_taint_not $&, "\$& from /(.)/";
604 check_taint_not $&, "\$& from /([[:^alnum:]])/";
605 check_taint_not $`, "\t\$`";
606 check_taint_not $', "\t\$'";
607 check_taint_not $+, "\t\$+";
608 check_taint_not $1, "\t\$1";
609 check_taint_not $2, "\t\$2";
612 check_taint_not $&, "\$& from /(a)|(\\w)/";
613 check_taint_not $`, "\t\$`";
614 check_taint_not $', "\t\$'";
615 check_taint_not $+, "\t\$+";
616 check_taint_not $1, "\t\$1";
617 ok($1 eq 'a', ("\t" x 5) . "\$1 is 'a'");
618 ok(! defined $2, ("\t" x 5) . "\$2 is undefined");
619 check_taint_not $2, "\t\$2";
620 check_taint_not $3, "\t\$3";
622 /(.)/; # untaint $&, $`, $', $+, $1.
623 check_taint_not $&, "\$& from /(.)/";
625 "\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i;
626 check_taint_not $&, "\$& from /(\\N{CYRILLIC CAPITAL LETTER A})/i";
627 check_taint_not $`, "\t\$`";
628 check_taint_not $', "\t\$'";
629 check_taint_not $+, "\t\$+";
630 check_taint_not $1, "\t\$1";
631 ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\t\$1 is 'small cyrillic a'");
632 check_taint_not $2, "\t\$2";
634 /(.)/; # untaint $&, $`, $', $+, $1.
635 check_taint_not $&, "\$& from /./";
637 "(\N{KELVIN SIGN})" =~ /(\N{KELVIN SIGN})/i;
638 check_taint_not $&, "\$& from /(\\N{KELVIN SIGN})/i";
639 check_taint_not $`, "\t\$`";
640 check_taint_not $', "\t\$'";
641 check_taint_not $+, "\t\$+";
642 check_taint_not $1, "\t\$1";
643 check_taint_not $2, "\t\$2";
645 /(.)/; # untaint $&, $`, $', $+, $1.
646 check_taint_not $&, "\$& from /(.)/";
649 check_taint_not $&, "\$& from /(.)\\b(.)/";
650 check_taint_not $`, "\t\$`";
651 check_taint_not $', "\t\$'";
652 check_taint_not $+, "\t\$+";
653 check_taint_not $1, "\t\$1";
654 check_taint_not $2, "\t\$2";
655 check_taint_not $3, "\t\$3";
657 /(.)/; # untaint $&, $`, $', $+, $1.
658 check_taint_not $&, "\$& from /./";
661 check_taint_not $&, "\$& from /(.)\\B(.)/";
662 check_taint_not $`, "\t\$`";
663 check_taint_not $', "\t\$'";
664 check_taint_not $+, "\t\$+";
665 check_taint_not $1, "\t\$1";
666 check_taint_not $2, "\t\$2";
667 check_taint_not $3, "\t\$3";
669 /(.)/; # untaint $&, $`, $', $+, $1.
670 check_taint_not $&, "\$& from /./";
672 "aaa" =~ /(.).(\1)/i; # notaint because not locale dependent
673 check_taint_not $&, "\$ & from /(.).(\\1)/";
674 check_taint_not $`, "\t\$`";
675 check_taint_not $', "\t\$'";
676 check_taint_not $+, "\t\$+";
677 check_taint_not $1, "\t\$1";
678 check_taint_not $2, "\t\$2";
679 check_taint_not $3, "\t\$3";
681 /(.)/; # untaint $&, $`, $', $+, $1.
682 check_taint_not $&, "\$ & from /./";
684 $_ = $a; # untaint $_
686 check_taint_not $_, 'untainting $_ works';
689 check_taint_not $&, "\$ & from /(b)/";
690 check_taint_not $`, "\t\$`";
691 check_taint_not $', "\t\$'";
692 check_taint_not $+, "\t\$+";
693 check_taint_not $1, "\t\$1";
694 check_taint_not $2, "\t\$2";
696 $_ = $a; # untaint $_
698 check_taint_not $_, 'untainting $_ works';
701 check_taint_not $_, '$_ (wasn\'t tainted) from s/(.+)/b/';
702 check_taint_not $&, "\t\$&";
703 check_taint_not $`, "\t\$`";
704 check_taint_not $', "\t\$'";
705 check_taint_not $+, "\t\$+";
706 check_taint_not $1, "\t\$1";
707 check_taint_not $2, "\t\$2";
709 $b = $a; # untaint $b
711 ($b = $a) =~ s/\w/$&/;
712 check_taint_not $b, '$b from ($b = $a) =~ s/\w/$&/';
713 check_taint_not $a, '$a from ($b = $a) =~ s/\w/$&/';
715 $_ = $a; # untaint $_
718 check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\l$1/,'; # this must taint
719 check_taint_not $&, "\t\$&";
720 check_taint_not $`, "\t\$`";
721 check_taint_not $', "\t\$'";
722 check_taint_not $+, "\t\$+";
723 check_taint_not $1, "\t\$1";
724 check_taint_not $2, "\t\$2";
726 $_ = $a; # untaint $_
729 check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\L$1/,';
730 check_taint_not $&, "\t\$&";
731 check_taint_not $`, "\t\$`";
732 check_taint_not $', "\t\$'";
733 check_taint_not $+, "\t\$+";
734 check_taint_not $1, "\t\$1";
735 check_taint_not $2, "\t\$2";
737 $_ = $a; # untaint $_
740 check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\u$1/';
741 check_taint_not $&, "\t\$&";
742 check_taint_not $`, "\t\$`";
743 check_taint_not $', "\t\$'";
744 check_taint_not $+, "\t\$+";
745 check_taint_not $1, "\t\$1";
746 check_taint_not $2, "\t\$2";
748 $_ = $a; # untaint $_
751 check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\U$1/';
752 check_taint_not $&, "\t\$&";
753 check_taint_not $`, "\t\$`";
754 check_taint_not $', "\t\$'";
755 check_taint_not $+, "\t\$+";
756 check_taint_not $1, "\t\$1";
757 check_taint_not $2, "\t\$2";
759 # After all this tainting $a should be cool.
761 check_taint_not $a, '$a still not tainted';
764 check_taint_not $1, '"a" =~ /([a-z])/';
765 "foo.bar_baz" =~ /^(.*)[._](.*?)$/; # Bug 120675
766 check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/';
770 # Here are in scope of 'use locale'
772 # I think we've seen quite enough of taint.
773 # Let us do some *real* locale work now,
774 # unless setlocale() is missing (i.e. minitest).
776 # The test number before our first setlocale()
777 my $final_without_setlocale = $test_num;
781 debug "Scanning for locales...\n";
783 require POSIX; import POSIX ':locale_h';
785 my $categories = [ 'LC_CTYPE', 'LC_NUMERIC', 'LC_ALL' ];
786 debug "Scanning for just compatible";
787 my @Locale = find_locales($categories);
788 debug "Scanning for even incompatible";
789 my @include_incompatible_locales = find_locales($categories,
790 'even incompatible locales');
792 # The locales included in the incompatible list that aren't in the compatible
794 my @incompatible_locales;
796 if (@Locale < @include_incompatible_locales) {
800 foreach my $item (@include_incompatible_locales) {
801 push @incompatible_locales, $item unless exists $seen{$item};
804 # For each bad locale, switch into it to find out why it's incompatible
805 for my $bad_locale (@incompatible_locales) {
808 use warnings 'locale';
810 local $SIG{__WARN__} = sub {
813 push @warnings, ($warning =~ s/\n/\n# /sgr);
816 debug "Trying incompatible $bad_locale";
817 my $ret = setlocale(&POSIX::LC_CTYPE, $bad_locale);
819 my $message = "testing of locale '$bad_locale' is skipped";
821 skip $message . ":\n# " . join "\n# ", @warnings;
825 . " setlocale(&POSIX::LC_CTYPE, '$bad_locale') failed");
828 fail $message . ", because it is was found to be incompatible with"
829 . " Perl, but could not discern reason";
840 print "1..$test_num\n";
845 setlocale(&POSIX::LC_ALL, "C");
851 my %Known_bad_locale; # Failed test for a locale known to be bad
853 my @Added_alpha; # Alphas that aren't in the C locale.
857 # This returns a display string denoting the input parameter @_, each
858 # entry of which is a single character in the range 0-255. The first part
859 # of the output is a string of the characters in @_ that are ASCII
860 # graphics, and hence unambiguously displayable. They are given by code
861 # point order. The second part is the remaining code points, the ordinals
862 # of which are each displayed as 2-digit hex. Blanks are inserted so as
863 # to keep anything from the first part looking like a 2-digit hex number.
866 my @chars = sort { ord $a <=> ord $b } @_;
870 push @chars, chr(258); # This sentinel simplifies the loop termination
872 foreach my $i (0 .. @chars - 1) {
873 my $char = $chars[$i];
877 # We avoid using [:posix:] classes, as these are being tested in this
878 # file. Each equivalence class below is for things that can appear in
879 # a range; those that can't be in a range have class -1. 0 for those
880 # which should be output in hex; and >0 for the other ranges
881 if ($char =~ /[A-Z]/) {
884 elsif ($char =~ /[a-z]/) {
887 elsif ($char =~ /[0-9]/) {
890 # Uncomment to get literal punctuation displayed instead of hex
891 #elsif ($char =~ /[[\]!"#\$\%&\'()*+,.\/:\\;<=>?\@\^_`{|}~-]/) {
892 # $class = -1; # Punct never appears in a range
895 $class = 0; # Output in hex
898 if (! defined $range_start) {
900 $output .= " " . $char;
903 $range_start = ord $char;
904 $start_class = $class;
906 } # A range ends if not consecutive, or the class-type changes
907 elsif (ord $char != ($range_end = ord($chars[$i-1])) + 1
908 || $class != $start_class)
911 # Here, the current character is not in the range. This means the
912 # previous character must have been. Output the range up through
914 my $range_length = $range_end - $range_start + 1;
915 if ($start_class > 0) {
916 $output .= " " . chr($range_start);
917 $output .= "-" . chr($range_end) if $range_length > 1;
920 $output .= sprintf(" %02X", $range_start);
921 $output .= sprintf("-%02X", $range_end) if $range_length > 1;
924 # Handle the new current character, as potentially beginning a new
938 # Displays the string unambiguously. ASCII printables are always output
939 # as-is, though perhaps separated by blanks from other characters. If
940 # entirely printable ASCII, just returns the string. Otherwise if valid
941 # UTF-8 it uses the character names for non-printable-ASCII. Otherwise it
942 # outputs hex for each non-ASCII-printable byte.
944 return $string if $string =~ / ^ [[:print:]]* $/xa;
947 my $prev_was_punct = 1; # Beginning is considered punct
948 if (utf8::valid($string) && utf8::is_utf8($string)) {
950 foreach my $char (split "", $string) {
952 # Keep punctuation adjacent to other characters; otherwise
953 # separate them with a blank
954 if ($char =~ /[[:punct:]]/a) {
958 elsif ($char =~ /[[:print:]]/a) {
959 $result .= " " unless $prev_was_punct;
964 $result .= " " unless $prev_was_punct;
965 my $name = charnames::viacode(ord $char);
966 $result .= (defined $name) ? $name : ':unknown:';
973 foreach my $char (split "", $string) {
974 if ($char =~ /[[:punct:]]/a) {
978 elsif ($char =~ /[[:print:]]/a) {
979 $result .= " " unless $prev_was_punct;
984 $result .= " " unless $prev_was_punct;
985 $result .= sprintf("%02X", ord $char);
995 my ($Locale, $i, $pass_fail, $message) = @_;
997 push @{$Okay{$i}}, $Locale;
1001 $message = " ($message)" if $message;
1002 $Known_bad_locale{$i}{$Locale} = 1 if exists $known_bad_locales{$os}
1003 && $Locale =~ $known_bad_locales{$os};
1004 $Problem{$i}{$Locale} = 1;
1005 debug "failed $i ($test_names{$i}) with locale '$Locale'$message\n";
1009 sub report_multi_result {
1010 my ($Locale, $i, $results_ref) = @_;
1012 # $results_ref points to an array, each element of which is a character that was
1013 # in error for this test numbered '$i'. If empty, the test passed
1016 if (@$results_ref) {
1017 $message = join " ", "for", disp_chars(@$results_ref);
1019 report_result($Locale, $i, @$results_ref == 0, $message);
1022 my $first_locales_test_number = $final_without_setlocale
1023 + 1 + @incompatible_locales;
1024 my $locales_test_number;
1025 my $not_necessarily_a_problem_test_number;
1026 my $first_casing_test_number;
1027 my %setlocale_failed; # List of locales that setlocale() didn't work on
1029 foreach my $Locale (@Locale) {
1030 $locales_test_number = $first_locales_test_number - 1;
1032 debug "Locale = $Locale\n";
1034 unless (setlocale(&POSIX::LC_ALL, $Locale)) {
1035 $setlocale_failed{$Locale} = $Locale;
1039 # We test UTF-8 locales only under ':not_characters'; It is easier to
1040 # test them in other test files than here. Non- UTF-8 locales are tested
1041 # only under plain 'use locale', as otherwise we would have to convert
1042 # everything in them to Unicode.
1044 my %UPPER = (); # All alpha X for which uc(X) == X and lc(X) != X
1045 my %lower = (); # All alpha X for which lc(X) == X and uc(X) != X
1046 my %BoThCaSe = (); # All alpha X for which uc(X) == lc(X) == X
1048 my $is_utf8_locale = is_locale_utf8($Locale);
1050 debug "is utf8 locale? = $is_utf8_locale\n";
1052 debug "radix = " . disp_str(localeconv()->{decimal_point}) . "\n";
1054 if (! $is_utf8_locale) {
1056 @{$posixes{'word'}} = grep /\w/, map { chr } 0..255;
1057 @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255;
1058 @{$posixes{'space'}} = grep /\s/, map { chr } 0..255;
1059 @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255;
1060 @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255;
1061 @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255;
1062 @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255;
1063 @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255;
1064 @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255;
1065 @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255;
1066 @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255;
1067 @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255;
1068 @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255;
1069 @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255;
1070 @{$posixes{'cased'}} = grep /[[:upper:][:lower:]]/i, map {chr } 0..255;
1072 # Sieve the uppercase and the lowercase.
1074 for (@{$posixes{'word'}}) {
1075 if (/[^\d_]/) { # skip digits and the _
1086 use locale ':not_characters';
1087 @{$posixes{'word'}} = grep /\w/, map { chr } 0..255;
1088 @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255;
1089 @{$posixes{'space'}} = grep /\s/, map { chr } 0..255;
1090 @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255;
1091 @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255;
1092 @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255;
1093 @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255;
1094 @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255;
1095 @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255;
1096 @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255;
1097 @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255;
1098 @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255;
1099 @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255;
1100 @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255;
1101 @{$posixes{'cased'}} = grep /[[:upper:][:lower:]]/i, map {chr } 0..255;
1102 for (@{$posixes{'word'}}) {
1103 if (/[^\d_]/) { # skip digits and the _
1114 # Ordered, where possible, in groups of "this is a subset of the next
1116 debug ":upper: = ", disp_chars(@{$posixes{'upper'}}), "\n";
1117 debug ":lower: = ", disp_chars(@{$posixes{'lower'}}), "\n";
1118 debug ":cased: = ", disp_chars(@{$posixes{'cased'}}), "\n";
1119 debug ":alpha: = ", disp_chars(@{$posixes{'alpha'}}), "\n";
1120 debug ":alnum: = ", disp_chars(@{$posixes{'alnum'}}), "\n";
1121 debug ' \w = ', disp_chars(@{$posixes{'word'}}), "\n";
1122 debug ":graph: = ", disp_chars(@{$posixes{'graph'}}), "\n";
1123 debug ":print: = ", disp_chars(@{$posixes{'print'}}), "\n";
1124 debug ' \d = ', disp_chars(@{$posixes{'digit'}}), "\n";
1125 debug ":xdigit: = ", disp_chars(@{$posixes{'xdigit'}}), "\n";
1126 debug ":blank: = ", disp_chars(@{$posixes{'blank'}}), "\n";
1127 debug ' \s = ', disp_chars(@{$posixes{'space'}}), "\n";
1128 debug ":punct: = ", disp_chars(@{$posixes{'punct'}}), "\n";
1129 debug ":cntrl: = ", disp_chars(@{$posixes{'cntrl'}}), "\n";
1130 debug ":ascii: = ", disp_chars(@{$posixes{'ascii'}}), "\n";
1132 foreach (keys %UPPER) {
1134 $BoThCaSe{$_}++ if exists $lower{$_};
1136 foreach (keys %lower) {
1137 $BoThCaSe{$_}++ if exists $UPPER{$_};
1139 foreach (keys %BoThCaSe) {
1145 foreach my $ord ( 0 .. 255 ) {
1146 $Unassigned{chr $ord} = 1;
1148 foreach my $class (keys %posixes) {
1149 foreach my $char (@{$posixes{$class}}) {
1150 delete $Unassigned{$char};
1154 debug "UPPER = ", disp_chars(sort { ord $a <=> ord $b } keys %UPPER), "\n";
1155 debug "lower = ", disp_chars(sort { ord $a <=> ord $b } keys %lower), "\n";
1156 debug "BoThCaSe = ", disp_chars(sort { ord $a <=> ord $b } keys %BoThCaSe), "\n";
1157 debug "Unassigned = ", disp_chars(sort { ord $a <=> ord $b } keys %Unassigned), "\n";
1161 foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) {
1164 if ($is_utf8_locale) {
1165 use locale ':not_characters';
1166 $ok = $x =~ /[[:upper:]]/;
1167 $fold_ok = $x =~ /[[:lower:]]/i;
1171 $ok = $x =~ /[[:upper:]]/;
1172 $fold_ok = $x =~ /[[:lower:]]/i;
1174 push @failures, $x unless $ok;
1175 push @fold_failures, $x unless $fold_ok;
1177 $locales_test_number++;
1178 $first_casing_test_number = $locales_test_number;
1179 $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/ matches all alpha X for which uc(X) == X and lc(X) != X';
1180 report_multi_result($Locale, $locales_test_number, \@failures);
1182 $locales_test_number++;
1184 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i matches all alpha X for which uc(X) == X and lc(X) != X';
1185 report_multi_result($Locale, $locales_test_number, \@fold_failures);
1188 undef @fold_failures;
1190 foreach my $x (sort { ord $a <=> ord $b } keys %lower) {
1193 if ($is_utf8_locale) {
1194 use locale ':not_characters';
1195 $ok = $x =~ /[[:lower:]]/;
1196 $fold_ok = $x =~ /[[:upper:]]/i;
1200 $ok = $x =~ /[[:lower:]]/;
1201 $fold_ok = $x =~ /[[:upper:]]/i;
1203 push @failures, $x unless $ok;
1204 push @fold_failures, $x unless $fold_ok;
1207 $locales_test_number++;
1208 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/ matches all alpha X for which lc(X) == X and uc(X) != X';
1209 report_multi_result($Locale, $locales_test_number, \@failures);
1211 $locales_test_number++;
1212 $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/i matches all alpha X for which lc(X) == X and uc(X) != X';
1213 report_multi_result($Locale, $locales_test_number, \@fold_failures);
1215 { # Find the alphabetic characters that are not considered alphabetics
1216 # in the default (C) locale.
1221 for (keys %UPPER, keys %lower, keys %BoThCaSe) {
1222 push(@Added_alpha, $_) if (/\W/);
1226 @Added_alpha = sort { ord $a <=> ord $b } @Added_alpha;
1228 debug "Added_alpha = ", disp_chars(@Added_alpha), "\n";
1230 # Cross-check the whole 8-bit character set.
1232 ++$locales_test_number;
1234 $test_names{$locales_test_number} = 'Verify that \w and [:word:] are identical';
1235 for (map { chr } 0..255) {
1236 if ($is_utf8_locale) {
1237 use locale ':not_characters';
1238 push @f, $_ unless /[[:word:]]/ == /\w/;
1241 push @f, $_ unless /[[:word:]]/ == /\w/;
1244 report_multi_result($Locale, $locales_test_number, \@f);
1246 ++$locales_test_number;
1248 $test_names{$locales_test_number} = 'Verify that \d and [:digit:] are identical';
1249 for (map { chr } 0..255) {
1250 if ($is_utf8_locale) {
1251 use locale ':not_characters';
1252 push @f, $_ unless /[[:digit:]]/ == /\d/;
1255 push @f, $_ unless /[[:digit:]]/ == /\d/;
1258 report_multi_result($Locale, $locales_test_number, \@f);
1260 ++$locales_test_number;
1262 $test_names{$locales_test_number} = 'Verify that \s and [:space:] are identical';
1263 for (map { chr } 0..255) {
1264 if ($is_utf8_locale) {
1265 use locale ':not_characters';
1266 push @f, $_ unless /[[:space:]]/ == /\s/;
1269 push @f, $_ unless /[[:space:]]/ == /\s/;
1272 report_multi_result($Locale, $locales_test_number, \@f);
1274 ++$locales_test_number;
1276 $test_names{$locales_test_number} = 'Verify that [:posix:] and [:^posix:] are mutually exclusive';
1277 for (map { chr } 0..255) {
1278 if ($is_utf8_locale) {
1279 use locale ':not_characters';
1280 push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) ||
1281 (/[[:alnum:]]/ xor /[[:^alnum:]]/) ||
1282 (/[[:ascii:]]/ xor /[[:^ascii:]]/) ||
1283 (/[[:blank:]]/ xor /[[:^blank:]]/) ||
1284 (/[[:cntrl:]]/ xor /[[:^cntrl:]]/) ||
1285 (/[[:digit:]]/ xor /[[:^digit:]]/) ||
1286 (/[[:graph:]]/ xor /[[:^graph:]]/) ||
1287 (/[[:lower:]]/ xor /[[:^lower:]]/) ||
1288 (/[[:print:]]/ xor /[[:^print:]]/) ||
1289 (/[[:space:]]/ xor /[[:^space:]]/) ||
1290 (/[[:upper:]]/ xor /[[:^upper:]]/) ||
1291 (/[[:word:]]/ xor /[[:^word:]]/) ||
1292 (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) ||
1294 # effectively is what [:cased:] would be if it existed.
1295 (/[[:upper:][:lower:]]/i xor /[^[:upper:][:lower:]]/i);
1298 push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) ||
1299 (/[[:alnum:]]/ xor /[[:^alnum:]]/) ||
1300 (/[[:ascii:]]/ xor /[[:^ascii:]]/) ||
1301 (/[[:blank:]]/ xor /[[:^blank:]]/) ||
1302 (/[[:cntrl:]]/ xor /[[:^cntrl:]]/) ||
1303 (/[[:digit:]]/ xor /[[:^digit:]]/) ||
1304 (/[[:graph:]]/ xor /[[:^graph:]]/) ||
1305 (/[[:lower:]]/ xor /[[:^lower:]]/) ||
1306 (/[[:print:]]/ xor /[[:^print:]]/) ||
1307 (/[[:space:]]/ xor /[[:^space:]]/) ||
1308 (/[[:upper:]]/ xor /[[:^upper:]]/) ||
1309 (/[[:word:]]/ xor /[[:^word:]]/) ||
1310 (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) ||
1311 (/[[:upper:][:lower:]]/i xor /[^[:upper:][:lower:]]/i);
1314 report_multi_result($Locale, $locales_test_number, \@f);
1316 # The rules for the relationships are given in:
1317 # http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
1320 ++$locales_test_number;
1322 $test_names{$locales_test_number} = 'Verify that [:lower:] contains at least a-z';
1324 if ($is_utf8_locale) {
1325 use locale ':not_characters';
1326 push @f, $_ unless /[[:lower:]]/;
1329 push @f, $_ unless /[[:lower:]]/;
1332 report_multi_result($Locale, $locales_test_number, \@f);
1334 ++$locales_test_number;
1336 $test_names{$locales_test_number} = 'Verify that [:lower:] is a subset of [:alpha:]';
1337 for (map { chr } 0..255) {
1338 if ($is_utf8_locale) {
1339 use locale ':not_characters';
1340 push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/;
1343 push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/;
1346 report_multi_result($Locale, $locales_test_number, \@f);
1348 ++$locales_test_number;
1350 $test_names{$locales_test_number} = 'Verify that [:upper:] contains at least A-Z';
1352 if ($is_utf8_locale) {
1353 use locale ':not_characters';
1354 push @f, $_ unless /[[:upper:]]/;
1357 push @f, $_ unless /[[:upper:]]/;
1360 report_multi_result($Locale, $locales_test_number, \@f);
1362 ++$locales_test_number;
1364 $test_names{$locales_test_number} = 'Verify that [:upper:] is a subset of [:alpha:]';
1365 for (map { chr } 0..255) {
1366 if ($is_utf8_locale) {
1367 use locale ':not_characters';
1368 push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/;
1371 push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/;
1374 report_multi_result($Locale, $locales_test_number, \@f);
1376 ++$locales_test_number;
1378 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i is a subset of [:alpha:]';
1379 for (map { chr } 0..255) {
1380 if ($is_utf8_locale) {
1381 use locale ':not_characters';
1382 push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/;
1385 push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/;
1388 report_multi_result($Locale, $locales_test_number, \@f);
1390 ++$locales_test_number;
1392 $test_names{$locales_test_number} = 'Verify that [:alpha:] is a subset of [:alnum:]';
1393 for (map { chr } 0..255) {
1394 if ($is_utf8_locale) {
1395 use locale ':not_characters';
1396 push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/;
1399 push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/;
1402 report_multi_result($Locale, $locales_test_number, \@f);
1404 ++$locales_test_number;
1406 $test_names{$locales_test_number} = 'Verify that [:digit:] contains at least 0-9';
1408 if ($is_utf8_locale) {
1409 use locale ':not_characters';
1410 push @f, $_ unless /[[:digit:]]/;
1413 push @f, $_ unless /[[:digit:]]/;
1416 report_multi_result($Locale, $locales_test_number, \@f);
1418 ++$locales_test_number;
1420 $test_names{$locales_test_number} = 'Verify that [:digit:] is a subset of [:alnum:]';
1421 for (map { chr } 0..255) {
1422 if ($is_utf8_locale) {
1423 use locale ':not_characters';
1424 push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/;
1427 push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/;
1430 report_multi_result($Locale, $locales_test_number, \@f);
1432 ++$locales_test_number;
1434 $test_names{$locales_test_number} = 'Verify that [:digit:] matches either 10 or 20 code points';
1435 report_result($Locale, $locales_test_number, @{$posixes{'digit'}} == 10 || @{$posixes{'digit'}} == 20);
1437 ++$locales_test_number;
1439 $test_names{$locales_test_number} = 'Verify that if there is a second set of digits in [:digit:], they are consecutive';
1440 if (@{$posixes{'digit'}} == 20) {
1442 for (map { chr } 0..255) {
1443 next unless /[[:digit:]]/;
1445 if (defined $previous_ord) {
1446 if ($is_utf8_locale) {
1447 use locale ':not_characters';
1448 push @f, $_ if ord $_ != $previous_ord + 1;
1451 push @f, $_ if ord $_ != $previous_ord + 1;
1454 $previous_ord = ord $_;
1457 report_multi_result($Locale, $locales_test_number, \@f);
1459 ++$locales_test_number;
1461 my @xdigit_digits; # :digit: & :xdigit:
1462 $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains one or two blocks of 10 consecutive [:digit:] chars';
1463 for (map { chr } 0..255) {
1464 if ($is_utf8_locale) {
1465 use locale ':not_characters';
1466 # For utf8 locales, we actually use a stricter test: that :digit:
1467 # is a subset of :xdigit:, as we know that only 0-9 should match
1468 push @f, $_ if /[[:digit:]]/ and ! /[[:xdigit:]]/;
1471 push @xdigit_digits, $_ if /[[:digit:]]/ and /[[:xdigit:]]/;
1474 if (! $is_utf8_locale) {
1476 # For non-utf8 locales, @xdigit_digits is a list of the characters
1477 # that are both :xdigit: and :digit:. Because :digit: is stored in
1478 # increasing code point order (unless the tests above failed),
1479 # @xdigit_digits is as well. There should be exactly 10 or
1481 if (@xdigit_digits != 10 && @xdigit_digits != 20) {
1482 @f = @xdigit_digits;
1486 # Look for contiguity in the series, adding any wrong ones to @f
1487 my @temp = @xdigit_digits;
1489 push @f, $temp[1] if ($temp[0] != $temp[1] - 1)
1491 # Skip this test for the 0th character of
1492 # the second block of 10, as it won't be
1493 # contiguous with the previous block
1494 && (! defined $xdigit_digits[10]
1495 || $temp[1] != $xdigit_digits[10]);
1501 report_multi_result($Locale, $locales_test_number, \@f);
1503 ++$locales_test_number;
1505 $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains at least A-F, a-f';
1506 for ('A' .. 'F', 'a' .. 'f') {
1507 if ($is_utf8_locale) {
1508 use locale ':not_characters';
1509 push @f, $_ unless /[[:xdigit:]]/;
1512 push @f, $_ unless /[[:xdigit:]]/;
1515 report_multi_result($Locale, $locales_test_number, \@f);
1517 ++$locales_test_number;
1519 $test_names{$locales_test_number} = 'Verify that any additional members of [:xdigit:], are in groups of 6 consecutive code points';
1522 for my $chr (map { chr } 0..255) {
1523 next unless $chr =~ /[[:xdigit:]]/;
1524 if ($is_utf8_locale) {
1525 next if $chr =~ /[[:digit:]]/;
1528 next if grep { $chr eq $_ } @xdigit_digits;
1530 next if $chr =~ /[A-Fa-f]/;
1531 if (defined $previous_ord) {
1532 if ($is_utf8_locale) {
1533 use locale ':not_characters';
1534 push @f, $chr if ord $chr != $previous_ord + 1;
1537 push @f, $chr if ord $chr != $previous_ord + 1;
1542 undef $previous_ord;
1545 $previous_ord = ord $chr;
1548 report_multi_result($Locale, $locales_test_number, \@f);
1550 ++$locales_test_number;
1552 $test_names{$locales_test_number} = 'Verify that [:xdigit:] is a subset of [:graph:]';
1553 for (map { chr } 0..255) {
1554 if ($is_utf8_locale) {
1555 use locale ':not_characters';
1556 push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/;
1559 push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/;
1562 report_multi_result($Locale, $locales_test_number, \@f);
1564 # Note that xdigit doesn't have to be a subset of alnum
1566 ++$locales_test_number;
1568 $test_names{$locales_test_number} = 'Verify that [:punct:] is a subset of [:graph:]';
1569 for (map { chr } 0..255) {
1570 if ($is_utf8_locale) {
1571 use locale ':not_characters';
1572 push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/;
1575 push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/;
1578 report_multi_result($Locale, $locales_test_number, \@f);
1580 ++$locales_test_number;
1582 $test_names{$locales_test_number} = 'Verify that the space character is not in [:graph:]';
1583 if ($is_utf8_locale) {
1584 use locale ':not_characters';
1585 push @f, " " if " " =~ /[[:graph:]]/;
1588 push @f, " " if " " =~ /[[:graph:]]/;
1590 report_multi_result($Locale, $locales_test_number, \@f);
1592 ++$locales_test_number;
1594 $test_names{$locales_test_number} = 'Verify that [:space:] contains at least [\f\n\r\t\cK ]';
1595 for (' ', "\f", "\n", "\r", "\t", "\cK") {
1596 if ($is_utf8_locale) {
1597 use locale ':not_characters';
1598 push @f, $_ unless /[[:space:]]/;
1601 push @f, $_ unless /[[:space:]]/;
1604 report_multi_result($Locale, $locales_test_number, \@f);
1606 ++$locales_test_number;
1608 $test_names{$locales_test_number} = 'Verify that [:blank:] contains at least [\t ]';
1610 if ($is_utf8_locale) {
1611 use locale ':not_characters';
1612 push @f, $_ unless /[[:blank:]]/;
1615 push @f, $_ unless /[[:blank:]]/;
1618 report_multi_result($Locale, $locales_test_number, \@f);
1620 ++$locales_test_number;
1622 $test_names{$locales_test_number} = 'Verify that [:blank:] is a subset of [:space:]';
1623 for (map { chr } 0..255) {
1624 if ($is_utf8_locale) {
1625 use locale ':not_characters';
1626 push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/;
1629 push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/;
1632 report_multi_result($Locale, $locales_test_number, \@f);
1634 ++$locales_test_number;
1636 $test_names{$locales_test_number} = 'Verify that [:graph:] is a subset of [:print:]';
1637 for (map { chr } 0..255) {
1638 if ($is_utf8_locale) {
1639 use locale ':not_characters';
1640 push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/;
1643 push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/;
1646 report_multi_result($Locale, $locales_test_number, \@f);
1648 ++$locales_test_number;
1650 $test_names{$locales_test_number} = 'Verify that the space character is in [:print:]';
1651 if ($is_utf8_locale) {
1652 use locale ':not_characters';
1653 push @f, " " if " " !~ /[[:print:]]/;
1656 push @f, " " if " " !~ /[[:print:]]/;
1658 report_multi_result($Locale, $locales_test_number, \@f);
1660 ++$locales_test_number;
1662 $test_names{$locales_test_number} = 'Verify that isn\'t both [:cntrl:] and [:print:]';
1663 for (map { chr } 0..255) {
1664 if ($is_utf8_locale) {
1665 use locale ':not_characters';
1666 push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/);
1669 push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/);
1672 report_multi_result($Locale, $locales_test_number, \@f);
1674 ++$locales_test_number;
1676 $test_names{$locales_test_number} = 'Verify that isn\'t both [:alpha:] and [:digit:]';
1677 for (map { chr } 0..255) {
1678 if ($is_utf8_locale) {
1679 use locale ':not_characters';
1680 push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/;
1683 push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/;
1686 report_multi_result($Locale, $locales_test_number, \@f);
1688 ++$locales_test_number;
1690 $test_names{$locales_test_number} = 'Verify that isn\'t both [:alnum:] and [:punct:]';
1691 for (map { chr } 0..255) {
1692 if ($is_utf8_locale) {
1693 use locale ':not_characters';
1694 push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/;
1697 push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/;
1700 report_multi_result($Locale, $locales_test_number, \@f);
1702 ++$locales_test_number;
1704 $test_names{$locales_test_number} = 'Verify that isn\'t both [:xdigit:] and [:punct:]';
1705 for (map { chr } 0..255) {
1706 if ($is_utf8_locale) {
1707 use locale ':not_characters';
1708 push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/);
1711 push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/);
1714 report_multi_result($Locale, $locales_test_number, \@f);
1716 ++$locales_test_number;
1718 $test_names{$locales_test_number} = 'Verify that isn\'t both [:graph:] and [:space:]';
1719 for (map { chr } 0..255) {
1720 if ($is_utf8_locale) {
1721 use locale ':not_characters';
1722 push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/);
1725 push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/);
1728 report_multi_result($Locale, $locales_test_number, \@f);
1730 foreach ($first_casing_test_number..$locales_test_number) {
1731 $problematical_tests{$_} = 1;
1735 # Test for read-only scalars' locale vs non-locale comparisons.
1741 if ($is_utf8_locale) {
1742 use locale ':not_characters';
1743 $ok = ($a cmp "qwerty") == 0;
1747 $ok = ($a cmp "qwerty") == 0;
1749 report_result($Locale, ++$locales_test_number, $ok);
1750 $test_names{$locales_test_number} = 'Verify that cmp works with a read-only scalar; no- vs locale';
1754 my ($from, $to, $lesser, $greater,
1755 @test, %test, $test, $yes, $no, $sign);
1757 ++$locales_test_number;
1758 $test_names{$locales_test_number} = 'Verify that "le", "ne", etc work';
1759 $not_necessarily_a_problem_test_number = $locales_test_number;
1762 $from = int(($_*@{$posixes{'word'}})/10);
1763 $to = $from + int(@{$posixes{'word'}}/10);
1764 $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}});
1765 $lesser = join('', @{$posixes{'word'}}[$from..$to]);
1766 # Select a slice one character on.
1768 $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}});
1769 $greater = join('', @{$posixes{'word'}}[$from..$to]);
1770 if ($is_utf8_locale) {
1771 use locale ':not_characters';
1772 ($yes, $no, $sign) = ($lesser lt $greater
1774 : ("not ", " ", -1));
1778 ($yes, $no, $sign) = ($lesser lt $greater
1780 : ("not ", " ", -1));
1782 # all these tests should FAIL (return 0). Exact lt or gt cannot
1783 # be tested because in some locales, say, eacute and E may test
1787 $no.' ($lesser le $greater)', # 1
1788 'not ($lesser ne $greater)', # 2
1789 ' ($lesser eq $greater)', # 3
1790 $yes.' ($lesser ge $greater)', # 4
1791 $yes.' ($lesser ge $greater)', # 5
1792 $yes.' ($greater le $lesser )', # 7
1793 'not ($greater ne $lesser )', # 8
1794 ' ($greater eq $lesser )', # 9
1795 $no.' ($greater ge $lesser )', # 10
1796 'not (($lesser cmp $greater) == -($sign))' # 11
1798 @test{@test} = 0 x @test;
1800 for my $ti (@test) {
1801 if ($is_utf8_locale) {
1802 use locale ':not_characters';
1803 $test{$ti} = eval $ti;
1806 # Already in 'use locale';
1807 $test{$ti} = eval $ti;
1809 $test ||= $test{$ti}
1811 report_result($Locale, $locales_test_number, $test == 0);
1813 debug "lesser = '$lesser'\n";
1814 debug "greater = '$greater'\n";
1815 debug "lesser cmp greater = ",
1816 $lesser cmp $greater, "\n";
1817 debug "greater cmp lesser = ",
1818 $greater cmp $lesser, "\n";
1819 debug "(greater) from = $from, to = $to\n";
1820 for my $ti (@test) {
1821 debugf("# %-40s %-4s", $ti,
1822 $test{$ti} ? 'FAIL' : 'ok');
1823 if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) {
1824 debugf("(%s == %4d)", $1, eval $1);
1835 my @sorted_controls;
1837 ++$locales_test_number;
1838 $test_names{$locales_test_number}
1839 = 'Skip in locales where there are no controls;'
1840 . ' otherwise verify that \0 sorts before any (other) control';
1841 if (! $posixes{'cntrl'}) {
1842 report_result($Locale, $locales_test_number, 1);
1844 # We use all code points for the tests below since there aren't
1846 push @sorted_controls, chr $_ for 1..255;
1847 @sorted_controls = sort @sorted_controls;
1850 @sorted_controls = @{$posixes{'cntrl'}};
1851 push @sorted_controls, "\0",
1852 unless grep { $_ eq "\0" } @sorted_controls;
1853 @sorted_controls = sort @sorted_controls;
1855 for my $control (@sorted_controls) {
1856 $output .= " " . disp_chars($control);
1858 debug "sorted :cntrl: (plus NUL) = $output\n";
1859 my $ok = $sorted_controls[0] eq "\0";
1860 report_result($Locale, $locales_test_number, $ok);
1862 shift @sorted_controls if $ok;
1865 my $lowest_control = $sorted_controls[0];
1867 ++$locales_test_number;
1868 $test_names{$locales_test_number}
1869 = 'Skip in locales where all controls have primary sorting weight; '
1870 . 'otherwise verify that \0 doesn\'t have primary sorting weight';
1871 if ("a${lowest_control}c" lt "ab") {
1872 report_result($Locale, $locales_test_number, 1);
1875 my $ok = "ab" lt "a\0c";
1876 report_result($Locale, $locales_test_number, $ok);
1879 ++$locales_test_number;
1880 $test_names{$locales_test_number}
1881 = 'Verify that strings with embedded NUL collate';
1882 my $ok = "a\0a\0a" lt "a${lowest_control}a${lowest_control}a";
1883 report_result($Locale, $locales_test_number, $ok);
1885 ++$locales_test_number;
1886 $test_names{$locales_test_number}
1887 = 'Verify that strings with embedded NUL and '
1888 . 'extra trailing NUL collate';
1889 $ok = "a\0a\0" lt "a${lowest_control}a${lowest_control}";
1890 report_result($Locale, $locales_test_number, $ok);
1892 ++$locales_test_number;
1893 $test_names{$locales_test_number}
1894 = 'Verify that empty strings collate';
1896 report_result($Locale, $locales_test_number, $ok);
1898 ++$locales_test_number;
1899 $test_names{$locales_test_number}
1900 = "Skip in non-UTF-8 locales; otherwise verify that UTF8ness "
1901 . "doesn't matter with collation";
1902 if (! $is_utf8_locale) {
1903 report_result($Locale, $locales_test_number, 1);
1907 # khw can't think of anything better. Start with a string that is
1908 # higher than its UTF-8 representation in both EBCDIC and ASCII
1909 my $string = chr utf8::unicode_to_native(0xff);
1910 my $utf8_string = $string;
1911 utf8::upgrade($utf8_string);
1913 # 8 should be lt 9 in all locales (except ones that aren't
1914 # ASCII-based, which might fail this)
1915 $ok = ("a${string}8") lt ("a${utf8_string}9");
1916 report_result($Locale, $locales_test_number, $ok);
1919 ++$locales_test_number;
1920 $test_names{$locales_test_number}
1921 = "Skip in UTF-8 locales; otherwise verify that single byte "
1922 . "collates before 0x100 and above";
1923 if ($is_utf8_locale) {
1924 report_result($Locale, $locales_test_number, 1);
1927 my $max_collating = chr 0; # Find byte that collates highest
1928 for my $i (0 .. 255) {
1930 $max_collating = $char if $char gt $max_collating;
1932 $ok = $max_collating lt chr 0x100;
1933 report_result($Locale, $locales_test_number, $ok);
1936 ++$locales_test_number;
1937 $test_names{$locales_test_number}
1938 = "Skip in UTF-8 locales; otherwise verify that 0x100 and "
1939 . "above collate in code point order";
1940 if ($is_utf8_locale) {
1941 report_result($Locale, $locales_test_number, 1);
1944 $ok = chr 0x100 lt chr 0x101;
1945 report_result($Locale, $locales_test_number, $ok);
1981 if (! $is_utf8_locale) {
1984 my ($x, $y) = (1.23, 1.23);
1987 printf ''; # printf used to reset locale to "C"
1992 my $z = sprintf ''; # sprintf used to reset locale to "C"
1999 local $SIG{__WARN__} =
2005 # The == (among other ops) used to warn for locales
2006 # that had something else than "." as the radix character.
2030 $ok12 = abs(($f + $g) - 3.57) < 0.01;
2032 $ok14 = $ok14_5 = $ok15 = $ok16 = 1; # Skip for non-utf8 locales
2036 $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i);
2038 $ok18 = $j eq sprintf("%g:%g", $h, $i);
2041 use locale ':not_characters';
2043 my ($x, $y) = (1.23, 1.23);
2045 printf ''; # printf used to reset locale to "C"
2050 my $z = sprintf ''; # sprintf used to reset locale to "C"
2056 local $SIG{__WARN__} =
2082 $ok12 = abs(($f + $g) - 3.57) < 0.01;
2085 # Look for non-ASCII error messages, and verify that the first
2086 # such is in UTF-8 (the others almost certainly will be like the
2087 # first). This is only done if the current locale has LC_MESSAGES
2090 if ( locales_enabled('LC_MESSAGES')
2091 && setlocale(&POSIX::LC_MESSAGES, $Locale))
2093 foreach my $err (keys %!) {
2095 $! = eval "&Errno::$err"; # Convert to strerror() output
2097 my $strerror = "$!";
2098 if ("$strerror" =~ /\P{ASCII}/) {
2099 $ok14 = utf8::is_utf8($strerror);
2101 $ok14_5 = "$!" !~ /\P{ASCII}/;
2103 "non-ASCII \$! for error $errnum='$strerror'"))
2110 # Similarly, we verify that a non-ASCII radix is in UTF-8. This
2111 # also catches if there is a disparity between sprintf and
2114 my $string_g = "$g";
2115 my $sprintf_g = sprintf("%g", $g);
2117 $ok15 = $string_g =~ / ^ \p{ASCII}+ $ /x || utf8::is_utf8($string_g);
2118 $ok16 = $sprintf_g eq $string_g;
2122 $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i);
2124 $ok18 = $j eq sprintf("%g:%g", $h, $i);
2128 if (setlocale(&POSIX::LC_TIME, $Locale)) { # These tests aren't affected by
2130 my @times = CORE::localtime();
2133 $ok19 = POSIX::strftime("%p", @times) ne "%p"; # [perl #119425]
2134 my $date = POSIX::strftime("'%A' '%B' '%Z' '%p'", @times);
2135 debug("'Day' 'Month' 'TZ' 'am/pm' = ", disp_str($date));
2137 # If there is any non-ascii, it better be UTF-8 in a UTF-8 locale, and
2138 # not UTF-8 if the locale isn't UTF-8.
2139 $ok20 = $date =~ / ^ \p{ASCII}+ $ /x
2140 || $is_utf8_locale == utf8::is_utf8($date);
2144 if (locales_enabled('LC_MESSAGES')) {
2145 foreach my $err (keys %!) {
2148 $! = eval "&Errno::$err"; # Convert to strerror() output
2149 my $strerror = "$!";
2150 if ($strerror =~ /\P{ASCII}/) {
2152 debug(disp_str("non-ASCII strerror=$strerror"));
2158 report_result($Locale, ++$locales_test_number, $ok1);
2159 $test_names{$locales_test_number} = 'Verify that an intervening printf doesn\'t change assignment results';
2160 my $first_a_test = $locales_test_number;
2162 debug "$first_a_test..$locales_test_number: \$a = $a, \$b = $b, Locale = $Locale\n";
2164 report_result($Locale, ++$locales_test_number, $ok2);
2165 $test_names{$locales_test_number} = 'Verify that an intervening sprintf doesn\'t change assignment results';
2167 my $first_c_test = $locales_test_number;
2169 $test_names{++$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a constant';
2170 report_result($Locale, $locales_test_number, $ok3);
2171 $problematical_tests{$locales_test_number} = 1;
2173 $test_names{++$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar';
2174 report_result($Locale, $locales_test_number, $ok4);
2175 $problematical_tests{$locales_test_number} = 1;
2177 report_result($Locale, ++$locales_test_number, $ok5);
2178 $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar and an intervening sprintf';
2179 $problematical_tests{$locales_test_number} = 1;
2181 debug "$first_c_test..$locales_test_number: \$c = $c, \$d = $d, Locale = $Locale\n";
2183 report_result($Locale, ++$locales_test_number, $ok6);
2184 $test_names{$locales_test_number} = 'Verify that can assign stringified under inner no-locale block';
2185 my $first_e_test = $locales_test_number;
2187 report_result($Locale, ++$locales_test_number, $ok7);
2188 $test_names{$locales_test_number} = 'Verify that "==" with a scalar still works in inner no locale';
2190 $test_names{++$locales_test_number} = 'Verify that "==" with a scalar and an intervening sprintf still works in inner no locale';
2191 report_result($Locale, $locales_test_number, $ok8);
2192 $problematical_tests{$locales_test_number} = 1;
2194 debug "$first_e_test..$locales_test_number: \$e = $e, no locale\n";
2196 report_result($Locale, ++$locales_test_number, $ok9);
2197 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a constant';
2198 $problematical_tests{$locales_test_number} = 1;
2199 my $first_f_test = $locales_test_number;
2201 report_result($Locale, ++$locales_test_number, $ok10);
2202 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar';
2203 $problematical_tests{$locales_test_number} = 1;
2205 $test_names{++$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar and an intervening sprintf';
2206 report_result($Locale, $locales_test_number, $ok11);
2207 $problematical_tests{$locales_test_number} = 1;
2209 report_result($Locale, ++$locales_test_number, $ok12);
2210 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix can participate in an addition and function call as numeric';
2211 $problematical_tests{$locales_test_number} = 1;
2213 report_result($Locale, ++$locales_test_number, $ok13);
2214 $test_names{$locales_test_number} = 'Verify that don\'t get warning under "==" even if radix is not a dot';
2215 $problematical_tests{$locales_test_number} = 1;
2217 report_result($Locale, ++$locales_test_number, $ok14);
2218 $test_names{$locales_test_number} = 'Verify that non-ASCII UTF-8 error messages are in UTF-8';
2220 report_result($Locale, ++$locales_test_number, $ok14_5);
2221 $test_names{$locales_test_number} = '... and are ASCII outside "use locale"';
2223 report_result($Locale, ++$locales_test_number, $ok15);
2224 $test_names{$locales_test_number} = 'Verify that a number with a UTF-8 radix has a UTF-8 stringification';
2225 $problematical_tests{$locales_test_number} = 1;
2227 report_result($Locale, ++$locales_test_number, $ok16);
2228 $test_names{$locales_test_number} = 'Verify that a sprintf of a number with a UTF-8 radix yields UTF-8';
2229 $problematical_tests{$locales_test_number} = 1;
2231 report_result($Locale, ++$locales_test_number, $ok17);
2232 $test_names{$locales_test_number} = 'Verify that a sprintf of a number outside locale scope uses a dot radix';
2234 report_result($Locale, ++$locales_test_number, $ok18);
2235 $test_names{$locales_test_number} = 'Verify that a sprintf of a number back within locale scope uses locale radix';
2236 $problematical_tests{$locales_test_number} = 1;
2238 report_result($Locale, ++$locales_test_number, $ok19);
2239 $test_names{$locales_test_number} = 'Verify that strftime doesn\'t return "%p" in locales where %p is empty';
2241 report_result($Locale, ++$locales_test_number, $ok20);
2242 $test_names{$locales_test_number} = 'Verify that strftime returns date with UTF-8 flag appropriately set';
2243 $problematical_tests{$locales_test_number} = 1; # This is broken in
2246 report_result($Locale, ++$locales_test_number, $ok21);
2247 $test_names{$locales_test_number} = '"$!" is ASCII-only outside of locale scope';
2249 debug "$first_f_test..$locales_test_number: \$f = $f, \$g = $g, back to locale = $Locale\n";
2251 # Does taking lc separately differ from taking
2252 # the lc "in-line"? (This was the bug 19990704.002 (#965), change #3568.)
2253 # The bug was in the caching of the 'o'-magic.
2254 if (! $is_utf8_locale) {
2260 return $lc0 cmp $lc1;
2264 return lc($_[0]) cmp lc($_[1]);
2271 report_result($Locale, ++$locales_test_number,
2272 lcA($x, $y) == 1 && lcB($x, $y) == 1 ||
2273 lcA($x, $z) == 0 && lcB($x, $z) == 0);
2276 use locale ':not_characters';
2281 return $lc0 cmp $lc1;
2285 return lc($_[0]) cmp lc($_[1]);
2292 report_result($Locale, ++$locales_test_number,
2293 lcC($x, $y) == 1 && lcD($x, $y) == 1 ||
2294 lcC($x, $z) == 0 && lcD($x, $z) == 0);
2296 $test_names{$locales_test_number} = 'Verify "lc(foo) cmp lc(bar)" is the same as using intermediaries for the cmp';
2298 # Does lc of an UPPER (if different from the UPPER) match
2299 # case-insensitively the UPPER, and does the UPPER match
2300 # case-insensitively the lc of the UPPER. And vice versa.
2304 my $re = qr/[\[\(\{\*\+\?\|\^\$\\]/;
2307 ++$locales_test_number;
2308 $test_names{$locales_test_number} = 'Verify case insensitive matching works';
2309 foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) {
2310 if (! $is_utf8_locale) {
2312 next unless uc $y eq $x;
2313 debug_more( "UPPER=", disp_chars(($x)),
2314 "; lc=", disp_chars(($y)), "; ",
2315 "; fc=", disp_chars((fc $x)), "; ",
2316 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
2317 $x =~ /\Q$y/i ? 1 : 0,
2319 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
2320 $y =~ /\Q$x/i ? 1 : 0,
2323 # If $x and $y contain regular expression characters
2324 # AND THEY lowercase (/i) to regular expression characters,
2325 # regcomp() will be mightily confused. No, the \Q doesn't
2326 # help here (maybe regex engine internal lowercasing
2327 # is done after the \Q?) An example of this happening is
2328 # the bg_BG (Bulgarian) locale under EBCDIC (OS/390 USS):
2329 # the chr(173) (the "[") is the lowercase of the chr(235).
2331 # Similarly losing EBCDIC locales include cs_cz, cs_CZ,
2332 # el_gr, el_GR, en_us.IBM-037 (!), en_US.IBM-037 (!),
2333 # et_ee, et_EE, hr_hr, hr_HR, hu_hu, hu_HU, lt_LT,
2334 # mk_mk, mk_MK, nl_nl.IBM-037, nl_NL.IBM-037,
2335 # pl_pl, pl_PL, ro_ro, ro_RO, ru_ru, ru_RU,
2336 # sk_sk, sk_SK, sl_si, sl_SI, tr_tr, tr_TR.
2338 # Similar things can happen even under (bastardised)
2339 # non-EBCDIC locales: in many European countries before the
2340 # advent of ISO 8859-x nationally customised versions of
2341 # ISO 646 were devised, reusing certain punctuation
2342 # characters for modified characters needed by the
2343 # country/language. For example, the "|" might have
2344 # stood for U+00F6 or LATIN SMALL LETTER O WITH DIAERESIS.
2346 if ($x =~ $re || $y =~ $re) {
2347 print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n";
2350 push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
2352 # fc is not a locale concept, so Perl uses lc for it.
2353 push @f, $x unless lc $x eq fc $x;
2356 use locale ':not_characters';
2358 next unless uc $y eq $x;
2359 debug_more( "UPPER=", disp_chars(($x)),
2360 "; lc=", disp_chars(($y)), "; ",
2361 "; fc=", disp_chars((fc $x)), "; ",
2362 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
2363 $x =~ /\Q$y/i ? 1 : 0,
2365 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
2366 $y =~ /\Q$x/i ? 1 : 0,
2369 push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
2371 # The places where Unicode's lc is different from fc are
2372 # skipped here by virtue of the 'next unless uc...' line above
2373 push @f, $x unless lc $x eq fc $x;
2377 foreach my $x (sort { ord $a <=> ord $b } keys %lower) {
2378 if (! $is_utf8_locale) {
2380 next unless lc $y eq $x;
2381 debug_more( "lower=", disp_chars(($x)),
2382 "; uc=", disp_chars(($y)), "; ",
2383 "; fc=", disp_chars((fc $x)), "; ",
2384 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
2385 $x =~ /\Q$y/i ? 1 : 0,
2387 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
2388 $y =~ /\Q$x/i ? 1 : 0,
2390 if ($x =~ $re || $y =~ $re) { # See above.
2391 print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n";
2394 push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
2396 push @f, $x unless lc $x eq fc $x;
2399 use locale ':not_characters';
2401 next unless lc $y eq $x;
2402 debug_more( "lower=", disp_chars(($x)),
2403 "; uc=", disp_chars(($y)), "; ",
2404 "; fc=", disp_chars((fc $x)), "; ",
2405 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
2406 $x =~ /\Q$y/i ? 1 : 0,
2408 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
2409 $y =~ /\Q$x/i ? 1 : 0,
2411 push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
2413 push @f, $x unless lc $x eq fc $x;
2416 report_multi_result($Locale, $locales_test_number, \@f);
2417 $problematical_tests{$locales_test_number} = 1;
2423 ++$locales_test_number;
2424 $test_names{$locales_test_number} = 'Verify atof with locale radix and negative exponent';
2425 $problematical_tests{$locales_test_number} = 1;
2427 my $radix = POSIX::localeconv()->{decimal_point};
2429 "3.14e+9", "3${radix}14e+9", "3.14e-9", "3${radix}14e-9",
2430 "-3.14e+9", "-3${radix}14e+9", "-3.14e-9", "-3${radix}14e-9",
2433 if (! $is_utf8_locale) {
2435 for my $num (@nums) {
2437 unless sprintf("%g", $num) =~ /3.+14/;
2441 use locale ':not_characters';
2442 for my $num (@nums) {
2444 unless sprintf("%g", $num) =~ /3.+14/;
2448 report_result($Locale, $locales_test_number, @f == 0);
2450 print "# failed $locales_test_number locale '$Locale' numbers @f\n"
2455 my $final_locales_test_number = $locales_test_number;
2457 # Recount the errors.
2460 foreach $test_num ($first_locales_test_number..$final_locales_test_number) {
2461 my $has_non_global_failure = $Problem{$test_num}
2462 || ! defined $Okay{$test_num}
2463 || ! @{$Okay{$test_num}};
2464 print "not " if %setlocale_failed || $has_non_global_failure;
2465 print "ok $test_num";
2466 $test_names{$test_num} = "" unless defined $test_names{$test_num};
2468 # If TODO is in the test name, make it thus
2469 my $todo = $test_names{$test_num} =~ s/\s*TODO\s*//;
2470 print " $test_names{$test_num}";
2474 elsif (%setlocale_failed || ! $has_non_global_failure) {
2477 elsif ($has_non_global_failure) {
2479 # If there are any locales that pass this test, or are known-bad, it
2480 # may be that there are enough passes that we TODO the failure, but
2481 # only for tests that we have decided can be problematical.
2482 if ( ($Okay{$test_num} || $Known_bad_locale{$test_num})
2483 && grep { $_ == $test_num } keys %problematical_tests)
2485 # Don't count the known-bad failures when calculating the
2486 # percentage that fail.
2487 my $known_failures = (exists $Known_bad_locale{$test_num})
2488 ? scalar(keys $Known_bad_locale{$test_num}->%*)
2490 my $adjusted_failures = scalar(keys $Problem{$test_num}->%*)
2493 # Specially handle failures where only known-bad locales fail.
2494 # This makes the diagnositics clearer.
2495 if ($adjusted_failures <= 0) {
2496 print " # TODO fails only on known bad locales: ",
2497 join " ", keys $Known_bad_locale{$test_num}->%*, "\n";
2501 # Round to nearest .1%
2502 my $percent_fail = (int(.5 + (1000 * $adjusted_failures
2503 / scalar(@Locale))))
2505 $todo = $percent_fail < $acceptable_failure_percentage;
2506 print " # TODO" if $todo;
2510 print "# $percent_fail% of locales (",
2511 scalar(keys $Problem{$test_num}->%*),
2514 ") fail the above test (TODO cut-off is ",
2515 $acceptable_failure_percentage,
2519 print "# ", 100 - $percent_fail, "% of locales not known to be problematic on this platform\n";
2520 print "# pass the above test, so it is likely that the failures\n";
2521 print "# are errors in the locale definitions. The test is marked TODO, as the\n";
2522 print "# problem is not likely to be Perl's\n";
2527 print "# The code points that had this failure are given above. Look for lines\n";
2528 print "# that match 'failed $test_num'\n";
2531 print "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n";
2532 print "# Then look at that output for lines that match 'failed $test_num'\n";
2534 if (defined $not_necessarily_a_problem_test_number
2535 && $test_num == $not_necessarily_a_problem_test_number)
2537 print "# The failure of test $not_necessarily_a_problem_test_number is not necessarily fatal.\n";
2538 print "# It usually indicates a problem in the environment,\n";
2539 print "# not in Perl itself.\n";
2544 $test_num = $final_locales_test_number;
2546 if ( ! defined $Config{d_setlocale_accepts_any_locale_name}) {
2550 local $SIG{__WARN__} = sub {
2551 $warned = $_[0] =~ /uninitialized/;
2553 my $z = "y" . setlocale(&POSIX::LC_ALL, "xyzzy");
2554 ok($warned, "variable set to setlocale(\"invalid locale name\") is considered uninitialized");
2557 # Test that tainting and case changing works on utf8 strings. These tests are
2558 # placed last to avoid disturbing the hard-coded test numbers that existed at
2559 # the time these were added above this in this file.
2560 # This also tests that locale overrides unicode_strings in the same scope for
2562 setlocale(&POSIX::LC_ALL, "C");
2565 use feature 'unicode_strings';
2567 foreach my $function ("uc", "ucfirst", "lc", "lcfirst", "fc") {
2568 my @list; # List of code points to test for $function
2570 # Used to calculate the changed case for ASCII characters by using the
2571 # ord, instead of using one of the functions under test.
2572 my $ascii_case_change_delta;
2573 my $above_latin1_case_change_delta; # Same for the specific ords > 255
2576 # We test an ASCII character, which should change case;
2577 # a Latin1 character, which shouldn't change case under this C locale,
2578 # an above-Latin1 character that when the case is changed would cross
2579 # the 255/256 boundary, so doesn't change case
2580 # (the \x{149} is one of these, but changes into 2 characters, the
2581 # first one of which doesn't cross the boundary.
2582 # the final one in each list is an above-Latin1 character whose case
2583 # does change. The code below uses its position in its list as a
2584 # marker to indicate that it, unlike the other code points above
2585 # ASCII, has a successful case change
2587 # All casing operations under locale (but not :not_characters) should
2589 if ($function =~ /^u/) {
2591 chr(utf8::unicode_to_native(0xe0)),
2592 chr(utf8::unicode_to_native(0xff)),
2593 "\x{fb00}", "\x{149}", "\x{101}");
2594 $ascii_case_change_delta = ($is_ebcdic) ? +64 : -32;
2595 $above_latin1_case_change_delta = -1;
2599 chr(utf8::unicode_to_native(0xC0)),
2600 "\x{17F}", "\x{100}");
2601 $ascii_case_change_delta = ($is_ebcdic) ? -64 : +32;
2602 $above_latin1_case_change_delta = +1;
2604 foreach my $is_utf8_locale (0 .. 1) {
2605 foreach my $j (0 .. $#list) {
2606 my $char = $list[$j];
2608 for my $encoded_in_utf8 (0 .. 1) {
2611 if (! $is_utf8_locale) {
2612 no warnings 'locale';
2613 $should_be = ($j == $#list)
2614 ? chr(ord($char) + $above_latin1_case_change_delta)
2615 : (length $char == 0 || utf8::native_to_unicode(ord($char)) > 127)
2617 : chr(ord($char) + $ascii_case_change_delta);
2619 # This monstrosity is in order to avoid using an eval,
2620 # which might perturb the results
2621 $changed = ($function eq "uc")
2623 : ($function eq "ucfirst")
2625 : ($function eq "lc")
2627 : ($function eq "lcfirst")
2629 : ($function eq "fc")
2631 : die("Unexpected function \"$function\"");
2637 # For utf8-locales the case changing functions
2638 # should work just like they do outside of locale.
2639 # Can use eval here because not testing it when
2641 $should_be = eval "$function('$char')";
2642 die "Unexpected eval error $@ from 'eval \"$function('$char')\"'" if $@;
2645 use locale ':not_characters';
2646 $changed = ($function eq "uc")
2648 : ($function eq "ucfirst")
2650 : ($function eq "lc")
2652 : ($function eq "lcfirst")
2654 : ($function eq "fc")
2656 : die("Unexpected function \"$function\"");
2658 ok($changed eq $should_be,
2659 "$function(\"$char\") in C locale "
2660 . (($is_utf8_locale)
2661 ? "(use locale ':not_characters'"
2663 . (($encoded_in_utf8)
2664 ? "; encoded in utf8)"
2665 : "; not encoded in utf8)")
2666 . " should be \"$should_be\", got \"$changed\"");
2668 # Tainting shouldn't happen for use locale :not_character
2671 ? check_taint($changed)
2672 : check_taint_not($changed);
2674 # Use UTF-8 next time through the loop
2675 utf8::upgrade($char);
2682 # Give final advice.
2686 foreach ($first_locales_test_number..$final_locales_test_number) {
2688 my @f = sort keys %{ $Problem{$_} };
2690 # Don't list the failures caused by known-bad locales.
2691 if (exists $known_bad_locales{$os}) {
2692 @f = grep { $_ !~ $known_bad_locales{$os} } @f;
2695 my $f = join(" ", @f);
2696 $f =~ s/(.{50,60}) /$1\n#\t/g;
2699 "# The locale ", (@f == 1 ? "definition" : "definitions"), "\n#\n",
2701 "# on your system may have errors because the locale test $_\n",
2702 "# \"$test_names{$_}\"\n",
2703 "# failed in ", (@f == 1 ? "that locale" : "those locales"),
2707 # If your users are not using these locales you are safe for the moment,
2708 # but please report this failure first to perlbug\@perl.org using the
2709 # perlbug script (as described in the INSTALL file) so that the exact
2710 # details of the failures can be sorted out first and then your operating
2711 # system supplier can be alerted about these anomalies.
2718 # Tell which locales were okay and which were not.
2723 foreach my $l (@Locale) {
2725 if ($setlocale_failed{$l}) {
2730 ($first_locales_test_number..$final_locales_test_number)
2732 $p++ if $Problem{$t}{$l};
2735 push @s, $l if $p == 0;
2736 push @F, $l unless $p == 0;
2740 my $s = join(" ", @s);
2741 $s =~ s/(.{50,60}) /$1\n#\t/g;
2744 "# The following locales\n#\n",
2746 "# tested okay.\n#\n",
2748 print "# None of your locales were fully okay.\n";
2752 my $F = join(" ", @F);
2753 $F =~ s/(.{50,60}) /$1\n#\t/g;
2757 $details = "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n";
2759 elsif ($debug == 1) {
2760 $details = "# For even more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=2.\n";
2764 "# The following locales\n#\n",
2766 "# had problems.\n#\n",
2769 print "# None of your locales were broken.\n";
2773 if (exists $known_bad_locales{$os} && ! %Known_bad_locale) {
2775 print "ok $test_num $^O no longer has known bad locales # TODO\n";
2778 print "1..$test_num\n";