3 # This tests plain 'use locale' and adorned 'use locale ":not_characters"'
4 # Because these pragmas are compile time, and I (khw) am trying to test
5 # without using 'eval' as much as possible, which might cloud the issue, the
6 # crucial parts of the code are duplicated in a block for each pragma.
8 # To make a TODO test, add the string 'TODO' to its %test_names value
10 my $is_ebcdic = ord("A") == 193;
12 no warnings 'locale'; # We test even weird locales; and do some scary things
15 binmode STDOUT, ':utf8';
16 binmode STDERR, ':utf8';
22 require './loc_tools.pl';
23 unless (locales_enabled('LC_CTYPE')) {
28 require Config; import Config;
32 use feature 'fc', 'postderef';
34 # =1 adds debugging output; =2 increases the verbosity somewhat
35 our $debug = $ENV{PERL_DEBUG_FULL_TEST} // 0;
37 # Certain tests have been shown to be problematical for a few locales. Don't
38 # fail them unless at least this percentage of the tested locales fail.
39 # On AIX machines, many locales call a no-break space a graphic.
40 # (There aren't 1000 locales currently in existence, so 99.9 works)
41 my $acceptable_failure_percentage = ($^O =~ / ^ ( AIX ) $ /ix)
45 # The list of test numbers of the problematic tests.
46 my %problematical_tests;
48 # If any %problematical_tests fails in one of these locales, it is
50 my %known_bad_locales = (
51 irix => qr/ ^ (?: cs | hu | sk ) $/x,
52 darwin => qr/ ^ lt_LT.ISO8859 /ix,
53 os390 => qr/ ^ italian /ix,
56 # cygwin isn't returning proper radix length in this locale, but supposedly to
57 # be fixed in later versions.
58 if ($^O eq 'cygwin' && version->new(($Config{osvers} =~ /^(\d+(?:\.\d+)+)/)[0]) le v2.4.1) {
59 $known_bad_locales{'cygwin'} = qr/ ^ ps_AF /ix;
64 my $dumper = Dumpvalue->new(
72 my($mess) = join "", '# ', @_;
74 print STDERR $dumper->stringify($mess,1), "\n";
83 return unless $debug > 1;
88 printf STDERR @_ if $debug;
96 my ($result, $message) = @_;
97 $message = "" unless defined $message;
99 print 'not ' unless ($result);
100 print "ok " . ++$test_num;
103 return ($result) ? 1 : 0;
106 # First we'll do a lot of taint checking for locales.
107 # This is the easiest to test, actually, as any locale,
108 # even the default locale will taint under 'use locale'.
110 sub is_tainted { # hello, camel two.
111 no warnings 'uninitialized' ;
114 not eval { $dummy = join("", @_), kill 0; 1 }
117 sub check_taint ($;$) {
118 my $message_tail = $_[1] // "";
120 # Extra blanks are so aligns with taint_not output
121 $message_tail = ": $message_tail" if $message_tail;
122 ok is_tainted($_[0]), "verify that is tainted$message_tail";
125 sub check_taint_not ($;$) {
126 my $message_tail = $_[1] // "";
127 $message_tail = ": $message_tail" if $message_tail;
128 ok((not is_tainted($_[0])), "verify that isn't tainted$message_tail");
131 foreach my $category (qw(ALL COLLATE CTYPE MESSAGES MONETARY NUMERIC TIME)) {
132 my $short_result = locales_enabled($category);
133 ok ($short_result == 0 || $short_result == 1,
134 "Verify locales_enabled('$category') returns 0 or 1");
135 debug("locales_enabled('$category') returned '$short_result'");
136 my $long_result = locales_enabled("LC_$category");
137 if (! ok ($long_result == $short_result,
138 " and locales_enabled('LC_$category') returns "
141 debug("locales_enabled('LC_$category') returned $long_result");
145 "\tb\t" =~ /^m?(\s)(.*)\1$/;
146 check_taint_not $&, "not tainted outside 'use locale'";
149 use locale; # engage locale and therefore locale taint.
151 # BE SURE TO COPY ANYTHING YOU ADD to these tests to the block below for
154 check_taint_not $a, '$a';
156 check_taint uc($a), 'uc($a)';
157 check_taint "\U$a", '"\U$a"';
158 check_taint ucfirst($a), 'ucfirst($a)';
159 check_taint "\u$a", '"\u$a"';
160 check_taint lc($a), 'lc($a)';
161 check_taint fc($a), 'fc($a)';
162 check_taint "\L$a", '"\L$a"';
163 check_taint "\F$a", '"\F$a"';
164 check_taint lcfirst($a), 'lcfirst($a)';
165 check_taint "\l$a", '"\l$a"';
167 check_taint_not sprintf('%e', 123.456), "sprintf('%e', 123.456)";
168 check_taint_not sprintf('%f', 123.456), "sprintf('%f', 123.456)";
169 check_taint_not sprintf('%g', 123.456), "sprintf('%g', 123.456)";
170 check_taint_not sprintf('%d', 123.456), "sprintf('%d', 123.456)";
171 check_taint_not sprintf('%x', 123.456), "sprintf('%x', 123.456)";
173 $_ = $a; # untaint $_
175 $_ = uc($a); # taint $_
177 check_taint $_, '$_ = uc($a)';
179 /(\w)/; # taint $&, $`, $', $+, $1.
180 check_taint $&, "\$& from /(\\w)/";
181 check_taint $`, "\t\$`";
182 check_taint $', "\t\$'";
183 check_taint $+, "\t\$+";
184 check_taint $1, "\t\$1";
185 check_taint_not $2, "\t\$2";
187 /(.)/; # untaint $&, $`, $', $+, $1.
188 check_taint_not $&, "\$& from /(.)/";
189 check_taint_not $`, "\t\$`";
190 check_taint_not $', "\t\$'";
191 check_taint_not $+, "\t\$+";
192 check_taint_not $1, "\t\$1";
193 check_taint_not $2, "\t\$2";
195 /(\W)/; # taint $&, $`, $', $+, $1.
196 check_taint $&, "\$& from /(\\W)/";
197 check_taint $`, "\t\$`";
198 check_taint $', "\t\$'";
199 check_taint $+, "\t\$+";
200 check_taint $1, "\t\$1";
201 check_taint_not $2, "\t\$2";
203 /(.)/; # untaint $&, $`, $', $+, $1.
204 check_taint_not $&, "\$& from /(.)/";
205 check_taint_not $`, "\t\$`";
206 check_taint_not $', "\t\$'";
207 check_taint_not $+, "\t\$+";
208 check_taint_not $1, "\t\$1";
209 check_taint_not $2, "\t\$2";
211 /(\s)/; # taint $&, $`, $', $+, $1.
212 check_taint $&, "\$& from /(\\s)/";
213 check_taint $`, "\t\$`";
214 check_taint $', "\t\$'";
215 check_taint $+, "\t\$+";
216 check_taint $1, "\t\$1";
217 check_taint_not $2, "\t\$2";
219 /(.)/; # untaint $&, $`, $', $+, $1.
220 check_taint_not $&, "\$& from /(.)/";
222 /(\S)/; # taint $&, $`, $', $+, $1.
223 check_taint $&, "\$& from /(\\S)/";
224 check_taint $`, "\t\$`";
225 check_taint $', "\t\$'";
226 check_taint $+, "\t\$+";
227 check_taint $1, "\t\$1";
228 check_taint_not $2, "\t\$2";
230 /(.)/; # untaint $&, $`, $', $+, $1.
231 check_taint_not $&, "\$& from /(.)/";
233 "0" =~ /(\d)/; # taint $&, $`, $', $+, $1.
234 check_taint $&, "\$& from /(\\d)/";
235 check_taint $`, "\t\$`";
236 check_taint $', "\t\$'";
237 check_taint $+, "\t\$+";
238 check_taint $1, "\t\$1";
239 check_taint_not $2, "\t\$2";
241 /(.)/; # untaint $&, $`, $', $+, $1.
242 check_taint_not $&, "\$& from /(.)/";
244 /(\D)/; # taint $&, $`, $', $+, $1.
245 check_taint $&, "\$& from /(\\D)/";
246 check_taint $`, "\t\$`";
247 check_taint $', "\t\$'";
248 check_taint $+, "\t\$+";
249 check_taint $1, "\t\$1";
250 check_taint_not $2, "\t\$2";
252 /(.)/; # untaint $&, $`, $', $+, $1.
253 check_taint_not $&, "\$& from /(.)/";
255 /([[:alnum:]])/; # taint $&, $`, $', $+, $1.
256 check_taint $&, "\$& from /([[:alnum:]])/";
257 check_taint $`, "\t\$`";
258 check_taint $', "\t\$'";
259 check_taint $+, "\t\$+";
260 check_taint $1, "\t\$1";
261 check_taint_not $2, "\t\$2";
263 /(.)/; # untaint $&, $`, $', $+, $1.
264 check_taint_not $&, "\$& from /(.)/";
266 /([[:^alnum:]])/; # taint $&, $`, $', $+, $1.
267 check_taint $&, "\$& from /([[:^alnum:]])/";
268 check_taint $`, "\t\$`";
269 check_taint $', "\t\$'";
270 check_taint $+, "\t\$+";
271 check_taint $1, "\t\$1";
272 check_taint_not $2, "\t\$2";
274 "a" =~ /(a)|(\w)/; # taint $&, $`, $', $+, $1.
275 check_taint $&, "\$& from /(a)|(\\w)/";
276 check_taint $`, "\t\$`";
277 check_taint $', "\t\$'";
278 check_taint $+, "\t\$+";
279 check_taint $1, "\t\$1";
280 ok($1 eq 'a', ("\t" x 5) . "\$1 is 'a'");
281 ok(! defined $2, ("\t" x 5) . "\$2 is undefined");
282 check_taint_not $2, "\t\$2";
283 check_taint_not $3, "\t\$3";
285 /(.)/; # untaint $&, $`, $', $+, $1.
286 check_taint_not $&, "\$& from /(.)/";
288 "\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i; # no tainting because no locale dependence
289 check_taint_not $&, "\$& from /(\\N{CYRILLIC CAPITAL LETTER A})/i";
290 check_taint_not $`, "\t\$`";
291 check_taint_not $', "\t\$'";
292 check_taint_not $+, "\t\$+";
293 check_taint_not $1, "\t\$1";
294 ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\t\$1 is 'small cyrillic a'");
295 check_taint_not $2, "\t\$2";
297 /(.)/; # untaint $&, $`, $', $+, $1.
298 check_taint_not $&, "\$& from /./";
300 "(\N{KELVIN SIGN})" =~ /(\N{KELVIN SIGN})/i; # taints because depends on locale
301 check_taint $&, "\$& from /(\\N{KELVIN SIGN})/i";
302 check_taint $`, "\t\$`";
303 check_taint $', "\t\$'";
304 check_taint $+, "\t\$+";
305 check_taint $1, "\t\$1";
306 check_taint_not $2, "\t\$2";
308 /(.)/; # untaint $&, $`, $', $+, $1.
309 check_taint_not $&, "\$& from /(.)/";
311 "a:" =~ /(.)\b(.)/; # taint $&, $`, $', $+, $1.
312 check_taint $&, "\$& from /(.)\\b(.)/";
313 check_taint $`, "\t\$`";
314 check_taint $', "\t\$'";
315 check_taint $+, "\t\$+";
316 check_taint $1, "\t\$1";
317 check_taint $2, "\t\$2";
318 check_taint_not $3, "\t\$3";
320 /(.)/; # untaint $&, $`, $', $+, $1.
321 check_taint_not $&, "\$& from /./";
323 "aa" =~ /(.)\B(.)/; # taint $&, $`, $', $+, $1.
324 check_taint $&, "\$& from /(.)\\B(.)/";
325 check_taint $`, "\t\$`";
326 check_taint $', "\t\$'";
327 check_taint $+, "\t\$+";
328 check_taint $1, "\t\$1";
329 check_taint $2, "\t\$2";
330 check_taint_not $3, "\t\$3";
332 /(.)/; # untaint $&, $`, $', $+, $1.
333 check_taint_not $&, "\$& from /./";
335 "aaa" =~ /(.).(\1)/i; # notaint because not locale dependent
336 check_taint_not $&, "\$ & from /(.).(\\1)/";
337 check_taint_not $`, "\t\$`";
338 check_taint_not $', "\t\$'";
339 check_taint_not $+, "\t\$+";
340 check_taint_not $1, "\t\$1";
341 check_taint_not $2, "\t\$2";
342 check_taint_not $3, "\t\$3";
344 /(.)/; # untaint $&, $`, $', $+, $1.
345 check_taint_not $&, "\$ & from /./";
347 $_ = $a; # untaint $_
349 check_taint_not $_, 'untainting $_ works';
351 /(b)/; # this must not taint
352 check_taint_not $&, "\$ & from /(b)/";
353 check_taint_not $`, "\t\$`";
354 check_taint_not $', "\t\$'";
355 check_taint_not $+, "\t\$+";
356 check_taint_not $1, "\t\$1";
357 check_taint_not $2, "\t\$2";
359 $_ = $a; # untaint $_
361 check_taint_not $_, 'untainting $_ works';
363 $b = uc($a); # taint $b
364 s/(.+)/$b/; # this must taint only the $_
366 check_taint $_, '$_ (wasn\'t tainted) from s/(.+)/$b/ where $b is tainted';
367 check_taint_not $&, "\t\$&";
368 check_taint_not $`, "\t\$`";
369 check_taint_not $', "\t\$'";
370 check_taint_not $+, "\t\$+";
371 check_taint_not $1, "\t\$1";
372 check_taint_not $2, "\t\$2";
374 $_ = $a; # untaint $_
376 s/(.+)/b/; # this must not taint
377 check_taint_not $_, '$_ (wasn\'t tainted) from s/(.+)/b/';
378 check_taint_not $&, "\t\$&";
379 check_taint_not $`, "\t\$`";
380 check_taint_not $', "\t\$'";
381 check_taint_not $+, "\t\$+";
382 check_taint_not $1, "\t\$1";
383 check_taint_not $2, "\t\$2";
385 $b = $a; # untaint $b
387 ($b = $a) =~ s/\w/$&/;
388 check_taint $b, '$b from ($b = $a) =~ s/\w/$&/'; # $b should be tainted.
389 check_taint_not $a, '$a from ($b = $a) =~ s/\w/$&/'; # $a should be not.
391 $_ = $a; # untaint $_
393 s/(\w)/\l$1/; # this must taint
394 check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\l$1/,'; # this must taint
395 check_taint $&, "\t\$&";
396 check_taint $`, "\t\$`";
397 check_taint $', "\t\$'";
398 check_taint $+, "\t\$+";
399 check_taint $1, "\t\$1";
400 check_taint_not $2, "\t\$2";
402 $_ = $a; # untaint $_
404 s/(\w)/\L$1/; # this must taint
405 check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\L$1/,';
406 check_taint $&, "\t\$&";
407 check_taint $`, "\t\$`";
408 check_taint $', "\t\$'";
409 check_taint $+, "\t\$+";
410 check_taint $1, "\t\$1";
411 check_taint_not $2, "\t\$2";
413 $_ = $a; # untaint $_
415 s/(\w)/\u$1/; # this must taint
416 check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\u$1/';
417 check_taint $&, "\t\$&";
418 check_taint $`, "\t\$`";
419 check_taint $', "\t\$'";
420 check_taint $+, "\t\$+";
421 check_taint $1, "\t\$1";
422 check_taint_not $2, "\t\$2";
424 $_ = $a; # untaint $_
426 s/(\w)/\U$1/; # this must taint
427 check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\U$1/';
428 check_taint $&, "\t\$&";
429 check_taint $`, "\t\$`";
430 check_taint $', "\t\$'";
431 check_taint $+, "\t\$+";
432 check_taint $1, "\t\$1";
433 check_taint_not $2, "\t\$2";
435 # After all this tainting $a should be cool.
437 check_taint_not $a, '$a still not tainted';
440 check_taint_not $1, '"a" =~ /([a-z])/';
441 "foo.bar_baz" =~ /^(.*)[._](.*?)$/; # Bug 120675
442 check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/';
444 # BE SURE TO COPY ANYTHING YOU ADD to the block below
446 { # This is just the previous tests copied here with a different
447 # compile-time pragma.
449 use locale ':not_characters'; # engage restricted locale with different
451 check_taint_not $a, '$a';
453 check_taint_not uc($a), 'uc($a)';
454 check_taint_not "\U$a", '"\U$a"';
455 check_taint_not ucfirst($a), 'ucfirst($a)';
456 check_taint_not "\u$a", '"\u$a"';
457 check_taint_not lc($a), 'lc($a)';
458 check_taint_not fc($a), 'fc($a)';
459 check_taint_not "\L$a", '"\L$a"';
460 check_taint_not "\F$a", '"\F$a"';
461 check_taint_not lcfirst($a), 'lcfirst($a)';
462 check_taint_not "\l$a", '"\l$a"';
464 check_taint_not sprintf('%e', 123.456), "sprintf('%e', 123.456)";
465 check_taint_not sprintf('%f', 123.456), "sprintf('%f', 123.456)";
466 check_taint_not sprintf('%g', 123.456), "sprintf('%g', 123.456)";
467 check_taint_not sprintf('%d', 123.456), "sprintf('%d', 123.456)";
468 check_taint_not sprintf('%x', 123.456), "sprintf('%x', 123.456)";
470 $_ = $a; # untaint $_
474 check_taint_not $_, '$_ = uc($a)';
477 check_taint_not $&, "\$& from /(\\w)/";
478 check_taint_not $`, "\t\$`";
479 check_taint_not $', "\t\$'";
480 check_taint_not $+, "\t\$+";
481 check_taint_not $1, "\t\$1";
482 check_taint_not $2, "\t\$2";
484 /(.)/; # untaint $&, $`, $', $+, $1.
485 check_taint_not $&, "\$& from /(.)/";
486 check_taint_not $`, "\t\$`";
487 check_taint_not $', "\t\$'";
488 check_taint_not $+, "\t\$+";
489 check_taint_not $1, "\t\$1";
490 check_taint_not $2, "\t\$2";
493 check_taint_not $&, "\$& from /(\\W)/";
494 check_taint_not $`, "\t\$`";
495 check_taint_not $', "\t\$'";
496 check_taint_not $+, "\t\$+";
497 check_taint_not $1, "\t\$1";
498 check_taint_not $2, "\t\$2";
500 /(.)/; # untaint $&, $`, $', $+, $1.
501 check_taint_not $&, "\$& from /(.)/";
502 check_taint_not $`, "\t\$`";
503 check_taint_not $', "\t\$'";
504 check_taint_not $+, "\t\$+";
505 check_taint_not $1, "\t\$1";
506 check_taint_not $2, "\t\$2";
509 check_taint_not $&, "\$& from /(\\s)/";
510 check_taint_not $`, "\t\$`";
511 check_taint_not $', "\t\$'";
512 check_taint_not $+, "\t\$+";
513 check_taint_not $1, "\t\$1";
514 check_taint_not $2, "\t\$2";
516 /(.)/; # untaint $&, $`, $', $+, $1.
517 check_taint_not $&, "\$& from /(.)/";
520 check_taint_not $&, "\$& from /(\\S)/";
521 check_taint_not $`, "\t\$`";
522 check_taint_not $', "\t\$'";
523 check_taint_not $+, "\t\$+";
524 check_taint_not $1, "\t\$1";
525 check_taint_not $2, "\t\$2";
527 /(.)/; # untaint $&, $`, $', $+, $1.
528 check_taint_not $&, "\$& from /(.)/";
531 check_taint_not $&, "\$& from /(\\d)/";
532 check_taint_not $`, "\t\$`";
533 check_taint_not $', "\t\$'";
534 check_taint_not $+, "\t\$+";
535 check_taint_not $1, "\t\$1";
536 check_taint_not $2, "\t\$2";
538 /(.)/; # untaint $&, $`, $', $+, $1.
539 check_taint_not $&, "\$& from /(.)/";
542 check_taint_not $&, "\$& from /(\\D)/";
543 check_taint_not $`, "\t\$`";
544 check_taint_not $', "\t\$'";
545 check_taint_not $+, "\t\$+";
546 check_taint_not $1, "\t\$1";
547 check_taint_not $2, "\t\$2";
549 /(.)/; # untaint $&, $`, $', $+, $1.
550 check_taint_not $&, "\$& from /(.)/";
553 check_taint_not $&, "\$& from /([[:alnum:]])/";
554 check_taint_not $`, "\t\$`";
555 check_taint_not $', "\t\$'";
556 check_taint_not $+, "\t\$+";
557 check_taint_not $1, "\t\$1";
558 check_taint_not $2, "\t\$2";
560 /(.)/; # untaint $&, $`, $', $+, $1.
561 check_taint_not $&, "\$& from /(.)/";
564 check_taint_not $&, "\$& from /([[:^alnum:]])/";
565 check_taint_not $`, "\t\$`";
566 check_taint_not $', "\t\$'";
567 check_taint_not $+, "\t\$+";
568 check_taint_not $1, "\t\$1";
569 check_taint_not $2, "\t\$2";
572 check_taint_not $&, "\$& from /(a)|(\\w)/";
573 check_taint_not $`, "\t\$`";
574 check_taint_not $', "\t\$'";
575 check_taint_not $+, "\t\$+";
576 check_taint_not $1, "\t\$1";
577 ok($1 eq 'a', ("\t" x 5) . "\$1 is 'a'");
578 ok(! defined $2, ("\t" x 5) . "\$2 is undefined");
579 check_taint_not $2, "\t\$2";
580 check_taint_not $3, "\t\$3";
582 /(.)/; # untaint $&, $`, $', $+, $1.
583 check_taint_not $&, "\$& from /(.)/";
585 "\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i;
586 check_taint_not $&, "\$& from /(\\N{CYRILLIC CAPITAL LETTER A})/i";
587 check_taint_not $`, "\t\$`";
588 check_taint_not $', "\t\$'";
589 check_taint_not $+, "\t\$+";
590 check_taint_not $1, "\t\$1";
591 ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\t\$1 is 'small cyrillic a'");
592 check_taint_not $2, "\t\$2";
594 /(.)/; # untaint $&, $`, $', $+, $1.
595 check_taint_not $&, "\$& from /./";
597 "(\N{KELVIN SIGN})" =~ /(\N{KELVIN SIGN})/i;
598 check_taint_not $&, "\$& from /(\\N{KELVIN SIGN})/i";
599 check_taint_not $`, "\t\$`";
600 check_taint_not $', "\t\$'";
601 check_taint_not $+, "\t\$+";
602 check_taint_not $1, "\t\$1";
603 check_taint_not $2, "\t\$2";
605 /(.)/; # untaint $&, $`, $', $+, $1.
606 check_taint_not $&, "\$& from /(.)/";
609 check_taint_not $&, "\$& from /(.)\\b(.)/";
610 check_taint_not $`, "\t\$`";
611 check_taint_not $', "\t\$'";
612 check_taint_not $+, "\t\$+";
613 check_taint_not $1, "\t\$1";
614 check_taint_not $2, "\t\$2";
615 check_taint_not $3, "\t\$3";
617 /(.)/; # untaint $&, $`, $', $+, $1.
618 check_taint_not $&, "\$& from /./";
621 check_taint_not $&, "\$& from /(.)\\B(.)/";
622 check_taint_not $`, "\t\$`";
623 check_taint_not $', "\t\$'";
624 check_taint_not $+, "\t\$+";
625 check_taint_not $1, "\t\$1";
626 check_taint_not $2, "\t\$2";
627 check_taint_not $3, "\t\$3";
629 /(.)/; # untaint $&, $`, $', $+, $1.
630 check_taint_not $&, "\$& from /./";
632 "aaa" =~ /(.).(\1)/i; # notaint because not locale dependent
633 check_taint_not $&, "\$ & from /(.).(\\1)/";
634 check_taint_not $`, "\t\$`";
635 check_taint_not $', "\t\$'";
636 check_taint_not $+, "\t\$+";
637 check_taint_not $1, "\t\$1";
638 check_taint_not $2, "\t\$2";
639 check_taint_not $3, "\t\$3";
641 /(.)/; # untaint $&, $`, $', $+, $1.
642 check_taint_not $&, "\$ & from /./";
644 $_ = $a; # untaint $_
646 check_taint_not $_, 'untainting $_ works';
649 check_taint_not $&, "\$ & from /(b)/";
650 check_taint_not $`, "\t\$`";
651 check_taint_not $', "\t\$'";
652 check_taint_not $+, "\t\$+";
653 check_taint_not $1, "\t\$1";
654 check_taint_not $2, "\t\$2";
656 $_ = $a; # untaint $_
658 check_taint_not $_, 'untainting $_ works';
661 check_taint_not $_, '$_ (wasn\'t tainted) from s/(.+)/b/';
662 check_taint_not $&, "\t\$&";
663 check_taint_not $`, "\t\$`";
664 check_taint_not $', "\t\$'";
665 check_taint_not $+, "\t\$+";
666 check_taint_not $1, "\t\$1";
667 check_taint_not $2, "\t\$2";
669 $b = $a; # untaint $b
671 ($b = $a) =~ s/\w/$&/;
672 check_taint_not $b, '$b from ($b = $a) =~ s/\w/$&/';
673 check_taint_not $a, '$a from ($b = $a) =~ s/\w/$&/';
675 $_ = $a; # untaint $_
678 check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\l$1/,'; # this must taint
679 check_taint_not $&, "\t\$&";
680 check_taint_not $`, "\t\$`";
681 check_taint_not $', "\t\$'";
682 check_taint_not $+, "\t\$+";
683 check_taint_not $1, "\t\$1";
684 check_taint_not $2, "\t\$2";
686 $_ = $a; # untaint $_
689 check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\L$1/,';
690 check_taint_not $&, "\t\$&";
691 check_taint_not $`, "\t\$`";
692 check_taint_not $', "\t\$'";
693 check_taint_not $+, "\t\$+";
694 check_taint_not $1, "\t\$1";
695 check_taint_not $2, "\t\$2";
697 $_ = $a; # untaint $_
700 check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\u$1/';
701 check_taint_not $&, "\t\$&";
702 check_taint_not $`, "\t\$`";
703 check_taint_not $', "\t\$'";
704 check_taint_not $+, "\t\$+";
705 check_taint_not $1, "\t\$1";
706 check_taint_not $2, "\t\$2";
708 $_ = $a; # untaint $_
711 check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\U$1/';
712 check_taint_not $&, "\t\$&";
713 check_taint_not $`, "\t\$`";
714 check_taint_not $', "\t\$'";
715 check_taint_not $+, "\t\$+";
716 check_taint_not $1, "\t\$1";
717 check_taint_not $2, "\t\$2";
719 # After all this tainting $a should be cool.
721 check_taint_not $a, '$a still not tainted';
724 check_taint_not $1, '"a" =~ /([a-z])/';
725 "foo.bar_baz" =~ /^(.*)[._](.*?)$/; # Bug 120675
726 check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/';
730 # Here are in scope of 'use locale'
732 # I think we've seen quite enough of taint.
733 # Let us do some *real* locale work now,
734 # unless setlocale() is missing (i.e. minitest).
736 # The test number before our first setlocale()
737 my $final_without_setlocale = $test_num;
741 debug "Scanning for locales...\n";
743 require POSIX; import POSIX ':locale_h';
745 my @Locale = find_locales([ &POSIX::LC_CTYPE, &POSIX::LC_NUMERIC, &POSIX::LC_ALL ]);
753 print "1..$test_num\n";
758 setlocale(&POSIX::LC_ALL, "C");
764 my %Known_bad_locale; # Failed test for a locale known to be bad
766 my @Added_alpha; # Alphas that aren't in the C locale.
770 # This returns a display string denoting the input parameter @_, each
771 # entry of which is a single character in the range 0-255. The first part
772 # of the output is a string of the characters in @_ that are ASCII
773 # graphics, and hence unambiguously displayable. They are given by code
774 # point order. The second part is the remaining code points, the ordinals
775 # of which are each displayed as 2-digit hex. Blanks are inserted so as
776 # to keep anything from the first part looking like a 2-digit hex number.
779 my @chars = sort { ord $a <=> ord $b } @_;
783 push @chars, chr(258); # This sentinel simplifies the loop termination
785 foreach my $i (0 .. @chars - 1) {
786 my $char = $chars[$i];
790 # We avoid using [:posix:] classes, as these are being tested in this
791 # file. Each equivalence class below is for things that can appear in
792 # a range; those that can't be in a range have class -1. 0 for those
793 # which should be output in hex; and >0 for the other ranges
794 if ($char =~ /[A-Z]/) {
797 elsif ($char =~ /[a-z]/) {
800 elsif ($char =~ /[0-9]/) {
803 # Uncomment to get literal punctuation displayed instead of hex
804 #elsif ($char =~ /[[\]!"#\$\%&\'()*+,.\/:\\;<=>?\@\^_`{|}~-]/) {
805 # $class = -1; # Punct never appears in a range
808 $class = 0; # Output in hex
811 if (! defined $range_start) {
813 $output .= " " . $char;
816 $range_start = ord $char;
817 $start_class = $class;
819 } # A range ends if not consecutive, or the class-type changes
820 elsif (ord $char != ($range_end = ord($chars[$i-1])) + 1
821 || $class != $start_class)
824 # Here, the current character is not in the range. This means the
825 # previous character must have been. Output the range up through
827 my $range_length = $range_end - $range_start + 1;
828 if ($start_class > 0) {
829 $output .= " " . chr($range_start);
830 $output .= "-" . chr($range_end) if $range_length > 1;
833 $output .= sprintf(" %02X", $range_start);
834 $output .= sprintf("-%02X", $range_end) if $range_length > 1;
837 # Handle the new current character, as potentially beginning a new
851 # Displays the string unambiguously. ASCII printables are always output
852 # as-is, though perhaps separated by blanks from other characters. If
853 # entirely printable ASCII, just returns the string. Otherwise if valid
854 # UTF-8 it uses the character names for non-printable-ASCII. Otherwise it
855 # outputs hex for each non-ASCII-printable byte.
857 return $string if $string =~ / ^ [[:print:]]* $/xa;
860 my $prev_was_punct = 1; # Beginning is considered punct
861 if (utf8::valid($string) && utf8::is_utf8($string)) {
863 foreach my $char (split "", $string) {
865 # Keep punctuation adjacent to other characters; otherwise
866 # separate them with a blank
867 if ($char =~ /[[:punct:]]/a) {
871 elsif ($char =~ /[[:print:]]/a) {
872 $result .= " " unless $prev_was_punct;
877 $result .= " " unless $prev_was_punct;
878 my $name = charnames::viacode(ord $char);
879 $result .= (defined $name) ? $name : ':unknown:';
886 foreach my $char (split "", $string) {
887 if ($char =~ /[[:punct:]]/a) {
891 elsif ($char =~ /[[:print:]]/a) {
892 $result .= " " unless $prev_was_punct;
897 $result .= " " unless $prev_was_punct;
898 $result .= sprintf("%02X", ord $char);
908 my ($Locale, $i, $pass_fail, $message) = @_;
910 push @{$Okay{$i}}, $Locale;
914 $message = " ($message)" if $message;
915 $Known_bad_locale{$i}{$Locale} = 1 if exists $known_bad_locales{$^O}
916 && $Locale =~ $known_bad_locales{$^O};
917 $Problem{$i}{$Locale} = 1;
918 debug "failed $i ($test_names{$i}) with locale '$Locale'$message\n";
922 sub report_multi_result {
923 my ($Locale, $i, $results_ref) = @_;
925 # $results_ref points to an array, each element of which is a character that was
926 # in error for this test numbered '$i'. If empty, the test passed
930 $message = join " ", "for", disp_chars(@$results_ref);
932 report_result($Locale, $i, @$results_ref == 0, $message);
935 my $first_locales_test_number = $final_without_setlocale + 1;
936 my $locales_test_number;
937 my $not_necessarily_a_problem_test_number;
938 my $first_casing_test_number;
939 my %setlocale_failed; # List of locales that setlocale() didn't work on
941 foreach my $Locale (@Locale) {
942 $locales_test_number = $first_locales_test_number - 1;
944 debug "Locale = $Locale\n";
946 unless (setlocale(&POSIX::LC_ALL, $Locale)) {
947 $setlocale_failed{$Locale} = $Locale;
951 # We test UTF-8 locales only under ':not_characters'; It is easier to
952 # test them in other test files than here. Non- UTF-8 locales are tested
953 # only under plain 'use locale', as otherwise we would have to convert
954 # everything in them to Unicode.
956 my %UPPER = (); # All alpha X for which uc(X) == X and lc(X) != X
957 my %lower = (); # All alpha X for which lc(X) == X and uc(X) != X
958 my %BoThCaSe = (); # All alpha X for which uc(X) == lc(X) == X
960 my $is_utf8_locale = is_locale_utf8($Locale);
962 debug "is utf8 locale? = $is_utf8_locale\n";
964 debug "radix = " . disp_str(localeconv()->{decimal_point}) . "\n";
966 if (! $is_utf8_locale) {
968 @{$posixes{'word'}} = grep /\w/, map { chr } 0..255;
969 @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255;
970 @{$posixes{'space'}} = grep /\s/, map { chr } 0..255;
971 @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255;
972 @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255;
973 @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255;
974 @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255;
975 @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255;
976 @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255;
977 @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255;
978 @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255;
979 @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255;
980 @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255;
981 @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255;
982 @{$posixes{'cased'}} = grep /[[:upper:][:lower:]]/i, map {chr } 0..255;
984 # Sieve the uppercase and the lowercase.
986 for (@{$posixes{'word'}}) {
987 if (/[^\d_]/) { # skip digits and the _
998 use locale ':not_characters';
999 @{$posixes{'word'}} = grep /\w/, map { chr } 0..255;
1000 @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255;
1001 @{$posixes{'space'}} = grep /\s/, map { chr } 0..255;
1002 @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255;
1003 @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255;
1004 @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255;
1005 @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255;
1006 @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255;
1007 @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255;
1008 @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255;
1009 @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255;
1010 @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255;
1011 @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255;
1012 @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255;
1013 @{$posixes{'cased'}} = grep /[[:upper:][:lower:]]/i, map {chr } 0..255;
1014 for (@{$posixes{'word'}}) {
1015 if (/[^\d_]/) { # skip digits and the _
1026 # Ordered, where possible, in groups of "this is a subset of the next
1028 debug ":upper: = ", disp_chars(@{$posixes{'upper'}}), "\n";
1029 debug ":lower: = ", disp_chars(@{$posixes{'lower'}}), "\n";
1030 debug ":cased: = ", disp_chars(@{$posixes{'cased'}}), "\n";
1031 debug ":alpha: = ", disp_chars(@{$posixes{'alpha'}}), "\n";
1032 debug ":alnum: = ", disp_chars(@{$posixes{'alnum'}}), "\n";
1033 debug ' \w = ', disp_chars(@{$posixes{'word'}}), "\n";
1034 debug ":graph: = ", disp_chars(@{$posixes{'graph'}}), "\n";
1035 debug ":print: = ", disp_chars(@{$posixes{'print'}}), "\n";
1036 debug ' \d = ', disp_chars(@{$posixes{'digit'}}), "\n";
1037 debug ":xdigit: = ", disp_chars(@{$posixes{'xdigit'}}), "\n";
1038 debug ":blank: = ", disp_chars(@{$posixes{'blank'}}), "\n";
1039 debug ' \s = ', disp_chars(@{$posixes{'space'}}), "\n";
1040 debug ":punct: = ", disp_chars(@{$posixes{'punct'}}), "\n";
1041 debug ":cntrl: = ", disp_chars(@{$posixes{'cntrl'}}), "\n";
1042 debug ":ascii: = ", disp_chars(@{$posixes{'ascii'}}), "\n";
1044 foreach (keys %UPPER) {
1046 $BoThCaSe{$_}++ if exists $lower{$_};
1048 foreach (keys %lower) {
1049 $BoThCaSe{$_}++ if exists $UPPER{$_};
1051 foreach (keys %BoThCaSe) {
1057 foreach my $ord ( 0 .. 255 ) {
1058 $Unassigned{chr $ord} = 1;
1060 foreach my $class (keys %posixes) {
1061 foreach my $char (@{$posixes{$class}}) {
1062 delete $Unassigned{$char};
1066 debug "UPPER = ", disp_chars(sort { ord $a <=> ord $b } keys %UPPER), "\n";
1067 debug "lower = ", disp_chars(sort { ord $a <=> ord $b } keys %lower), "\n";
1068 debug "BoThCaSe = ", disp_chars(sort { ord $a <=> ord $b } keys %BoThCaSe), "\n";
1069 debug "Unassigned = ", disp_chars(sort { ord $a <=> ord $b } keys %Unassigned), "\n";
1073 foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) {
1076 if ($is_utf8_locale) {
1077 use locale ':not_characters';
1078 $ok = $x =~ /[[:upper:]]/;
1079 $fold_ok = $x =~ /[[:lower:]]/i;
1083 $ok = $x =~ /[[:upper:]]/;
1084 $fold_ok = $x =~ /[[:lower:]]/i;
1086 push @failures, $x unless $ok;
1087 push @fold_failures, $x unless $fold_ok;
1089 $locales_test_number++;
1090 $first_casing_test_number = $locales_test_number;
1091 $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/ matches all alpha X for which uc(X) == X and lc(X) != X';
1092 report_multi_result($Locale, $locales_test_number, \@failures);
1094 $locales_test_number++;
1096 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i matches all alpha X for which uc(X) == X and lc(X) != X';
1097 report_multi_result($Locale, $locales_test_number, \@fold_failures);
1100 undef @fold_failures;
1102 foreach my $x (sort { ord $a <=> ord $b } keys %lower) {
1105 if ($is_utf8_locale) {
1106 use locale ':not_characters';
1107 $ok = $x =~ /[[:lower:]]/;
1108 $fold_ok = $x =~ /[[:upper:]]/i;
1112 $ok = $x =~ /[[:lower:]]/;
1113 $fold_ok = $x =~ /[[:upper:]]/i;
1115 push @failures, $x unless $ok;
1116 push @fold_failures, $x unless $fold_ok;
1119 $locales_test_number++;
1120 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/ matches all alpha X for which lc(X) == X and uc(X) != X';
1121 report_multi_result($Locale, $locales_test_number, \@failures);
1123 $locales_test_number++;
1124 $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/i matches all alpha X for which lc(X) == X and uc(X) != X';
1125 report_multi_result($Locale, $locales_test_number, \@fold_failures);
1127 { # Find the alphabetic characters that are not considered alphabetics
1128 # in the default (C) locale.
1133 for (keys %UPPER, keys %lower, keys %BoThCaSe) {
1134 push(@Added_alpha, $_) if (/\W/);
1138 @Added_alpha = sort { ord $a <=> ord $b } @Added_alpha;
1140 debug "Added_alpha = ", disp_chars(@Added_alpha), "\n";
1142 # Cross-check the whole 8-bit character set.
1144 ++$locales_test_number;
1146 $test_names{$locales_test_number} = 'Verify that \w and [:word:] are identical';
1147 for (map { chr } 0..255) {
1148 if ($is_utf8_locale) {
1149 use locale ':not_characters';
1150 push @f, $_ unless /[[:word:]]/ == /\w/;
1153 push @f, $_ unless /[[:word:]]/ == /\w/;
1156 report_multi_result($Locale, $locales_test_number, \@f);
1158 ++$locales_test_number;
1160 $test_names{$locales_test_number} = 'Verify that \d and [:digit:] are identical';
1161 for (map { chr } 0..255) {
1162 if ($is_utf8_locale) {
1163 use locale ':not_characters';
1164 push @f, $_ unless /[[:digit:]]/ == /\d/;
1167 push @f, $_ unless /[[:digit:]]/ == /\d/;
1170 report_multi_result($Locale, $locales_test_number, \@f);
1172 ++$locales_test_number;
1174 $test_names{$locales_test_number} = 'Verify that \s and [:space:] are identical';
1175 for (map { chr } 0..255) {
1176 if ($is_utf8_locale) {
1177 use locale ':not_characters';
1178 push @f, $_ unless /[[:space:]]/ == /\s/;
1181 push @f, $_ unless /[[:space:]]/ == /\s/;
1184 report_multi_result($Locale, $locales_test_number, \@f);
1186 ++$locales_test_number;
1188 $test_names{$locales_test_number} = 'Verify that [:posix:] and [:^posix:] are mutually exclusive';
1189 for (map { chr } 0..255) {
1190 if ($is_utf8_locale) {
1191 use locale ':not_characters';
1192 push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) ||
1193 (/[[:alnum:]]/ xor /[[:^alnum:]]/) ||
1194 (/[[:ascii:]]/ xor /[[:^ascii:]]/) ||
1195 (/[[:blank:]]/ xor /[[:^blank:]]/) ||
1196 (/[[:cntrl:]]/ xor /[[:^cntrl:]]/) ||
1197 (/[[:digit:]]/ xor /[[:^digit:]]/) ||
1198 (/[[:graph:]]/ xor /[[:^graph:]]/) ||
1199 (/[[:lower:]]/ xor /[[:^lower:]]/) ||
1200 (/[[:print:]]/ xor /[[:^print:]]/) ||
1201 (/[[:space:]]/ xor /[[:^space:]]/) ||
1202 (/[[:upper:]]/ xor /[[:^upper:]]/) ||
1203 (/[[:word:]]/ xor /[[:^word:]]/) ||
1204 (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) ||
1206 # effectively is what [:cased:] would be if it existed.
1207 (/[[:upper:][:lower:]]/i xor /[^[:upper:][:lower:]]/i);
1210 push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) ||
1211 (/[[:alnum:]]/ xor /[[:^alnum:]]/) ||
1212 (/[[:ascii:]]/ xor /[[:^ascii:]]/) ||
1213 (/[[:blank:]]/ xor /[[:^blank:]]/) ||
1214 (/[[:cntrl:]]/ xor /[[:^cntrl:]]/) ||
1215 (/[[:digit:]]/ xor /[[:^digit:]]/) ||
1216 (/[[:graph:]]/ xor /[[:^graph:]]/) ||
1217 (/[[:lower:]]/ xor /[[:^lower:]]/) ||
1218 (/[[:print:]]/ xor /[[:^print:]]/) ||
1219 (/[[:space:]]/ xor /[[:^space:]]/) ||
1220 (/[[:upper:]]/ xor /[[:^upper:]]/) ||
1221 (/[[:word:]]/ xor /[[:^word:]]/) ||
1222 (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) ||
1223 (/[[:upper:][:lower:]]/i xor /[^[:upper:][:lower:]]/i);
1226 report_multi_result($Locale, $locales_test_number, \@f);
1228 # The rules for the relationships are given in:
1229 # http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
1232 ++$locales_test_number;
1234 $test_names{$locales_test_number} = 'Verify that [:lower:] contains at least a-z';
1236 if ($is_utf8_locale) {
1237 use locale ':not_characters';
1238 push @f, $_ unless /[[:lower:]]/;
1241 push @f, $_ unless /[[:lower:]]/;
1244 report_multi_result($Locale, $locales_test_number, \@f);
1246 ++$locales_test_number;
1248 $test_names{$locales_test_number} = 'Verify that [:lower:] is a subset of [:alpha:]';
1249 for (map { chr } 0..255) {
1250 if ($is_utf8_locale) {
1251 use locale ':not_characters';
1252 push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/;
1255 push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/;
1258 report_multi_result($Locale, $locales_test_number, \@f);
1260 ++$locales_test_number;
1262 $test_names{$locales_test_number} = 'Verify that [:upper:] contains at least A-Z';
1264 if ($is_utf8_locale) {
1265 use locale ':not_characters';
1266 push @f, $_ unless /[[:upper:]]/;
1269 push @f, $_ unless /[[:upper:]]/;
1272 report_multi_result($Locale, $locales_test_number, \@f);
1274 ++$locales_test_number;
1276 $test_names{$locales_test_number} = 'Verify that [:upper:] is a subset of [:alpha:]';
1277 for (map { chr } 0..255) {
1278 if ($is_utf8_locale) {
1279 use locale ':not_characters';
1280 push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/;
1283 push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/;
1286 report_multi_result($Locale, $locales_test_number, \@f);
1288 ++$locales_test_number;
1290 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i is a subset of [:alpha:]';
1291 for (map { chr } 0..255) {
1292 if ($is_utf8_locale) {
1293 use locale ':not_characters';
1294 push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/;
1297 push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/;
1300 report_multi_result($Locale, $locales_test_number, \@f);
1302 ++$locales_test_number;
1304 $test_names{$locales_test_number} = 'Verify that [:alpha:] is a subset of [:alnum:]';
1305 for (map { chr } 0..255) {
1306 if ($is_utf8_locale) {
1307 use locale ':not_characters';
1308 push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/;
1311 push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/;
1314 report_multi_result($Locale, $locales_test_number, \@f);
1316 ++$locales_test_number;
1318 $test_names{$locales_test_number} = 'Verify that [:digit:] contains at least 0-9';
1320 if ($is_utf8_locale) {
1321 use locale ':not_characters';
1322 push @f, $_ unless /[[:digit:]]/;
1325 push @f, $_ unless /[[:digit:]]/;
1328 report_multi_result($Locale, $locales_test_number, \@f);
1330 ++$locales_test_number;
1332 $test_names{$locales_test_number} = 'Verify that [:digit:] is a subset of [:alnum:]';
1333 for (map { chr } 0..255) {
1334 if ($is_utf8_locale) {
1335 use locale ':not_characters';
1336 push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/;
1339 push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/;
1342 report_multi_result($Locale, $locales_test_number, \@f);
1344 ++$locales_test_number;
1346 $test_names{$locales_test_number} = 'Verify that [:digit:] matches either 10 or 20 code points';
1347 report_result($Locale, $locales_test_number, @{$posixes{'digit'}} == 10 || @{$posixes{'digit'}} == 20);
1349 ++$locales_test_number;
1351 $test_names{$locales_test_number} = 'Verify that if there is a second set of digits in [:digit:], they are consecutive';
1352 if (@{$posixes{'digit'}} == 20) {
1354 for (map { chr } 0..255) {
1355 next unless /[[:digit:]]/;
1357 if (defined $previous_ord) {
1358 if ($is_utf8_locale) {
1359 use locale ':not_characters';
1360 push @f, $_ if ord $_ != $previous_ord + 1;
1363 push @f, $_ if ord $_ != $previous_ord + 1;
1366 $previous_ord = ord $_;
1369 report_multi_result($Locale, $locales_test_number, \@f);
1371 ++$locales_test_number;
1373 my @xdigit_digits; # :digit: & :xdigit:
1374 $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains one or two blocks of 10 consecutive [:digit:] chars';
1375 for (map { chr } 0..255) {
1376 if ($is_utf8_locale) {
1377 use locale ':not_characters';
1378 # For utf8 locales, we actually use a stricter test: that :digit:
1379 # is a subset of :xdigit:, as we know that only 0-9 should match
1380 push @f, $_ if /[[:digit:]]/ and ! /[[:xdigit:]]/;
1383 push @xdigit_digits, $_ if /[[:digit:]]/ and /[[:xdigit:]]/;
1386 if (! $is_utf8_locale) {
1388 # For non-utf8 locales, @xdigit_digits is a list of the characters
1389 # that are both :xdigit: and :digit:. Because :digit: is stored in
1390 # increasing code point order (unless the tests above failed),
1391 # @xdigit_digits is as well. There should be exactly 10 or
1393 if (@xdigit_digits != 10 && @xdigit_digits != 20) {
1394 @f = @xdigit_digits;
1398 # Look for contiguity in the series, adding any wrong ones to @f
1399 my @temp = @xdigit_digits;
1401 push @f, $temp[1] if ($temp[0] != $temp[1] - 1)
1403 # Skip this test for the 0th character of
1404 # the second block of 10, as it won't be
1405 # contiguous with the previous block
1406 && (! defined $xdigit_digits[10]
1407 || $temp[1] != $xdigit_digits[10]);
1413 report_multi_result($Locale, $locales_test_number, \@f);
1415 ++$locales_test_number;
1417 $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains at least A-F, a-f';
1418 for ('A' .. 'F', 'a' .. 'f') {
1419 if ($is_utf8_locale) {
1420 use locale ':not_characters';
1421 push @f, $_ unless /[[:xdigit:]]/;
1424 push @f, $_ unless /[[:xdigit:]]/;
1427 report_multi_result($Locale, $locales_test_number, \@f);
1429 ++$locales_test_number;
1431 $test_names{$locales_test_number} = 'Verify that any additional members of [:xdigit:], are in groups of 6 consecutive code points';
1434 for my $chr (map { chr } 0..255) {
1435 next unless $chr =~ /[[:xdigit:]]/;
1436 if ($is_utf8_locale) {
1437 next if $chr =~ /[[:digit:]]/;
1440 next if grep { $chr eq $_ } @xdigit_digits;
1442 next if $chr =~ /[A-Fa-f]/;
1443 if (defined $previous_ord) {
1444 if ($is_utf8_locale) {
1445 use locale ':not_characters';
1446 push @f, $chr if ord $chr != $previous_ord + 1;
1449 push @f, $chr if ord $chr != $previous_ord + 1;
1454 undef $previous_ord;
1457 $previous_ord = ord $chr;
1460 report_multi_result($Locale, $locales_test_number, \@f);
1462 ++$locales_test_number;
1464 $test_names{$locales_test_number} = 'Verify that [:xdigit:] is a subset of [:graph:]';
1465 for (map { chr } 0..255) {
1466 if ($is_utf8_locale) {
1467 use locale ':not_characters';
1468 push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/;
1471 push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/;
1474 report_multi_result($Locale, $locales_test_number, \@f);
1476 # Note that xdigit doesn't have to be a subset of alnum
1478 ++$locales_test_number;
1480 $test_names{$locales_test_number} = 'Verify that [:punct:] is a subset of [:graph:]';
1481 for (map { chr } 0..255) {
1482 if ($is_utf8_locale) {
1483 use locale ':not_characters';
1484 push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/;
1487 push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/;
1490 report_multi_result($Locale, $locales_test_number, \@f);
1492 ++$locales_test_number;
1494 $test_names{$locales_test_number} = 'Verify that the space character is not in [:graph:]';
1495 if ($is_utf8_locale) {
1496 use locale ':not_characters';
1497 push @f, " " if " " =~ /[[:graph:]]/;
1500 push @f, " " if " " =~ /[[:graph:]]/;
1502 report_multi_result($Locale, $locales_test_number, \@f);
1504 ++$locales_test_number;
1506 $test_names{$locales_test_number} = 'Verify that [:space:] contains at least [\f\n\r\t\cK ]';
1507 for (' ', "\f", "\n", "\r", "\t", "\cK") {
1508 if ($is_utf8_locale) {
1509 use locale ':not_characters';
1510 push @f, $_ unless /[[:space:]]/;
1513 push @f, $_ unless /[[:space:]]/;
1516 report_multi_result($Locale, $locales_test_number, \@f);
1518 ++$locales_test_number;
1520 $test_names{$locales_test_number} = 'Verify that [:blank:] contains at least [\t ]';
1522 if ($is_utf8_locale) {
1523 use locale ':not_characters';
1524 push @f, $_ unless /[[:blank:]]/;
1527 push @f, $_ unless /[[:blank:]]/;
1530 report_multi_result($Locale, $locales_test_number, \@f);
1532 ++$locales_test_number;
1534 $test_names{$locales_test_number} = 'Verify that [:blank:] is a subset of [:space:]';
1535 for (map { chr } 0..255) {
1536 if ($is_utf8_locale) {
1537 use locale ':not_characters';
1538 push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/;
1541 push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/;
1544 report_multi_result($Locale, $locales_test_number, \@f);
1546 ++$locales_test_number;
1548 $test_names{$locales_test_number} = 'Verify that [:graph:] is a subset of [:print:]';
1549 for (map { chr } 0..255) {
1550 if ($is_utf8_locale) {
1551 use locale ':not_characters';
1552 push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/;
1555 push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/;
1558 report_multi_result($Locale, $locales_test_number, \@f);
1560 ++$locales_test_number;
1562 $test_names{$locales_test_number} = 'Verify that the space character is in [:print:]';
1563 if ($is_utf8_locale) {
1564 use locale ':not_characters';
1565 push @f, " " if " " !~ /[[:print:]]/;
1568 push @f, " " if " " !~ /[[:print:]]/;
1570 report_multi_result($Locale, $locales_test_number, \@f);
1572 ++$locales_test_number;
1574 $test_names{$locales_test_number} = 'Verify that isn\'t both [:cntrl:] and [:print:]';
1575 for (map { chr } 0..255) {
1576 if ($is_utf8_locale) {
1577 use locale ':not_characters';
1578 push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/);
1581 push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/);
1584 report_multi_result($Locale, $locales_test_number, \@f);
1586 ++$locales_test_number;
1588 $test_names{$locales_test_number} = 'Verify that isn\'t both [:alpha:] and [:digit:]';
1589 for (map { chr } 0..255) {
1590 if ($is_utf8_locale) {
1591 use locale ':not_characters';
1592 push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/;
1595 push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/;
1598 report_multi_result($Locale, $locales_test_number, \@f);
1600 ++$locales_test_number;
1602 $test_names{$locales_test_number} = 'Verify that isn\'t both [:alnum:] and [:punct:]';
1603 for (map { chr } 0..255) {
1604 if ($is_utf8_locale) {
1605 use locale ':not_characters';
1606 push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/;
1609 push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/;
1612 report_multi_result($Locale, $locales_test_number, \@f);
1614 ++$locales_test_number;
1616 $test_names{$locales_test_number} = 'Verify that isn\'t both [:xdigit:] and [:punct:]';
1617 for (map { chr } 0..255) {
1618 if ($is_utf8_locale) {
1619 use locale ':not_characters';
1620 push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/);
1623 push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/);
1626 report_multi_result($Locale, $locales_test_number, \@f);
1628 ++$locales_test_number;
1630 $test_names{$locales_test_number} = 'Verify that isn\'t both [:graph:] and [:space:]';
1631 for (map { chr } 0..255) {
1632 if ($is_utf8_locale) {
1633 use locale ':not_characters';
1634 push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/);
1637 push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/);
1640 report_multi_result($Locale, $locales_test_number, \@f);
1642 foreach ($first_casing_test_number..$locales_test_number) {
1643 $problematical_tests{$_} = 1;
1647 # Test for read-only scalars' locale vs non-locale comparisons.
1653 if ($is_utf8_locale) {
1654 use locale ':not_characters';
1655 $ok = ($a cmp "qwerty") == 0;
1659 $ok = ($a cmp "qwerty") == 0;
1661 report_result($Locale, ++$locales_test_number, $ok);
1662 $test_names{$locales_test_number} = 'Verify that cmp works with a read-only scalar; no- vs locale';
1666 my ($from, $to, $lesser, $greater,
1667 @test, %test, $test, $yes, $no, $sign);
1669 ++$locales_test_number;
1670 $test_names{$locales_test_number} = 'Verify that "le", "ne", etc work';
1671 $not_necessarily_a_problem_test_number = $locales_test_number;
1674 $from = int(($_*@{$posixes{'word'}})/10);
1675 $to = $from + int(@{$posixes{'word'}}/10);
1676 $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}});
1677 $lesser = join('', @{$posixes{'word'}}[$from..$to]);
1678 # Select a slice one character on.
1680 $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}});
1681 $greater = join('', @{$posixes{'word'}}[$from..$to]);
1682 if ($is_utf8_locale) {
1683 use locale ':not_characters';
1684 ($yes, $no, $sign) = ($lesser lt $greater
1686 : ("not ", " ", -1));
1690 ($yes, $no, $sign) = ($lesser lt $greater
1692 : ("not ", " ", -1));
1694 # all these tests should FAIL (return 0). Exact lt or gt cannot
1695 # be tested because in some locales, say, eacute and E may test
1699 $no.' ($lesser le $greater)', # 1
1700 'not ($lesser ne $greater)', # 2
1701 ' ($lesser eq $greater)', # 3
1702 $yes.' ($lesser ge $greater)', # 4
1703 $yes.' ($lesser ge $greater)', # 5
1704 $yes.' ($greater le $lesser )', # 7
1705 'not ($greater ne $lesser )', # 8
1706 ' ($greater eq $lesser )', # 9
1707 $no.' ($greater ge $lesser )', # 10
1708 'not (($lesser cmp $greater) == -($sign))' # 11
1710 @test{@test} = 0 x @test;
1712 for my $ti (@test) {
1713 if ($is_utf8_locale) {
1714 use locale ':not_characters';
1715 $test{$ti} = eval $ti;
1718 # Already in 'use locale';
1719 $test{$ti} = eval $ti;
1721 $test ||= $test{$ti}
1723 report_result($Locale, $locales_test_number, $test == 0);
1725 debug "lesser = '$lesser'\n";
1726 debug "greater = '$greater'\n";
1727 debug "lesser cmp greater = ",
1728 $lesser cmp $greater, "\n";
1729 debug "greater cmp lesser = ",
1730 $greater cmp $lesser, "\n";
1731 debug "(greater) from = $from, to = $to\n";
1732 for my $ti (@test) {
1733 debugf("# %-40s %-4s", $ti,
1734 $test{$ti} ? 'FAIL' : 'ok');
1735 if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) {
1736 debugf("(%s == %4d)", $1, eval $1);
1747 my @sorted_controls = sort @{$posixes{'cntrl'}};
1749 for my $control (@sorted_controls) {
1750 $output .= " " . disp_chars($control);
1752 debug "sorted :cntrl: = $output\n";
1754 ++$locales_test_number;
1755 $test_names{$locales_test_number}
1756 = 'Verify that \0 sorts before any other control';
1757 my $ok = $sorted_controls[0] eq "\0";
1758 report_result($Locale, $locales_test_number, $ok);
1759 shift @sorted_controls;
1760 my $lowest_control = $sorted_controls[0];
1762 ++$locales_test_number;
1763 $test_names{$locales_test_number}
1764 = 'Skip in locales where all controls have primary sorting weight; '
1765 . 'otherwise verify that \0 doesn\'t have primary sorting weight';
1766 if ("a${lowest_control}c" lt "ab") {
1767 report_result($Locale, $locales_test_number, 1);
1770 my $ok = "ab" lt "a\0c";
1771 report_result($Locale, $locales_test_number, $ok);
1774 ++$locales_test_number;
1775 $test_names{$locales_test_number}
1776 = 'Verify that strings with embedded NUL collate';
1777 $ok = "a\0a\0a" lt "a${lowest_control}a${lowest_control}a";
1778 report_result($Locale, $locales_test_number, $ok);
1780 ++$locales_test_number;
1781 $test_names{$locales_test_number}
1782 = 'Verify that strings with embedded NUL and '
1783 . 'extra trailing NUL collate';
1784 $ok = "a\0a\0" lt "a${lowest_control}a${lowest_control}";
1785 report_result($Locale, $locales_test_number, $ok);
1787 ++$locales_test_number;
1788 $test_names{$locales_test_number}
1789 = 'Verify that empty strings collate';
1791 report_result($Locale, $locales_test_number, $ok);
1793 ++$locales_test_number;
1794 $test_names{$locales_test_number}
1795 = "Skip in non-UTF-8 locales; otherwise verify that UTF8ness "
1796 . "doesn't matter with collation";
1797 if (! $is_utf8_locale) {
1798 report_result($Locale, $locales_test_number, 1);
1802 # khw can't think of anything better. Start with a string that is
1803 # higher than its UTF-8 representation in both EBCDIC and ASCII
1804 my $string = chr utf8::unicode_to_native(0xff);
1805 my $utf8_string = $string;
1806 utf8::upgrade($utf8_string);
1808 # 8 should be lt 9 in all locales (except ones that aren't
1809 # ASCII-based, which might fail this)
1810 $ok = ("a${string}8") lt ("a${utf8_string}9");
1811 report_result($Locale, $locales_test_number, $ok);
1814 ++$locales_test_number;
1815 $test_names{$locales_test_number}
1816 = "Skip in UTF-8 locales; otherwise verify that single byte "
1817 . "collates before 0x100 and above";
1818 if ($is_utf8_locale) {
1819 report_result($Locale, $locales_test_number, 1);
1822 my $max_collating = chr 0; # Find byte that collates highest
1823 for my $i (0 .. 255) {
1825 $max_collating = $char if $char gt $max_collating;
1827 $ok = $max_collating lt chr 0x100;
1828 report_result($Locale, $locales_test_number, $ok);
1831 ++$locales_test_number;
1832 $test_names{$locales_test_number}
1833 = "Skip in UTF-8 locales; otherwise verify that 0x100 and "
1834 . "above collate in code point order";
1835 if ($is_utf8_locale) {
1836 report_result($Locale, $locales_test_number, 1);
1839 $ok = chr 0x100 lt chr 0x101;
1840 report_result($Locale, $locales_test_number, $ok);
1876 if (! $is_utf8_locale) {
1879 my ($x, $y) = (1.23, 1.23);
1882 printf ''; # printf used to reset locale to "C"
1887 my $z = sprintf ''; # sprintf used to reset locale to "C"
1894 local $SIG{__WARN__} =
1900 # The == (among other ops) used to warn for locales
1901 # that had something else than "." as the radix character.
1925 $ok12 = abs(($f + $g) - 3.57) < 0.01;
1927 $ok14 = $ok14_5 = $ok15 = $ok16 = 1; # Skip for non-utf8 locales
1931 $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i);
1933 $ok18 = $j eq sprintf("%g:%g", $h, $i);
1936 use locale ':not_characters';
1938 my ($x, $y) = (1.23, 1.23);
1940 printf ''; # printf used to reset locale to "C"
1945 my $z = sprintf ''; # sprintf used to reset locale to "C"
1951 local $SIG{__WARN__} =
1977 $ok12 = abs(($f + $g) - 3.57) < 0.01;
1980 # Look for non-ASCII error messages, and verify that the first
1981 # such is in UTF-8 (the others almost certainly will be like the
1982 # first). This is only done if the current locale has LC_MESSAGES
1985 if ( locales_enabled('LC_MESSAGES')
1986 && setlocale(&POSIX::LC_MESSAGES, $Locale))
1988 foreach my $err (keys %!) {
1990 $! = eval "&Errno::$err"; # Convert to strerror() output
1992 my $strerror = "$!";
1993 if ("$strerror" =~ /\P{ASCII}/) {
1994 $ok14 = utf8::is_utf8($strerror);
1996 $ok14_5 = "$!" !~ /\P{ASCII}/;
1998 "non-ASCII \$! for error $errnum='$strerror'"))
2005 # Similarly, we verify that a non-ASCII radix is in UTF-8. This
2006 # also catches if there is a disparity between sprintf and
2009 my $string_g = "$g";
2010 my $sprintf_g = sprintf("%g", $g);
2012 $ok15 = $string_g =~ / ^ \p{ASCII}+ $ /x || utf8::is_utf8($string_g);
2013 $ok16 = $sprintf_g eq $string_g;
2017 $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i);
2019 $ok18 = $j eq sprintf("%g:%g", $h, $i);
2023 if (setlocale(&POSIX::LC_TIME, $Locale)) { # These tests aren't affected by
2025 my @times = CORE::localtime();
2028 $ok19 = POSIX::strftime("%p", @times) ne "%p"; # [perl #119425]
2029 my $date = POSIX::strftime("'%A' '%B' '%Z' '%p'", @times);
2030 debug("'Day' 'Month' 'TZ' 'am/pm' = ", disp_str($date));
2032 # If there is any non-ascii, it better be UTF-8 in a UTF-8 locale, and
2033 # not UTF-8 if the locale isn't UTF-8.
2034 $ok20 = $date =~ / ^ \p{ASCII}+ $ /x
2035 || $is_utf8_locale == utf8::is_utf8($date);
2039 if (locales_enabled('LC_MESSAGES')) {
2040 foreach my $err (keys %!) {
2043 $! = eval "&Errno::$err"; # Convert to strerror() output
2044 my $strerror = "$!";
2045 if ($strerror =~ /\P{ASCII}/) {
2047 debug(disp_str("non-ASCII strerror=$strerror"));
2053 report_result($Locale, ++$locales_test_number, $ok1);
2054 $test_names{$locales_test_number} = 'Verify that an intervening printf doesn\'t change assignment results';
2055 my $first_a_test = $locales_test_number;
2057 debug "$first_a_test..$locales_test_number: \$a = $a, \$b = $b, Locale = $Locale\n";
2059 report_result($Locale, ++$locales_test_number, $ok2);
2060 $test_names{$locales_test_number} = 'Verify that an intervening sprintf doesn\'t change assignment results';
2062 my $first_c_test = $locales_test_number;
2064 $test_names{++$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a constant';
2065 if ($Config{usequadmath}) {
2066 print "# Skip: no locale radix with usequadmath ($test_names{$locales_test_number})\n";
2067 report_result($Locale, $locales_test_number, 1);
2069 report_result($Locale, $locales_test_number, $ok3);
2070 $problematical_tests{$locales_test_number} = 1;
2073 $test_names{++$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar';
2074 if ($Config{usequadmath}) {
2075 print "# Skip: no locale radix with usequadmath ($test_names{$locales_test_number})\n";
2076 report_result($Locale, $locales_test_number, 1);
2078 report_result($Locale, $locales_test_number, $ok4);
2079 $problematical_tests{$locales_test_number} = 1;
2082 report_result($Locale, ++$locales_test_number, $ok5);
2083 $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar and an intervening sprintf';
2084 $problematical_tests{$locales_test_number} = 1;
2086 debug "$first_c_test..$locales_test_number: \$c = $c, \$d = $d, Locale = $Locale\n";
2088 report_result($Locale, ++$locales_test_number, $ok6);
2089 $test_names{$locales_test_number} = 'Verify that can assign stringified under inner no-locale block';
2090 my $first_e_test = $locales_test_number;
2092 report_result($Locale, ++$locales_test_number, $ok7);
2093 $test_names{$locales_test_number} = 'Verify that "==" with a scalar still works in inner no locale';
2095 $test_names{++$locales_test_number} = 'Verify that "==" with a scalar and an intervening sprintf still works in inner no locale';
2096 if ($Config{usequadmath}) {
2097 print "# Skip: no locale radix with usequadmath ($test_names{$locales_test_number})\n";
2098 report_result($Locale, $locales_test_number, 1);
2100 report_result($Locale, $locales_test_number, $ok8);
2101 $problematical_tests{$locales_test_number} = 1;
2104 debug "$first_e_test..$locales_test_number: \$e = $e, no locale\n";
2106 report_result($Locale, ++$locales_test_number, $ok9);
2107 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a constant';
2108 $problematical_tests{$locales_test_number} = 1;
2109 my $first_f_test = $locales_test_number;
2111 report_result($Locale, ++$locales_test_number, $ok10);
2112 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar';
2113 $problematical_tests{$locales_test_number} = 1;
2115 $test_names{++$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar and an intervening sprintf';
2116 if ($Config{usequadmath}) {
2117 print "# Skip: no locale radix with usequadmath ($test_names{$locales_test_number})\n";
2118 report_result($Locale, $locales_test_number, 1);
2120 report_result($Locale, $locales_test_number, $ok11);
2121 $problematical_tests{$locales_test_number} = 1;
2124 report_result($Locale, ++$locales_test_number, $ok12);
2125 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix can participate in an addition and function call as numeric';
2126 $problematical_tests{$locales_test_number} = 1;
2128 report_result($Locale, ++$locales_test_number, $ok13);
2129 $test_names{$locales_test_number} = 'Verify that don\'t get warning under "==" even if radix is not a dot';
2130 $problematical_tests{$locales_test_number} = 1;
2132 report_result($Locale, ++$locales_test_number, $ok14);
2133 $test_names{$locales_test_number} = 'Verify that non-ASCII UTF-8 error messages are in UTF-8';
2135 report_result($Locale, ++$locales_test_number, $ok14_5);
2136 $test_names{$locales_test_number} = '... and are ASCII outside "use locale"';
2138 report_result($Locale, ++$locales_test_number, $ok15);
2139 $test_names{$locales_test_number} = 'Verify that a number with a UTF-8 radix has a UTF-8 stringification';
2141 report_result($Locale, ++$locales_test_number, $ok16);
2142 $test_names{$locales_test_number} = 'Verify that a sprintf of a number with a UTF-8 radix yields UTF-8';
2143 $problematical_tests{$locales_test_number} = 1;
2145 report_result($Locale, ++$locales_test_number, $ok17);
2146 $test_names{$locales_test_number} = 'Verify that a sprintf of a number outside locale scope uses a dot radix';
2148 report_result($Locale, ++$locales_test_number, $ok18);
2149 $test_names{$locales_test_number} = 'Verify that a sprintf of a number back within locale scope uses locale radix';
2150 $problematical_tests{$locales_test_number} = 1;
2152 report_result($Locale, ++$locales_test_number, $ok19);
2153 $test_names{$locales_test_number} = 'Verify that strftime doesn\'t return "%p" in locales where %p is empty';
2155 report_result($Locale, ++$locales_test_number, $ok20);
2156 $test_names{$locales_test_number} = 'Verify that strftime returns date with UTF-8 flag appropriately set';
2157 $problematical_tests{$locales_test_number} = 1; # This is broken in
2160 report_result($Locale, ++$locales_test_number, $ok21);
2161 $test_names{$locales_test_number} = '"$!" is ASCII only outside of locale scope';
2163 debug "$first_f_test..$locales_test_number: \$f = $f, \$g = $g, back to locale = $Locale\n";
2165 # Does taking lc separately differ from taking
2166 # the lc "in-line"? (This was the bug 19990704.002 (#965), change #3568.)
2167 # The bug was in the caching of the 'o'-magic.
2168 if (! $is_utf8_locale) {
2174 return $lc0 cmp $lc1;
2178 return lc($_[0]) cmp lc($_[1]);
2185 report_result($Locale, ++$locales_test_number,
2186 lcA($x, $y) == 1 && lcB($x, $y) == 1 ||
2187 lcA($x, $z) == 0 && lcB($x, $z) == 0);
2190 use locale ':not_characters';
2195 return $lc0 cmp $lc1;
2199 return lc($_[0]) cmp lc($_[1]);
2206 report_result($Locale, ++$locales_test_number,
2207 lcC($x, $y) == 1 && lcD($x, $y) == 1 ||
2208 lcC($x, $z) == 0 && lcD($x, $z) == 0);
2210 $test_names{$locales_test_number} = 'Verify "lc(foo) cmp lc(bar)" is the same as using intermediaries for the cmp';
2212 # Does lc of an UPPER (if different from the UPPER) match
2213 # case-insensitively the UPPER, and does the UPPER match
2214 # case-insensitively the lc of the UPPER. And vice versa.
2218 my $re = qr/[\[\(\{\*\+\?\|\^\$\\]/;
2221 ++$locales_test_number;
2222 $test_names{$locales_test_number} = 'Verify case insensitive matching works';
2223 foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) {
2224 if (! $is_utf8_locale) {
2226 next unless uc $y eq $x;
2227 debug_more( "UPPER=", disp_chars(($x)),
2228 "; lc=", disp_chars(($y)), "; ",
2229 "; fc=", disp_chars((fc $x)), "; ",
2230 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
2231 $x =~ /\Q$y/i ? 1 : 0,
2233 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
2234 $y =~ /\Q$x/i ? 1 : 0,
2237 # If $x and $y contain regular expression characters
2238 # AND THEY lowercase (/i) to regular expression characters,
2239 # regcomp() will be mightily confused. No, the \Q doesn't
2240 # help here (maybe regex engine internal lowercasing
2241 # is done after the \Q?) An example of this happening is
2242 # the bg_BG (Bulgarian) locale under EBCDIC (OS/390 USS):
2243 # the chr(173) (the "[") is the lowercase of the chr(235).
2245 # Similarly losing EBCDIC locales include cs_cz, cs_CZ,
2246 # el_gr, el_GR, en_us.IBM-037 (!), en_US.IBM-037 (!),
2247 # et_ee, et_EE, hr_hr, hr_HR, hu_hu, hu_HU, lt_LT,
2248 # mk_mk, mk_MK, nl_nl.IBM-037, nl_NL.IBM-037,
2249 # pl_pl, pl_PL, ro_ro, ro_RO, ru_ru, ru_RU,
2250 # sk_sk, sk_SK, sl_si, sl_SI, tr_tr, tr_TR.
2252 # Similar things can happen even under (bastardised)
2253 # non-EBCDIC locales: in many European countries before the
2254 # advent of ISO 8859-x nationally customised versions of
2255 # ISO 646 were devised, reusing certain punctuation
2256 # characters for modified characters needed by the
2257 # country/language. For example, the "|" might have
2258 # stood for U+00F6 or LATIN SMALL LETTER O WITH DIAERESIS.
2260 if ($x =~ $re || $y =~ $re) {
2261 print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n";
2264 push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
2266 # fc is not a locale concept, so Perl uses lc for it.
2267 push @f, $x unless lc $x eq fc $x;
2270 use locale ':not_characters';
2272 next unless uc $y eq $x;
2273 debug_more( "UPPER=", disp_chars(($x)),
2274 "; lc=", disp_chars(($y)), "; ",
2275 "; fc=", disp_chars((fc $x)), "; ",
2276 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
2277 $x =~ /\Q$y/i ? 1 : 0,
2279 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
2280 $y =~ /\Q$x/i ? 1 : 0,
2283 push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
2285 # The places where Unicode's lc is different from fc are
2286 # skipped here by virtue of the 'next unless uc...' line above
2287 push @f, $x unless lc $x eq fc $x;
2291 foreach my $x (sort { ord $a <=> ord $b } keys %lower) {
2292 if (! $is_utf8_locale) {
2294 next unless lc $y eq $x;
2295 debug_more( "lower=", disp_chars(($x)),
2296 "; uc=", disp_chars(($y)), "; ",
2297 "; fc=", disp_chars((fc $x)), "; ",
2298 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
2299 $x =~ /\Q$y/i ? 1 : 0,
2301 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
2302 $y =~ /\Q$x/i ? 1 : 0,
2304 if ($x =~ $re || $y =~ $re) { # See above.
2305 print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n";
2308 push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
2310 push @f, $x unless lc $x eq fc $x;
2313 use locale ':not_characters';
2315 next unless lc $y eq $x;
2316 debug_more( "lower=", disp_chars(($x)),
2317 "; uc=", disp_chars(($y)), "; ",
2318 "; fc=", disp_chars((fc $x)), "; ",
2319 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
2320 $x =~ /\Q$y/i ? 1 : 0,
2322 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
2323 $y =~ /\Q$x/i ? 1 : 0,
2325 push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
2327 push @f, $x unless lc $x eq fc $x;
2330 report_multi_result($Locale, $locales_test_number, \@f);
2331 $problematical_tests{$locales_test_number} = 1;
2337 ++$locales_test_number;
2338 $test_names{$locales_test_number} = 'Verify atof with locale radix and negative exponent';
2339 $problematical_tests{$locales_test_number} = 1;
2341 my $radix = POSIX::localeconv()->{decimal_point};
2343 "3.14e+9", "3${radix}14e+9", "3.14e-9", "3${radix}14e-9",
2344 "-3.14e+9", "-3${radix}14e+9", "-3.14e-9", "-3${radix}14e-9",
2347 if (! $is_utf8_locale) {
2349 for my $num (@nums) {
2351 unless sprintf("%g", $num) =~ /3.+14/;
2355 use locale ':not_characters';
2356 for my $num (@nums) {
2358 unless sprintf("%g", $num) =~ /3.+14/;
2362 if ($Config{usequadmath}) {
2363 print "# Skip: no locale radix with usequadmath ($Locale)\n";
2364 report_result($Locale, $locales_test_number, 1);
2366 report_result($Locale, $locales_test_number, @f == 0);
2368 print "# failed $locales_test_number locale '$Locale' numbers @f\n"
2374 my $final_locales_test_number = $locales_test_number;
2376 # Recount the errors.
2379 foreach $test_num ($first_locales_test_number..$final_locales_test_number) {
2380 if (%setlocale_failed) {
2383 elsif ($Problem{$test_num}
2384 || ! defined $Okay{$test_num}
2385 || ! @{$Okay{$test_num}})
2387 if (defined $not_necessarily_a_problem_test_number
2388 && $test_num == $not_necessarily_a_problem_test_number)
2390 print "# The failure of test $not_necessarily_a_problem_test_number is not necessarily fatal.\n";
2391 print "# It usually indicates a problem in the environment,\n";
2392 print "# not in Perl itself.\n";
2395 # If there are any locales that pass this test, or are known-bad, it
2396 # may be that there are enough passes that we TODO the failure.
2397 if (($Okay{$test_num} || $Known_bad_locale{$test_num})
2398 && grep { $_ == $test_num } keys %problematical_tests)
2400 no warnings 'experimental::postderef';
2402 # Don't count the known-bad failures when calculating the
2403 # percentage that fail.
2404 my $known_failures = (exists $Known_bad_locale{$test_num})
2405 ? scalar(keys $Known_bad_locale{$test_num}->%*)
2407 my $adjusted_failures = scalar(keys $Problem{$test_num}->%*)
2410 # Specially handle failures where only known-bad locales fail.
2411 # This makes the diagnositics clearer.
2412 if ($adjusted_failures <= 0) {
2413 print "not ok $test_num $test_names{$test_num} # TODO fails only on ",
2414 "known bad locales: ",
2415 join " ", keys $Known_bad_locale{$test_num}->%*, "\n";
2419 # Round to nearest .1%
2420 my $percent_fail = (int(.5 + (1000 * $adjusted_failures
2421 / scalar(@Locale))))
2423 if ($percent_fail < $acceptable_failure_percentage) {
2425 $test_names{$test_num} .= 'TODO';
2426 print "# ", 100 - $percent_fail, "% of locales not known to be problematic on this platform\n";
2427 print "# pass the following test, so it is likely that the failures\n";
2428 print "# are errors in the locale definitions. The test is marked TODO, as the\n";
2429 print "# problem is not likely to be Perl's\n";
2433 print "# $percent_fail% of locales (",
2434 scalar(keys $Problem{$test_num}->%*),
2437 ") fail the above test (TODO cut-off is ",
2438 $acceptable_failure_percentage,
2444 print "# The code points that had this failure are given above. Look for lines\n";
2445 print "# that match 'failed $test_num'\n";
2448 print "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n";
2449 print "# Then look at that output for lines that match 'failed $test_num'\n";
2453 print "ok $test_num";
2454 if (defined $test_names{$test_num}) {
2455 # If TODO is in the test name, make it thus
2456 my $todo = $test_names{$test_num} =~ s/TODO\s*//;
2457 print " $test_names{$test_num}";
2458 print " # TODO" if $todo;
2463 $test_num = $final_locales_test_number;
2465 unless ( $^O =~ m!^(dragonfly|openbsd|bitrig|mirbsd)$! ) {
2469 local $SIG{__WARN__} = sub {
2470 $warned = $_[0] =~ /uninitialized/;
2472 my $z = "y" . setlocale(&POSIX::LC_ALL, "xyzzy");
2473 ok($warned, "variable set to setlocale(BAD LOCALE) is considered uninitialized");
2476 # Test that tainting and case changing works on utf8 strings. These tests are
2477 # placed last to avoid disturbing the hard-coded test numbers that existed at
2478 # the time these were added above this in this file.
2479 # This also tests that locale overrides unicode_strings in the same scope for
2481 setlocale(&POSIX::LC_ALL, "C");
2484 use feature 'unicode_strings';
2486 foreach my $function ("uc", "ucfirst", "lc", "lcfirst", "fc") {
2487 my @list; # List of code points to test for $function
2489 # Used to calculate the changed case for ASCII characters by using the
2490 # ord, instead of using one of the functions under test.
2491 my $ascii_case_change_delta;
2492 my $above_latin1_case_change_delta; # Same for the specific ords > 255
2495 # We test an ASCII character, which should change case;
2496 # a Latin1 character, which shouldn't change case under this C locale,
2497 # an above-Latin1 character that when the case is changed would cross
2498 # the 255/256 boundary, so doesn't change case
2499 # (the \x{149} is one of these, but changes into 2 characters, the
2500 # first one of which doesn't cross the boundary.
2501 # the final one in each list is an above-Latin1 character whose case
2502 # does change. The code below uses its position in its list as a
2503 # marker to indicate that it, unlike the other code points above
2504 # ASCII, has a successful case change
2506 # All casing operations under locale (but not :not_characters) should
2508 if ($function =~ /^u/) {
2510 chr(utf8::unicode_to_native(0xe0)),
2511 chr(utf8::unicode_to_native(0xff)),
2512 "\x{fb00}", "\x{149}", "\x{101}");
2513 $ascii_case_change_delta = ($is_ebcdic) ? +64 : -32;
2514 $above_latin1_case_change_delta = -1;
2518 chr(utf8::unicode_to_native(0xC0)),
2519 "\x{17F}", "\x{100}");
2520 $ascii_case_change_delta = ($is_ebcdic) ? -64 : +32;
2521 $above_latin1_case_change_delta = +1;
2523 foreach my $is_utf8_locale (0 .. 1) {
2524 foreach my $j (0 .. $#list) {
2525 my $char = $list[$j];
2527 for my $encoded_in_utf8 (0 .. 1) {
2530 if (! $is_utf8_locale) {
2531 no warnings 'locale';
2532 $should_be = ($j == $#list)
2533 ? chr(ord($char) + $above_latin1_case_change_delta)
2534 : (length $char == 0 || utf8::native_to_unicode(ord($char)) > 127)
2536 : chr(ord($char) + $ascii_case_change_delta);
2538 # This monstrosity is in order to avoid using an eval,
2539 # which might perturb the results
2540 $changed = ($function eq "uc")
2542 : ($function eq "ucfirst")
2544 : ($function eq "lc")
2546 : ($function eq "lcfirst")
2548 : ($function eq "fc")
2550 : die("Unexpected function \"$function\"");
2556 # For utf8-locales the case changing functions
2557 # should work just like they do outside of locale.
2558 # Can use eval here because not testing it when
2560 $should_be = eval "$function('$char')";
2561 die "Unexpected eval error $@ from 'eval \"$function('$char')\"'" if $@;
2564 use locale ':not_characters';
2565 $changed = ($function eq "uc")
2567 : ($function eq "ucfirst")
2569 : ($function eq "lc")
2571 : ($function eq "lcfirst")
2573 : ($function eq "fc")
2575 : die("Unexpected function \"$function\"");
2577 ok($changed eq $should_be,
2578 "$function(\"$char\") in C locale "
2579 . (($is_utf8_locale)
2580 ? "(use locale ':not_characters'"
2582 . (($encoded_in_utf8)
2583 ? "; encoded in utf8)"
2584 : "; not encoded in utf8)")
2585 . " should be \"$should_be\", got \"$changed\"");
2587 # Tainting shouldn't happen for use locale :not_character
2590 ? check_taint($changed)
2591 : check_taint_not($changed);
2593 # Use UTF-8 next time through the loop
2594 utf8::upgrade($char);
2601 # Give final advice.
2605 foreach ($first_locales_test_number..$final_locales_test_number) {
2607 my @f = sort keys %{ $Problem{$_} };
2609 # Don't list the failures caused by known-bad locales.
2610 if (exists $known_bad_locales{$^O}) {
2611 @f = grep { $_ !~ $known_bad_locales{$^O} } @f;
2614 my $f = join(" ", @f);
2615 $f =~ s/(.{50,60}) /$1\n#\t/g;
2618 "# The locale ", (@f == 1 ? "definition" : "definitions"), "\n#\n",
2620 "# on your system may have errors because the locale test $_\n",
2621 "# \"$test_names{$_}\"\n",
2622 "# failed in ", (@f == 1 ? "that locale" : "those locales"),
2626 # If your users are not using these locales you are safe for the moment,
2627 # but please report this failure first to perlbug\@perl.org using the
2628 # perlbug script (as described in the INSTALL file) so that the exact
2629 # details of the failures can be sorted out first and then your operating
2630 # system supplier can be alerted about these anomalies.
2637 # Tell which locales were okay and which were not.
2642 foreach my $l (@Locale) {
2644 if ($setlocale_failed{$l}) {
2649 ($first_locales_test_number..$final_locales_test_number)
2651 $p++ if $Problem{$t}{$l};
2654 push @s, $l if $p == 0;
2655 push @F, $l unless $p == 0;
2659 my $s = join(" ", @s);
2660 $s =~ s/(.{50,60}) /$1\n#\t/g;
2663 "# The following locales\n#\n",
2665 "# tested okay.\n#\n",
2667 print "# None of your locales were fully okay.\n";
2671 my $F = join(" ", @F);
2672 $F =~ s/(.{50,60}) /$1\n#\t/g;
2676 $details = "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n";
2678 elsif ($debug == 1) {
2679 $details = "# For even more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=2.\n";
2683 "# The following locales\n#\n",
2685 "# had problems.\n#\n",
2688 print "# None of your locales were broken.\n";
2692 if (exists $known_bad_locales{$^O} && ! %Known_bad_locale) {
2694 print "ok $test_num $^O no longer has known bad locales # TODO\n";
2697 print "1..$test_num\n";