3 # This tests plain 'use locale' and adorned 'use locale ":not_characters"'
4 # Because these pragmas are compile time, and I (khw) am trying to test
5 # without using 'eval' as much as possible, which might cloud the issue, the
6 # crucial parts of the code are duplicated in a block for each pragma.
8 # To make a TODO test, add the string 'TODO' to its %test_names value
10 binmode STDOUT, ':utf8';
11 binmode STDERR, ':utf8';
17 require Config; import Config;
18 if (!$Config{d_setlocale} || $Config{ccflags} =~ /\bD?NO_LOCALE\b/) {
22 require './loc_tools.pl';
29 # =1 adds debugging output; =2 increases the verbosity somewhat
30 my $debug = $ENV{PERL_DEBUG_FULL_TEST} // 0;
32 # Certain tests have been shown to be problematical for a few locales. Don't
33 # fail them unless at least this percentage of the tested locales fail.
34 # On AIX machines, many locales call a no-break space a graphic.
35 # (There aren't 1000 locales currently in existence, so 99.9 works)
36 my $acceptable_failure_percentage = ($^O =~ / ^ ( AIX ) $ /ix)
40 # The list of test numbers of the problematic tests.
41 my %problematical_tests;
46 my $dumper = Dumpvalue->new(
53 my($mess) = join "", '# ', @_;
55 print $dumper->stringify($mess,1), "\n";
59 return unless $debug > 1;
72 my ($result, $message) = @_;
73 $message = "" unless defined $message;
75 print 'not ' unless ($result);
76 print "ok " . ++$test_num;
81 # First we'll do a lot of taint checking for locales.
82 # This is the easiest to test, actually, as any locale,
83 # even the default locale will taint under 'use locale'.
85 sub is_tainted { # hello, camel two.
86 no warnings 'uninitialized' ;
89 not eval { $dummy = join("", @_), kill 0; 1 }
92 sub check_taint ($;$) {
93 my $message_tail = $_[1] // "";
95 # Extra blanks are so aligns with taint_not output
96 $message_tail = ": $message_tail" if $message_tail;
97 ok is_tainted($_[0]), "verify that is tainted$message_tail";
100 sub check_taint_not ($;$) {
101 my $message_tail = $_[1] // "";
102 $message_tail = ": $message_tail" if $message_tail;
103 ok((not is_tainted($_[0])), "verify that isn't tainted$message_tail");
106 "\tb\t" =~ /^m?(\s)(.*)\1$/;
107 check_taint_not $&, "not tainted outside 'use locale'";
110 use locale; # engage locale and therefore locale taint.
112 # BE SURE TO COPY ANYTHING YOU ADD to these tests to the block below for
115 check_taint_not $a, '$a';
117 check_taint uc($a), 'uc($a)';
118 check_taint "\U$a", '"\U$a"';
119 check_taint ucfirst($a), 'ucfirst($a)';
120 check_taint "\u$a", '"\u$a"';
121 check_taint lc($a), 'lc($a)';
122 check_taint fc($a), 'fc($a)';
123 check_taint "\L$a", '"\L$a"';
124 check_taint "\F$a", '"\F$a"';
125 check_taint lcfirst($a), 'lcfirst($a)';
126 check_taint "\l$a", '"\l$a"';
128 check_taint_not sprintf('%e', 123.456), "sprintf('%e', 123.456)";
129 check_taint_not sprintf('%f', 123.456), "sprintf('%f', 123.456)";
130 check_taint_not sprintf('%g', 123.456), "sprintf('%g', 123.456)";
131 check_taint_not sprintf('%d', 123.456), "sprintf('%d', 123.456)";
132 check_taint_not sprintf('%x', 123.456), "sprintf('%x', 123.456)";
134 $_ = $a; # untaint $_
136 $_ = uc($a); # taint $_
138 check_taint $_, '$_ = uc($a)';
140 /(\w)/; # taint $&, $`, $', $+, $1.
141 check_taint $&, "\$& from /(\\w)/";
142 check_taint $`, "\t\$`";
143 check_taint $', "\t\$'";
144 check_taint $+, "\t\$+";
145 check_taint $1, "\t\$1";
146 check_taint_not $2, "\t\$2";
148 /(.)/; # untaint $&, $`, $', $+, $1.
149 check_taint_not $&, "\$& from /(.)/";
150 check_taint_not $`, "\t\$`";
151 check_taint_not $', "\t\$'";
152 check_taint_not $+, "\t\$+";
153 check_taint_not $1, "\t\$1";
154 check_taint_not $2, "\t\$2";
156 /(\W)/; # taint $&, $`, $', $+, $1.
157 check_taint $&, "\$& from /(\\W)/";
158 check_taint $`, "\t\$`";
159 check_taint $', "\t\$'";
160 check_taint $+, "\t\$+";
161 check_taint $1, "\t\$1";
162 check_taint_not $2, "\t\$2";
164 /(.)/; # untaint $&, $`, $', $+, $1.
165 check_taint_not $&, "\$& from /(.)/";
166 check_taint_not $`, "\t\$`";
167 check_taint_not $', "\t\$'";
168 check_taint_not $+, "\t\$+";
169 check_taint_not $1, "\t\$1";
170 check_taint_not $2, "\t\$2";
172 /(\s)/; # taint $&, $`, $', $+, $1.
173 check_taint $&, "\$& from /(\\s)/";
174 check_taint $`, "\t\$`";
175 check_taint $', "\t\$'";
176 check_taint $+, "\t\$+";
177 check_taint $1, "\t\$1";
178 check_taint_not $2, "\t\$2";
180 /(.)/; # untaint $&, $`, $', $+, $1.
181 check_taint_not $&, "\$& from /(.)/";
183 /(\S)/; # taint $&, $`, $', $+, $1.
184 check_taint $&, "\$& from /(\\S)/";
185 check_taint $`, "\t\$`";
186 check_taint $', "\t\$'";
187 check_taint $+, "\t\$+";
188 check_taint $1, "\t\$1";
189 check_taint_not $2, "\t\$2";
191 /(.)/; # untaint $&, $`, $', $+, $1.
192 check_taint_not $&, "\$& from /(.)/";
194 "0" =~ /(\d)/; # taint $&, $`, $', $+, $1.
195 check_taint $&, "\$& from /(\\d)/";
196 check_taint $`, "\t\$`";
197 check_taint $', "\t\$'";
198 check_taint $+, "\t\$+";
199 check_taint $1, "\t\$1";
200 check_taint_not $2, "\t\$2";
202 /(.)/; # untaint $&, $`, $', $+, $1.
203 check_taint_not $&, "\$& from /(.)/";
205 /(\D)/; # taint $&, $`, $', $+, $1.
206 check_taint $&, "\$& from /(\\D)/";
207 check_taint $`, "\t\$`";
208 check_taint $', "\t\$'";
209 check_taint $+, "\t\$+";
210 check_taint $1, "\t\$1";
211 check_taint_not $2, "\t\$2";
213 /(.)/; # untaint $&, $`, $', $+, $1.
214 check_taint_not $&, "\$& from /(.)/";
216 /([[:alnum:]])/; # taint $&, $`, $', $+, $1.
217 check_taint $&, "\$& from /([[:alnum:]])/";
218 check_taint $`, "\t\$`";
219 check_taint $', "\t\$'";
220 check_taint $+, "\t\$+";
221 check_taint $1, "\t\$1";
222 check_taint_not $2, "\t\$2";
224 /(.)/; # untaint $&, $`, $', $+, $1.
225 check_taint_not $&, "\$& from /(.)/";
227 /([[:^alnum:]])/; # taint $&, $`, $', $+, $1.
228 check_taint $&, "\$& from /([[:^alnum:]])/";
229 check_taint $`, "\t\$`";
230 check_taint $', "\t\$'";
231 check_taint $+, "\t\$+";
232 check_taint $1, "\t\$1";
233 check_taint_not $2, "\t\$2";
235 "a" =~ /(a)|(\w)/; # taint $&, $`, $', $+, $1.
236 check_taint $&, "\$& from /(a)|(\\w)/";
237 check_taint $`, "\t\$`";
238 check_taint $', "\t\$'";
239 check_taint $+, "\t\$+";
240 check_taint $1, "\t\$1";
241 ok($1 eq 'a', ("\t" x 5) . "\$1 is 'a'");
242 ok(! defined $2, ("\t" x 5) . "\$2 is undefined");
243 check_taint_not $2, "\t\$2";
244 check_taint_not $3, "\t\$3";
246 /(.)/; # untaint $&, $`, $', $+, $1.
247 check_taint_not $&, "\$& from /(.)/";
249 "\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i; # no tainting because no locale dependence
250 check_taint_not $&, "\$& from /(\\N{CYRILLIC CAPITAL LETTER A})/i";
251 check_taint_not $`, "\t\$`";
252 check_taint_not $', "\t\$'";
253 check_taint_not $+, "\t\$+";
254 check_taint_not $1, "\t\$1";
255 ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\t\$1 is 'small cyrillic a'");
256 check_taint_not $2, "\t\$2";
258 /(.)/; # untaint $&, $`, $', $+, $1.
259 check_taint_not $&, "\$& from /./";
261 "(\N{KELVIN SIGN})" =~ /(\N{KELVIN SIGN})/i; # taints because depends on locale
262 check_taint $&, "\$& from /(\\N{KELVIN SIGN})/i";
263 check_taint $`, "\t\$`";
264 check_taint $', "\t\$'";
265 check_taint $+, "\t\$+";
266 check_taint $1, "\t\$1";
267 check_taint_not $2, "\t\$2";
269 /(.)/; # untaint $&, $`, $', $+, $1.
270 check_taint_not $&, "\$& from /(.)/";
272 "a:" =~ /(.)\b(.)/; # taint $&, $`, $', $+, $1.
273 check_taint $&, "\$& from /(.)\\b(.)/";
274 check_taint $`, "\t\$`";
275 check_taint $', "\t\$'";
276 check_taint $+, "\t\$+";
277 check_taint $1, "\t\$1";
278 check_taint $2, "\t\$2";
279 check_taint_not $3, "\t\$3";
281 /(.)/; # untaint $&, $`, $', $+, $1.
282 check_taint_not $&, "\$& from /./";
284 "aa" =~ /(.)\B(.)/; # taint $&, $`, $', $+, $1.
285 check_taint $&, "\$& from /(.)\\B(.)/";
286 check_taint $`, "\t\$`";
287 check_taint $', "\t\$'";
288 check_taint $+, "\t\$+";
289 check_taint $1, "\t\$1";
290 check_taint $2, "\t\$2";
291 check_taint_not $3, "\t\$3";
293 /(.)/; # untaint $&, $`, $', $+, $1.
294 check_taint_not $&, "\$& from /./";
296 "aaa" =~ /(.).(\1)/i; # notaint because not locale dependent
297 check_taint_not $&, "\$ & from /(.).(\\1)/";
298 check_taint_not $`, "\t\$`";
299 check_taint_not $', "\t\$'";
300 check_taint_not $+, "\t\$+";
301 check_taint_not $1, "\t\$1";
302 check_taint_not $2, "\t\$2";
303 check_taint_not $3, "\t\$3";
305 /(.)/; # untaint $&, $`, $', $+, $1.
306 check_taint_not $&, "\$ & from /./";
308 $_ = $a; # untaint $_
310 check_taint_not $_, 'untainting $_ works';
312 /(b)/; # this must not taint
313 check_taint_not $&, "\$ & from /(b)/";
314 check_taint_not $`, "\t\$`";
315 check_taint_not $', "\t\$'";
316 check_taint_not $+, "\t\$+";
317 check_taint_not $1, "\t\$1";
318 check_taint_not $2, "\t\$2";
320 $_ = $a; # untaint $_
322 check_taint_not $_, 'untainting $_ works';
324 $b = uc($a); # taint $b
325 s/(.+)/$b/; # this must taint only the $_
327 check_taint $_, '$_ (wasn\'t tainted) from s/(.+)/$b/ where $b is tainted';
328 check_taint_not $&, "\t\$&";
329 check_taint_not $`, "\t\$`";
330 check_taint_not $', "\t\$'";
331 check_taint_not $+, "\t\$+";
332 check_taint_not $1, "\t\$1";
333 check_taint_not $2, "\t\$2";
335 $_ = $a; # untaint $_
337 s/(.+)/b/; # this must not taint
338 check_taint_not $_, '$_ (wasn\'t tainted) from s/(.+)/b/';
339 check_taint_not $&, "\t\$&";
340 check_taint_not $`, "\t\$`";
341 check_taint_not $', "\t\$'";
342 check_taint_not $+, "\t\$+";
343 check_taint_not $1, "\t\$1";
344 check_taint_not $2, "\t\$2";
346 $b = $a; # untaint $b
348 ($b = $a) =~ s/\w/$&/;
349 check_taint $b, '$b from ($b = $a) =~ s/\w/$&/'; # $b should be tainted.
350 check_taint_not $a, '$a from ($b = $a) =~ s/\w/$&/'; # $a should be not.
352 $_ = $a; # untaint $_
354 s/(\w)/\l$1/; # this must taint
355 check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\l$1/,'; # this must taint
356 check_taint $&, "\t\$&";
357 check_taint $`, "\t\$`";
358 check_taint $', "\t\$'";
359 check_taint $+, "\t\$+";
360 check_taint $1, "\t\$1";
361 check_taint_not $2, "\t\$2";
363 $_ = $a; # untaint $_
365 s/(\w)/\L$1/; # this must taint
366 check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\L$1/,';
367 check_taint $&, "\t\$&";
368 check_taint $`, "\t\$`";
369 check_taint $', "\t\$'";
370 check_taint $+, "\t\$+";
371 check_taint $1, "\t\$1";
372 check_taint_not $2, "\t\$2";
374 $_ = $a; # untaint $_
376 s/(\w)/\u$1/; # this must taint
377 check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\u$1/';
378 check_taint $&, "\t\$&";
379 check_taint $`, "\t\$`";
380 check_taint $', "\t\$'";
381 check_taint $+, "\t\$+";
382 check_taint $1, "\t\$1";
383 check_taint_not $2, "\t\$2";
385 $_ = $a; # untaint $_
387 s/(\w)/\U$1/; # this must taint
388 check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\U$1/';
389 check_taint $&, "\t\$&";
390 check_taint $`, "\t\$`";
391 check_taint $', "\t\$'";
392 check_taint $+, "\t\$+";
393 check_taint $1, "\t\$1";
394 check_taint_not $2, "\t\$2";
396 # After all this tainting $a should be cool.
398 check_taint_not $a, '$a still not tainted';
401 check_taint_not $1, '"a" =~ /([a-z])/';
402 "foo.bar_baz" =~ /^(.*)[._](.*?)$/; # Bug 120675
403 check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/';
405 # BE SURE TO COPY ANYTHING YOU ADD to the block below
407 { # This is just the previous tests copied here with a different
408 # compile-time pragma.
410 use locale ':not_characters'; # engage restricted locale with different
412 check_taint_not $a, '$a';
414 check_taint_not uc($a), 'uc($a)';
415 check_taint_not "\U$a", '"\U$a"';
416 check_taint_not ucfirst($a), 'ucfirst($a)';
417 check_taint_not "\u$a", '"\u$a"';
418 check_taint_not lc($a), 'lc($a)';
419 check_taint_not fc($a), 'fc($a)';
420 check_taint_not "\L$a", '"\L$a"';
421 check_taint_not "\F$a", '"\F$a"';
422 check_taint_not lcfirst($a), 'lcfirst($a)';
423 check_taint_not "\l$a", '"\l$a"';
425 check_taint_not sprintf('%e', 123.456), "sprintf('%e', 123.456)";
426 check_taint_not sprintf('%f', 123.456), "sprintf('%f', 123.456)";
427 check_taint_not sprintf('%g', 123.456), "sprintf('%g', 123.456)";
428 check_taint_not sprintf('%d', 123.456), "sprintf('%d', 123.456)";
429 check_taint_not sprintf('%x', 123.456), "sprintf('%x', 123.456)";
431 $_ = $a; # untaint $_
435 check_taint_not $_, '$_ = uc($a)';
438 check_taint_not $&, "\$& from /(\\w)/";
439 check_taint_not $`, "\t\$`";
440 check_taint_not $', "\t\$'";
441 check_taint_not $+, "\t\$+";
442 check_taint_not $1, "\t\$1";
443 check_taint_not $2, "\t\$2";
445 /(.)/; # untaint $&, $`, $', $+, $1.
446 check_taint_not $&, "\$& from /(.)/";
447 check_taint_not $`, "\t\$`";
448 check_taint_not $', "\t\$'";
449 check_taint_not $+, "\t\$+";
450 check_taint_not $1, "\t\$1";
451 check_taint_not $2, "\t\$2";
454 check_taint_not $&, "\$& from /(\\W)/";
455 check_taint_not $`, "\t\$`";
456 check_taint_not $', "\t\$'";
457 check_taint_not $+, "\t\$+";
458 check_taint_not $1, "\t\$1";
459 check_taint_not $2, "\t\$2";
461 /(.)/; # untaint $&, $`, $', $+, $1.
462 check_taint_not $&, "\$& from /(.)/";
463 check_taint_not $`, "\t\$`";
464 check_taint_not $', "\t\$'";
465 check_taint_not $+, "\t\$+";
466 check_taint_not $1, "\t\$1";
467 check_taint_not $2, "\t\$2";
470 check_taint_not $&, "\$& from /(\\s)/";
471 check_taint_not $`, "\t\$`";
472 check_taint_not $', "\t\$'";
473 check_taint_not $+, "\t\$+";
474 check_taint_not $1, "\t\$1";
475 check_taint_not $2, "\t\$2";
477 /(.)/; # untaint $&, $`, $', $+, $1.
478 check_taint_not $&, "\$& from /(.)/";
481 check_taint_not $&, "\$& from /(\\S)/";
482 check_taint_not $`, "\t\$`";
483 check_taint_not $', "\t\$'";
484 check_taint_not $+, "\t\$+";
485 check_taint_not $1, "\t\$1";
486 check_taint_not $2, "\t\$2";
488 /(.)/; # untaint $&, $`, $', $+, $1.
489 check_taint_not $&, "\$& from /(.)/";
492 check_taint_not $&, "\$& from /(\\d)/";
493 check_taint_not $`, "\t\$`";
494 check_taint_not $', "\t\$'";
495 check_taint_not $+, "\t\$+";
496 check_taint_not $1, "\t\$1";
497 check_taint_not $2, "\t\$2";
499 /(.)/; # untaint $&, $`, $', $+, $1.
500 check_taint_not $&, "\$& from /(.)/";
503 check_taint_not $&, "\$& from /(\\D)/";
504 check_taint_not $`, "\t\$`";
505 check_taint_not $', "\t\$'";
506 check_taint_not $+, "\t\$+";
507 check_taint_not $1, "\t\$1";
508 check_taint_not $2, "\t\$2";
510 /(.)/; # untaint $&, $`, $', $+, $1.
511 check_taint_not $&, "\$& from /(.)/";
514 check_taint_not $&, "\$& from /([[:alnum:]])/";
515 check_taint_not $`, "\t\$`";
516 check_taint_not $', "\t\$'";
517 check_taint_not $+, "\t\$+";
518 check_taint_not $1, "\t\$1";
519 check_taint_not $2, "\t\$2";
521 /(.)/; # untaint $&, $`, $', $+, $1.
522 check_taint_not $&, "\$& from /(.)/";
525 check_taint_not $&, "\$& from /([[:^alnum:]])/";
526 check_taint_not $`, "\t\$`";
527 check_taint_not $', "\t\$'";
528 check_taint_not $+, "\t\$+";
529 check_taint_not $1, "\t\$1";
530 check_taint_not $2, "\t\$2";
533 check_taint_not $&, "\$& from /(a)|(\\w)/";
534 check_taint_not $`, "\t\$`";
535 check_taint_not $', "\t\$'";
536 check_taint_not $+, "\t\$+";
537 check_taint_not $1, "\t\$1";
538 ok($1 eq 'a', ("\t" x 5) . "\$1 is 'a'");
539 ok(! defined $2, ("\t" x 5) . "\$2 is undefined");
540 check_taint_not $2, "\t\$2";
541 check_taint_not $3, "\t\$3";
543 /(.)/; # untaint $&, $`, $', $+, $1.
544 check_taint_not $&, "\$& from /(.)/";
546 "\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i;
547 check_taint_not $&, "\$& from /(\\N{CYRILLIC CAPITAL LETTER A})/i";
548 check_taint_not $`, "\t\$`";
549 check_taint_not $', "\t\$'";
550 check_taint_not $+, "\t\$+";
551 check_taint_not $1, "\t\$1";
552 ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\t\$1 is 'small cyrillic a'");
553 check_taint_not $2, "\t\$2";
555 /(.)/; # untaint $&, $`, $', $+, $1.
556 check_taint_not $&, "\$& from /./";
558 "(\N{KELVIN SIGN})" =~ /(\N{KELVIN SIGN})/i;
559 check_taint_not $&, "\$& from /(\\N{KELVIN SIGN})/i";
560 check_taint_not $`, "\t\$`";
561 check_taint_not $', "\t\$'";
562 check_taint_not $+, "\t\$+";
563 check_taint_not $1, "\t\$1";
564 check_taint_not $2, "\t\$2";
566 /(.)/; # untaint $&, $`, $', $+, $1.
567 check_taint_not $&, "\$& from /(.)/";
570 check_taint_not $&, "\$& from /(.)\\b(.)/";
571 check_taint_not $`, "\t\$`";
572 check_taint_not $', "\t\$'";
573 check_taint_not $+, "\t\$+";
574 check_taint_not $1, "\t\$1";
575 check_taint_not $2, "\t\$2";
576 check_taint_not $3, "\t\$3";
578 /(.)/; # untaint $&, $`, $', $+, $1.
579 check_taint_not $&, "\$& from /./";
582 check_taint_not $&, "\$& from /(.)\\B(.)/";
583 check_taint_not $`, "\t\$`";
584 check_taint_not $', "\t\$'";
585 check_taint_not $+, "\t\$+";
586 check_taint_not $1, "\t\$1";
587 check_taint_not $2, "\t\$2";
588 check_taint_not $3, "\t\$3";
590 /(.)/; # untaint $&, $`, $', $+, $1.
591 check_taint_not $&, "\$& from /./";
593 "aaa" =~ /(.).(\1)/i; # notaint because not locale dependent
594 check_taint_not $&, "\$ & from /(.).(\\1)/";
595 check_taint_not $`, "\t\$`";
596 check_taint_not $', "\t\$'";
597 check_taint_not $+, "\t\$+";
598 check_taint_not $1, "\t\$1";
599 check_taint_not $2, "\t\$2";
600 check_taint_not $3, "\t\$3";
602 /(.)/; # untaint $&, $`, $', $+, $1.
603 check_taint_not $&, "\$ & from /./";
605 $_ = $a; # untaint $_
607 check_taint_not $_, 'untainting $_ works';
610 check_taint_not $&, "\$ & from /(b)/";
611 check_taint_not $`, "\t\$`";
612 check_taint_not $', "\t\$'";
613 check_taint_not $+, "\t\$+";
614 check_taint_not $1, "\t\$1";
615 check_taint_not $2, "\t\$2";
617 $_ = $a; # untaint $_
619 check_taint_not $_, 'untainting $_ works';
622 check_taint_not $_, '$_ (wasn\'t tainted) from s/(.+)/b/';
623 check_taint_not $&, "\t\$&";
624 check_taint_not $`, "\t\$`";
625 check_taint_not $', "\t\$'";
626 check_taint_not $+, "\t\$+";
627 check_taint_not $1, "\t\$1";
628 check_taint_not $2, "\t\$2";
630 $b = $a; # untaint $b
632 ($b = $a) =~ s/\w/$&/;
633 check_taint_not $b, '$b from ($b = $a) =~ s/\w/$&/';
634 check_taint_not $a, '$a from ($b = $a) =~ s/\w/$&/';
636 $_ = $a; # untaint $_
639 check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\l$1/,'; # this must taint
640 check_taint_not $&, "\t\$&";
641 check_taint_not $`, "\t\$`";
642 check_taint_not $', "\t\$'";
643 check_taint_not $+, "\t\$+";
644 check_taint_not $1, "\t\$1";
645 check_taint_not $2, "\t\$2";
647 $_ = $a; # untaint $_
650 check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\L$1/,';
651 check_taint_not $&, "\t\$&";
652 check_taint_not $`, "\t\$`";
653 check_taint_not $', "\t\$'";
654 check_taint_not $+, "\t\$+";
655 check_taint_not $1, "\t\$1";
656 check_taint_not $2, "\t\$2";
658 $_ = $a; # untaint $_
661 check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\u$1/';
662 check_taint_not $&, "\t\$&";
663 check_taint_not $`, "\t\$`";
664 check_taint_not $', "\t\$'";
665 check_taint_not $+, "\t\$+";
666 check_taint_not $1, "\t\$1";
667 check_taint_not $2, "\t\$2";
669 $_ = $a; # untaint $_
672 check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\U$1/';
673 check_taint_not $&, "\t\$&";
674 check_taint_not $`, "\t\$`";
675 check_taint_not $', "\t\$'";
676 check_taint_not $+, "\t\$+";
677 check_taint_not $1, "\t\$1";
678 check_taint_not $2, "\t\$2";
680 # After all this tainting $a should be cool.
682 check_taint_not $a, '$a still not tainted';
685 check_taint_not $1, '"a" =~ /([a-z])/';
686 "foo.bar_baz" =~ /^(.*)[._](.*?)$/; # Bug 120675
687 check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/';
691 # Here are in scope of 'use locale'
693 # I think we've seen quite enough of taint.
694 # Let us do some *real* locale work now,
695 # unless setlocale() is missing (i.e. minitest).
697 # The test number before our first setlocale()
698 my $final_without_setlocale = $test_num;
702 debug "Scanning for locales...\n";
704 require POSIX; import POSIX ':locale_h';
706 my @Locale = find_locales([ &POSIX::LC_CTYPE, &POSIX::LC_NUMERIC, &POSIX::LC_ALL ]);
714 print "1..$test_num\n";
719 setlocale(&POSIX::LC_ALL, "C");
726 my @Added_alpha; # Alphas that aren't in the C locale.
730 # This returns a display string denoting the input parameter @_, each
731 # entry of which is a single character in the range 0-255. The first part
732 # of the output is a string of the characters in @_ that are ASCII
733 # graphics, and hence unambiguously displayable. They are given by code
734 # point order. The second part is the remaining code points, the ordinals
735 # of which are each displayed as 2-digit hex. Blanks are inserted so as
736 # to keep anything from the first part looking like a 2-digit hex number.
739 my @chars = sort { ord $a <=> ord $b } @_;
743 push @chars, chr(258); # This sentinel simplifies the loop termination
745 foreach my $i (0 .. @chars - 1) {
746 my $char = $chars[$i];
750 # We avoid using [:posix:] classes, as these are being tested in this
751 # file. Each equivalence class below is for things that can appear in
752 # a range; those that can't be in a range have class -1. 0 for those
753 # which should be output in hex; and >0 for the other ranges
754 if ($char =~ /[A-Z]/) {
757 elsif ($char =~ /[a-z]/) {
760 elsif ($char =~ /[0-9]/) {
763 # Uncomment to get literal punctuation displayed instead of hex
764 #elsif ($char =~ /[[\]!"#\$\%&\'()*+,.\/:\\;<=>?\@\^_`{|}~-]/) {
765 # $class = -1; # Punct never appears in a range
768 $class = 0; # Output in hex
771 if (! defined $range_start) {
773 $output .= " " . $char;
776 $range_start = ord $char;
777 $start_class = $class;
779 } # A range ends if not consecutive, or the class-type changes
780 elsif (ord $char != ($range_end = ord($chars[$i-1])) + 1
781 || $class != $start_class)
784 # Here, the current character is not in the range. This means the
785 # previous character must have been. Output the range up through
787 my $range_length = $range_end - $range_start + 1;
788 if ($start_class > 0) {
789 $output .= " " . chr($range_start);
790 $output .= "-" . chr($range_end) if $range_length > 1;
793 $output .= sprintf(" %02X", $range_start);
794 $output .= sprintf("-%02X", $range_end) if $range_length > 1;
797 # Handle the new current character, as potentially beginning a new
811 # Displays the string unambiguously. ASCII printables are always output
812 # as-is, though perhaps separated by blanks from other characters. If
813 # entirely printable ASCII, just returns the string. Otherwise if valid
814 # UTF-8 it uses the character names for non-printable-ASCII. Otherwise it
815 # outputs hex for each non-ASCII-printable byte.
817 return $string if $string =~ / ^ [[:print:]]* $/xa;
820 my $prev_was_punct = 1; # Beginning is considered punct
821 if (utf8::valid($string) && utf8::is_utf8($string)) {
823 foreach my $char (split "", $string) {
825 # Keep punctuation adjacent to other characters; otherwise
826 # separate them with a blank
827 if ($char =~ /[[:punct:]]/a) {
831 elsif ($char =~ /[[:print:]]/a) {
832 $result .= " " unless $prev_was_punct;
837 $result .= " " unless $prev_was_punct;
838 $result .= charnames::viacode(ord $char);
845 foreach my $char (split "", $string) {
846 if ($char =~ /[[:punct:]]/a) {
850 elsif ($char =~ /[[:print:]]/a) {
851 $result .= " " unless $prev_was_punct;
856 $result .= " " unless $prev_was_punct;
857 $result .= sprintf("%02X", ord $char);
867 my ($Locale, $i, $pass_fail, $message) = @_;
869 $message = " ($message)" if $message;
870 unless ($pass_fail) {
871 $Problem{$i}{$Locale} = 1;
872 debug "failed $i ($test_names{$i}) with locale '$Locale'$message\n";
874 push @{$Okay{$i}}, $Locale;
878 sub report_multi_result {
879 my ($Locale, $i, $results_ref) = @_;
881 # $results_ref points to an array, each element of which is a character that was
882 # in error for this test numbered '$i'. If empty, the test passed
886 $message = join " ", "for", disp_chars(@$results_ref);
888 report_result($Locale, $i, @$results_ref == 0, $message);
891 my $first_locales_test_number = $final_without_setlocale + 1;
892 my $locales_test_number;
893 my $not_necessarily_a_problem_test_number;
894 my $first_casing_test_number;
895 my %setlocale_failed; # List of locales that setlocale() didn't work on
897 foreach my $Locale (@Locale) {
898 $locales_test_number = $first_locales_test_number - 1;
900 debug "Locale = $Locale\n";
902 unless (setlocale(&POSIX::LC_ALL, $Locale)) {
903 $setlocale_failed{$Locale} = $Locale;
907 # We test UTF-8 locales only under ':not_characters'; It is easier to
908 # test them in other test files than here. Non- UTF-8 locales are tested
909 # only under plain 'use locale', as otherwise we would have to convert
910 # everything in them to Unicode.
912 my %UPPER = (); # All alpha X for which uc(X) == X and lc(X) != X
913 my %lower = (); # All alpha X for which lc(X) == X and uc(X) != X
914 my %BoThCaSe = (); # All alpha X for which uc(X) == lc(X) == X
916 my $is_utf8_locale = is_locale_utf8($Locale);
918 debug "is utf8 locale? = $is_utf8_locale\n";
920 debug "radix = " . disp_str(localeconv()->{decimal_point}) . "\n";
922 if (! $is_utf8_locale) {
924 @{$posixes{'word'}} = grep /\w/, map { chr } 0..255;
925 @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255;
926 @{$posixes{'space'}} = grep /\s/, map { chr } 0..255;
927 @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255;
928 @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255;
929 @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255;
930 @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255;
931 @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255;
932 @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255;
933 @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255;
934 @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255;
935 @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255;
936 @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255;
937 @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255;
938 @{$posixes{'cased'}} = grep /[[:upper:]]/i, map {chr } 0..255;
940 # Sieve the uppercase and the lowercase.
942 for (@{$posixes{'word'}}) {
943 if (/[^\d_]/) { # skip digits and the _
954 use locale ':not_characters';
955 @{$posixes{'word'}} = grep /\w/, map { chr } 0..255;
956 @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255;
957 @{$posixes{'space'}} = grep /\s/, map { chr } 0..255;
958 @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255;
959 @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255;
960 @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255;
961 @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255;
962 @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255;
963 @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255;
964 @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255;
965 @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255;
966 @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255;
967 @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255;
968 @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255;
969 @{$posixes{'cased'}} = grep /[[:upper:]]/i, map {chr } 0..255;
970 for (@{$posixes{'word'}}) {
971 if (/[^\d_]/) { # skip digits and the _
982 # Ordered, where possible, in groups of "this is a subset of the next
984 debug ":upper: = ", disp_chars(@{$posixes{'upper'}}), "\n";
985 debug ":lower: = ", disp_chars(@{$posixes{'lower'}}), "\n";
986 debug ":cased: = ", disp_chars(@{$posixes{'cased'}}), "\n";
987 debug ":alpha: = ", disp_chars(@{$posixes{'alpha'}}), "\n";
988 debug ":alnum: = ", disp_chars(@{$posixes{'alnum'}}), "\n";
989 debug " w = ", disp_chars(@{$posixes{'word'}}), "\n";
990 debug ":graph: = ", disp_chars(@{$posixes{'graph'}}), "\n";
991 debug ":print: = ", disp_chars(@{$posixes{'print'}}), "\n";
992 debug " d = ", disp_chars(@{$posixes{'digit'}}), "\n";
993 debug ":xdigit: = ", disp_chars(@{$posixes{'xdigit'}}), "\n";
994 debug ":blank: = ", disp_chars(@{$posixes{'blank'}}), "\n";
995 debug " s = ", disp_chars(@{$posixes{'space'}}), "\n";
996 debug ":punct: = ", disp_chars(@{$posixes{'punct'}}), "\n";
997 debug ":cntrl: = ", disp_chars(@{$posixes{'cntrl'}}), "\n";
998 debug ":ascii: = ", disp_chars(@{$posixes{'ascii'}}), "\n";
1000 foreach (keys %UPPER) {
1002 $BoThCaSe{$_}++ if exists $lower{$_};
1004 foreach (keys %lower) {
1005 $BoThCaSe{$_}++ if exists $UPPER{$_};
1007 foreach (keys %BoThCaSe) {
1013 foreach my $ord ( 0 .. 255 ) {
1014 $Unassigned{chr $ord} = 1;
1016 foreach my $class (keys %posixes) {
1017 foreach my $char (@{$posixes{$class}}) {
1018 delete $Unassigned{$char};
1022 debug "UPPER = ", disp_chars(sort { ord $a <=> ord $b } keys %UPPER), "\n";
1023 debug "lower = ", disp_chars(sort { ord $a <=> ord $b } keys %lower), "\n";
1024 debug "BoThCaSe = ", disp_chars(sort { ord $a <=> ord $b } keys %BoThCaSe), "\n";
1025 debug "Unassigned = ", disp_chars(sort { ord $a <=> ord $b } keys %Unassigned), "\n";
1029 foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) {
1032 if ($is_utf8_locale) {
1033 use locale ':not_characters';
1034 $ok = $x =~ /[[:upper:]]/;
1035 $fold_ok = $x =~ /[[:lower:]]/i;
1039 $ok = $x =~ /[[:upper:]]/;
1040 $fold_ok = $x =~ /[[:lower:]]/i;
1042 push @failures, $x unless $ok;
1043 push @fold_failures, $x unless $fold_ok;
1045 $locales_test_number++;
1046 $first_casing_test_number = $locales_test_number;
1047 $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/ matches all alpha X for which uc(X) == X and lc(X) != X';
1048 report_multi_result($Locale, $locales_test_number, \@failures);
1050 $locales_test_number++;
1052 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i matches all alpha X for which uc(X) == X and lc(X) != X';
1053 report_multi_result($Locale, $locales_test_number, \@fold_failures);
1056 undef @fold_failures;
1058 foreach my $x (sort { ord $a <=> ord $b } keys %lower) {
1061 if ($is_utf8_locale) {
1062 use locale ':not_characters';
1063 $ok = $x =~ /[[:lower:]]/;
1064 $fold_ok = $x =~ /[[:upper:]]/i;
1068 $ok = $x =~ /[[:lower:]]/;
1069 $fold_ok = $x =~ /[[:upper:]]/i;
1071 push @failures, $x unless $ok;
1072 push @fold_failures, $x unless $fold_ok;
1075 $locales_test_number++;
1076 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/ matches all alpha X for which lc(X) == X and uc(X) != X';
1077 report_multi_result($Locale, $locales_test_number, \@failures);
1079 $locales_test_number++;
1080 $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/i matches all alpha X for which lc(X) == X and uc(X) != X';
1081 report_multi_result($Locale, $locales_test_number, \@fold_failures);
1083 { # Find the alphabetic characters that are not considered alphabetics
1084 # in the default (C) locale.
1089 for (keys %UPPER, keys %lower, keys %BoThCaSe) {
1090 push(@Added_alpha, $_) if (/\W/);
1094 @Added_alpha = sort { ord $a <=> ord $b } @Added_alpha;
1096 debug "Added_alpha = ", disp_chars(@Added_alpha), "\n";
1098 # Cross-check the whole 8-bit character set.
1100 ++$locales_test_number;
1102 $test_names{$locales_test_number} = 'Verify that \w and [:word:] are identical';
1103 for (map { chr } 0..255) {
1104 if ($is_utf8_locale) {
1105 use locale ':not_characters';
1106 push @f, $_ unless /[[:word:]]/ == /\w/;
1109 push @f, $_ unless /[[:word:]]/ == /\w/;
1112 report_multi_result($Locale, $locales_test_number, \@f);
1114 ++$locales_test_number;
1116 $test_names{$locales_test_number} = 'Verify that \d and [:digit:] are identical';
1117 for (map { chr } 0..255) {
1118 if ($is_utf8_locale) {
1119 use locale ':not_characters';
1120 push @f, $_ unless /[[:digit:]]/ == /\d/;
1123 push @f, $_ unless /[[:digit:]]/ == /\d/;
1126 report_multi_result($Locale, $locales_test_number, \@f);
1128 ++$locales_test_number;
1130 $test_names{$locales_test_number} = 'Verify that \s and [:space:] are identical';
1131 for (map { chr } 0..255) {
1132 if ($is_utf8_locale) {
1133 use locale ':not_characters';
1134 push @f, $_ unless /[[:space:]]/ == /\s/;
1137 push @f, $_ unless /[[:space:]]/ == /\s/;
1140 report_multi_result($Locale, $locales_test_number, \@f);
1142 ++$locales_test_number;
1144 $test_names{$locales_test_number} = 'Verify that [:posix:] and [:^posix:] are mutually exclusive';
1145 for (map { chr } 0..255) {
1146 if ($is_utf8_locale) {
1147 use locale ':not_characters';
1148 push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) ||
1149 (/[[:alnum:]]/ xor /[[:^alnum:]]/) ||
1150 (/[[:ascii:]]/ xor /[[:^ascii:]]/) ||
1151 (/[[:blank:]]/ xor /[[:^blank:]]/) ||
1152 (/[[:cntrl:]]/ xor /[[:^cntrl:]]/) ||
1153 (/[[:digit:]]/ xor /[[:^digit:]]/) ||
1154 (/[[:graph:]]/ xor /[[:^graph:]]/) ||
1155 (/[[:lower:]]/ xor /[[:^lower:]]/) ||
1156 (/[[:print:]]/ xor /[[:^print:]]/) ||
1157 (/[[:space:]]/ xor /[[:^space:]]/) ||
1158 (/[[:upper:]]/ xor /[[:^upper:]]/) ||
1159 (/[[:word:]]/ xor /[[:^word:]]/) ||
1160 (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) ||
1162 # effectively is what [:cased:] would be if it existed.
1163 (/[[:upper:]]/i xor /[[:^upper:]]/i);
1166 push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) ||
1167 (/[[:alnum:]]/ xor /[[:^alnum:]]/) ||
1168 (/[[:ascii:]]/ xor /[[:^ascii:]]/) ||
1169 (/[[:blank:]]/ xor /[[:^blank:]]/) ||
1170 (/[[:cntrl:]]/ xor /[[:^cntrl:]]/) ||
1171 (/[[:digit:]]/ xor /[[:^digit:]]/) ||
1172 (/[[:graph:]]/ xor /[[:^graph:]]/) ||
1173 (/[[:lower:]]/ xor /[[:^lower:]]/) ||
1174 (/[[:print:]]/ xor /[[:^print:]]/) ||
1175 (/[[:space:]]/ xor /[[:^space:]]/) ||
1176 (/[[:upper:]]/ xor /[[:^upper:]]/) ||
1177 (/[[:word:]]/ xor /[[:^word:]]/) ||
1178 (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) ||
1179 (/[[:upper:]]/i xor /[[:^upper:]]/i);
1182 report_multi_result($Locale, $locales_test_number, \@f);
1184 # The rules for the relationships are given in:
1185 # http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
1188 ++$locales_test_number;
1190 $test_names{$locales_test_number} = 'Verify that [:lower:] contains at least a-z';
1192 if ($is_utf8_locale) {
1193 use locale ':not_characters';
1194 push @f, $_ unless /[[:lower:]]/;
1197 push @f, $_ unless /[[:lower:]]/;
1200 report_multi_result($Locale, $locales_test_number, \@f);
1202 ++$locales_test_number;
1204 $test_names{$locales_test_number} = 'Verify that [:lower:] is a subset of [:alpha:]';
1205 for (map { chr } 0..255) {
1206 if ($is_utf8_locale) {
1207 use locale ':not_characters';
1208 push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/;
1211 push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/;
1214 report_multi_result($Locale, $locales_test_number, \@f);
1216 ++$locales_test_number;
1218 $test_names{$locales_test_number} = 'Verify that [:upper:] contains at least A-Z';
1220 if ($is_utf8_locale) {
1221 use locale ':not_characters';
1222 push @f, $_ unless /[[:upper:]]/;
1225 push @f, $_ unless /[[:upper:]]/;
1228 report_multi_result($Locale, $locales_test_number, \@f);
1230 ++$locales_test_number;
1232 $test_names{$locales_test_number} = 'Verify that [:upper:] is a subset of [:alpha:]';
1233 for (map { chr } 0..255) {
1234 if ($is_utf8_locale) {
1235 use locale ':not_characters';
1236 push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/;
1239 push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/;
1242 report_multi_result($Locale, $locales_test_number, \@f);
1244 ++$locales_test_number;
1246 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i is a subset of [:alpha:]';
1247 for (map { chr } 0..255) {
1248 if ($is_utf8_locale) {
1249 use locale ':not_characters';
1250 push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/;
1253 push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/;
1256 report_multi_result($Locale, $locales_test_number, \@f);
1258 ++$locales_test_number;
1260 $test_names{$locales_test_number} = 'Verify that [:alpha:] is a subset of [:alnum:]';
1261 for (map { chr } 0..255) {
1262 if ($is_utf8_locale) {
1263 use locale ':not_characters';
1264 push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/;
1267 push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/;
1270 report_multi_result($Locale, $locales_test_number, \@f);
1272 ++$locales_test_number;
1274 $test_names{$locales_test_number} = 'Verify that [:digit:] contains at least 0-9';
1276 if ($is_utf8_locale) {
1277 use locale ':not_characters';
1278 push @f, $_ unless /[[:digit:]]/;
1281 push @f, $_ unless /[[:digit:]]/;
1284 report_multi_result($Locale, $locales_test_number, \@f);
1286 ++$locales_test_number;
1288 $test_names{$locales_test_number} = 'Verify that [:digit:] is a subset of [:alnum:]';
1289 for (map { chr } 0..255) {
1290 if ($is_utf8_locale) {
1291 use locale ':not_characters';
1292 push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/;
1295 push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/;
1298 report_multi_result($Locale, $locales_test_number, \@f);
1300 ++$locales_test_number;
1302 $test_names{$locales_test_number} = 'Verify that [:digit:] matches either 10 or 20 code points';
1303 report_result($Locale, $locales_test_number, @{$posixes{'digit'}} == 10 || @{$posixes{'digit'}} == 20);
1305 ++$locales_test_number;
1307 $test_names{$locales_test_number} = 'Verify that if there is a second set of digits in [:digit:], they are consecutive';
1308 if (@{$posixes{'digit'}} == 20) {
1310 for (map { chr } 0..255) {
1311 next unless /[[:digit:]]/;
1313 if (defined $previous_ord) {
1314 if ($is_utf8_locale) {
1315 use locale ':not_characters';
1316 push @f, $_ if ord $_ != $previous_ord + 1;
1319 push @f, $_ if ord $_ != $previous_ord + 1;
1322 $previous_ord = ord $_;
1325 report_multi_result($Locale, $locales_test_number, \@f);
1327 ++$locales_test_number;
1329 my @xdigit_digits; # :digit: & :xdigit:
1330 $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains one or two blocks of 10 consecutive [:digit:] chars';
1331 for (map { chr } 0..255) {
1332 if ($is_utf8_locale) {
1333 use locale ':not_characters';
1334 # For utf8 locales, we actually use a stricter test: that :digit:
1335 # is a subset of :xdigit:, as we know that only 0-9 should match
1336 push @f, $_ if /[[:digit:]]/ and ! /[[:xdigit:]]/;
1339 push @xdigit_digits, $_ if /[[:digit:]]/ and /[[:xdigit:]]/;
1342 if (! $is_utf8_locale) {
1344 # For non-utf8 locales, @xdigit_digits is a list of the characters
1345 # that are both :xdigit: and :digit:. Because :digit: is stored in
1346 # increasing code point order (unless the tests above failed),
1347 # @xdigit_digits is as well. There should be exactly 10 or
1349 if (@xdigit_digits != 10 && @xdigit_digits != 20) {
1350 @f = @xdigit_digits;
1354 # Look for contiguity in the series, adding any wrong ones to @f
1355 my @temp = @xdigit_digits;
1357 push @f, $temp[1] if ($temp[0] != $temp[1] - 1)
1359 # Skip this test for the 0th character of
1360 # the second block of 10, as it won't be
1361 # contiguous with the previous block
1362 && (! defined $xdigit_digits[10]
1363 || $temp[1] != $xdigit_digits[10]);
1369 report_multi_result($Locale, $locales_test_number, \@f);
1371 ++$locales_test_number;
1373 $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains at least A-F, a-f';
1374 for ('A' .. 'F', 'a' .. 'f') {
1375 if ($is_utf8_locale) {
1376 use locale ':not_characters';
1377 push @f, $_ unless /[[:xdigit:]]/;
1380 push @f, $_ unless /[[:xdigit:]]/;
1383 report_multi_result($Locale, $locales_test_number, \@f);
1385 ++$locales_test_number;
1387 $test_names{$locales_test_number} = 'Verify that any additional members of [:xdigit:], are in groups of 6 consecutive code points';
1390 for my $chr (map { chr } 0..255) {
1391 next unless $chr =~ /[[:xdigit:]]/;
1392 if ($is_utf8_locale) {
1393 next if $chr =~ /[[:digit:]]/;
1396 next if grep { $chr eq $_ } @xdigit_digits;
1398 next if $chr =~ /[A-Fa-f]/;
1399 if (defined $previous_ord) {
1400 if ($is_utf8_locale) {
1401 use locale ':not_characters';
1402 push @f, $chr if ord $chr != $previous_ord + 1;
1405 push @f, $chr if ord $chr != $previous_ord + 1;
1410 undef $previous_ord;
1413 $previous_ord = ord $chr;
1416 report_multi_result($Locale, $locales_test_number, \@f);
1418 ++$locales_test_number;
1420 $test_names{$locales_test_number} = 'Verify that [:xdigit:] is a subset of [:graph:]';
1421 for (map { chr } 0..255) {
1422 if ($is_utf8_locale) {
1423 use locale ':not_characters';
1424 push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/;
1427 push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/;
1430 report_multi_result($Locale, $locales_test_number, \@f);
1432 # Note that xdigit doesn't have to be a subset of alnum
1434 ++$locales_test_number;
1436 $test_names{$locales_test_number} = 'Verify that [:punct:] is a subset of [:graph:]';
1437 for (map { chr } 0..255) {
1438 if ($is_utf8_locale) {
1439 use locale ':not_characters';
1440 push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/;
1443 push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/;
1446 report_multi_result($Locale, $locales_test_number, \@f);
1448 ++$locales_test_number;
1450 $test_names{$locales_test_number} = 'Verify that the space character is not in [:graph:]';
1451 if ($is_utf8_locale) {
1452 use locale ':not_characters';
1453 push @f, " " if " " =~ /[[:graph:]]/;
1456 push @f, " " if " " =~ /[[:graph:]]/;
1458 report_multi_result($Locale, $locales_test_number, \@f);
1460 ++$locales_test_number;
1462 $test_names{$locales_test_number} = 'Verify that [:space:] contains at least [\f\n\r\t\cK ]';
1463 for (' ', "\f", "\n", "\r", "\t", "\cK") {
1464 if ($is_utf8_locale) {
1465 use locale ':not_characters';
1466 push @f, $_ unless /[[:space:]]/;
1469 push @f, $_ unless /[[:space:]]/;
1472 report_multi_result($Locale, $locales_test_number, \@f);
1474 ++$locales_test_number;
1476 $test_names{$locales_test_number} = 'Verify that [:blank:] contains at least [\t ]';
1478 if ($is_utf8_locale) {
1479 use locale ':not_characters';
1480 push @f, $_ unless /[[:blank:]]/;
1483 push @f, $_ unless /[[:blank:]]/;
1486 report_multi_result($Locale, $locales_test_number, \@f);
1488 ++$locales_test_number;
1490 $test_names{$locales_test_number} = 'Verify that [:blank:] is a subset of [:space:]';
1491 for (map { chr } 0..255) {
1492 if ($is_utf8_locale) {
1493 use locale ':not_characters';
1494 push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/;
1497 push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/;
1500 report_multi_result($Locale, $locales_test_number, \@f);
1502 ++$locales_test_number;
1504 $test_names{$locales_test_number} = 'Verify that [:graph:] is a subset of [:print:]';
1505 for (map { chr } 0..255) {
1506 if ($is_utf8_locale) {
1507 use locale ':not_characters';
1508 push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/;
1511 push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/;
1514 report_multi_result($Locale, $locales_test_number, \@f);
1516 ++$locales_test_number;
1518 $test_names{$locales_test_number} = 'Verify that the space character is in [:print:]';
1519 if ($is_utf8_locale) {
1520 use locale ':not_characters';
1521 push @f, " " if " " !~ /[[:print:]]/;
1524 push @f, " " if " " !~ /[[:print:]]/;
1526 report_multi_result($Locale, $locales_test_number, \@f);
1528 ++$locales_test_number;
1530 $test_names{$locales_test_number} = 'Verify that isn\'t both [:cntrl:] and [:print:]';
1531 for (map { chr } 0..255) {
1532 if ($is_utf8_locale) {
1533 use locale ':not_characters';
1534 push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/);
1537 push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/);
1540 report_multi_result($Locale, $locales_test_number, \@f);
1542 ++$locales_test_number;
1544 $test_names{$locales_test_number} = 'Verify that isn\'t both [:alpha:] and [:digit:]';
1545 for (map { chr } 0..255) {
1546 if ($is_utf8_locale) {
1547 use locale ':not_characters';
1548 push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/;
1551 push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/;
1554 report_multi_result($Locale, $locales_test_number, \@f);
1556 ++$locales_test_number;
1558 $test_names{$locales_test_number} = 'Verify that isn\'t both [:alnum:] and [:punct:]';
1559 for (map { chr } 0..255) {
1560 if ($is_utf8_locale) {
1561 use locale ':not_characters';
1562 push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/;
1565 push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/;
1568 report_multi_result($Locale, $locales_test_number, \@f);
1570 ++$locales_test_number;
1572 $test_names{$locales_test_number} = 'Verify that isn\'t both [:xdigit:] and [:punct:]';
1573 for (map { chr } 0..255) {
1574 if ($is_utf8_locale) {
1575 use locale ':not_characters';
1576 push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/);
1579 push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/);
1582 report_multi_result($Locale, $locales_test_number, \@f);
1584 ++$locales_test_number;
1586 $test_names{$locales_test_number} = 'Verify that isn\'t both [:graph:] and [:space:]';
1587 for (map { chr } 0..255) {
1588 if ($is_utf8_locale) {
1589 use locale ':not_characters';
1590 push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/);
1593 push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/);
1596 report_multi_result($Locale, $locales_test_number, \@f);
1598 foreach ($first_casing_test_number..$locales_test_number) {
1599 $problematical_tests{$_} = 1;
1603 # Test for read-only scalars' locale vs non-locale comparisons.
1609 if ($is_utf8_locale) {
1610 use locale ':not_characters';
1611 $ok = ($a cmp "qwerty") == 0;
1615 $ok = ($a cmp "qwerty") == 0;
1617 report_result($Locale, ++$locales_test_number, $ok);
1618 $test_names{$locales_test_number} = 'Verify that cmp works with a read-only scalar; no- vs locale';
1622 my ($from, $to, $lesser, $greater,
1623 @test, %test, $test, $yes, $no, $sign);
1625 ++$locales_test_number;
1626 $test_names{$locales_test_number} = 'Verify that "le", "ne", etc work';
1627 $not_necessarily_a_problem_test_number = $locales_test_number;
1630 $from = int(($_*@{$posixes{'word'}})/10);
1631 $to = $from + int(@{$posixes{'word'}}/10);
1632 $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}});
1633 $lesser = join('', @{$posixes{'word'}}[$from..$to]);
1634 # Select a slice one character on.
1636 $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}});
1637 $greater = join('', @{$posixes{'word'}}[$from..$to]);
1638 if ($is_utf8_locale) {
1639 use locale ':not_characters';
1640 ($yes, $no, $sign) = ($lesser lt $greater
1642 : ("not ", " ", -1));
1646 ($yes, $no, $sign) = ($lesser lt $greater
1648 : ("not ", " ", -1));
1650 # all these tests should FAIL (return 0). Exact lt or gt cannot
1651 # be tested because in some locales, say, eacute and E may test
1655 $no.' ($lesser le $greater)', # 1
1656 'not ($lesser ne $greater)', # 2
1657 ' ($lesser eq $greater)', # 3
1658 $yes.' ($lesser ge $greater)', # 4
1659 $yes.' ($lesser ge $greater)', # 5
1660 $yes.' ($greater le $lesser )', # 7
1661 'not ($greater ne $lesser )', # 8
1662 ' ($greater eq $lesser )', # 9
1663 $no.' ($greater ge $lesser )', # 10
1664 'not (($lesser cmp $greater) == -($sign))' # 11
1666 @test{@test} = 0 x @test;
1668 for my $ti (@test) {
1669 if ($is_utf8_locale) {
1670 use locale ':not_characters';
1671 $test{$ti} = eval $ti;
1674 # Already in 'use locale';
1675 $test{$ti} = eval $ti;
1677 $test ||= $test{$ti}
1679 report_result($Locale, $locales_test_number, $test == 0);
1681 debug "lesser = '$lesser'\n";
1682 debug "greater = '$greater'\n";
1683 debug "lesser cmp greater = ",
1684 $lesser cmp $greater, "\n";
1685 debug "greater cmp lesser = ",
1686 $greater cmp $lesser, "\n";
1687 debug "(greater) from = $from, to = $to\n";
1688 for my $ti (@test) {
1689 debugf("# %-40s %-4s", $ti,
1690 $test{$ti} ? 'FAIL' : 'ok');
1691 if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) {
1692 debugf("(%s == %4d)", $1, eval $1);
1734 if (! $is_utf8_locale) {
1737 my ($x, $y) = (1.23, 1.23);
1740 printf ''; # printf used to reset locale to "C"
1745 my $z = sprintf ''; # sprintf used to reset locale to "C"
1752 local $SIG{__WARN__} =
1758 # The == (among other ops) used to warn for locales
1759 # that had something else than "." as the radix character.
1783 $ok12 = abs(($f + $g) - 3.57) < 0.01;
1785 $ok14 = $ok14_5 = $ok15 = $ok16 = 1; # Skip for non-utf8 locales
1789 $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i);
1791 $ok18 = $j eq sprintf("%g:%g", $h, $i);
1794 use locale ':not_characters';
1796 my ($x, $y) = (1.23, 1.23);
1798 printf ''; # printf used to reset locale to "C"
1803 my $z = sprintf ''; # sprintf used to reset locale to "C"
1809 local $SIG{__WARN__} =
1835 $ok12 = abs(($f + $g) - 3.57) < 0.01;
1838 # Look for non-ASCII error messages, and verify that the first
1839 # such is in UTF-8 (the others almost certainly will be like the
1840 # first). This is only done if the current locale has LC_MESSAGES
1843 if (setlocale(&POSIX::LC_MESSAGES, $Locale)) {
1844 foreach my $err (keys %!) {
1846 $! = eval "&Errno::$err"; # Convert to strerror() output
1847 my $strerror = "$!";
1848 if ("$strerror" =~ /\P{ASCII}/) {
1849 $ok14 = utf8::is_utf8($strerror);
1851 $ok14_5 = "$!" !~ /\P{ASCII}/;
1857 # Similarly, we verify that a non-ASCII radix is in UTF-8. This
1858 # also catches if there is a disparity between sprintf and
1861 my $string_g = "$g";
1862 my $sprintf_g = sprintf("%g", $g);
1864 $ok15 = $string_g =~ / ^ \p{ASCII}+ $ /x || utf8::is_utf8($string_g);
1865 $ok16 = $sprintf_g eq $string_g;
1869 $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i);
1871 $ok18 = $j eq sprintf("%g:%g", $h, $i);
1875 if (setlocale(&POSIX::LC_TIME, $Locale)) { # These tests aren't affected by
1877 my @times = CORE::localtime();
1880 $ok19 = POSIX::strftime("%p", @times) ne "%p"; # [perl #119425]
1881 my $date = POSIX::strftime("'%A' '%B' '%Z' '%p'", @times);
1882 debug("'Day' 'Month' 'TZ' 'am/pm' = ", disp_str($date));
1884 # If there is any non-ascii, it better be UTF-8 in a UTF-8 locale, and
1885 # not UTF-8 if the locale isn't UTF-8.
1886 $ok20 = $date =~ / ^ \p{ASCII}+ $ /x
1887 || $is_utf8_locale == utf8::is_utf8($date);
1891 foreach my $err (keys %!) {
1894 $! = eval "&Errno::$err"; # Convert to strerror() output
1895 my $strerror = "$!";
1896 if ("$strerror" =~ /\P{ASCII}/) {
1902 report_result($Locale, ++$locales_test_number, $ok1);
1903 $test_names{$locales_test_number} = 'Verify that an intervening printf doesn\'t change assignment results';
1904 my $first_a_test = $locales_test_number;
1906 debug "$first_a_test..$locales_test_number: \$a = $a, \$b = $b, Locale = $Locale\n";
1908 report_result($Locale, ++$locales_test_number, $ok2);
1909 $test_names{$locales_test_number} = 'Verify that an intervening sprintf doesn\'t change assignment results';
1911 my $first_c_test = $locales_test_number;
1913 report_result($Locale, ++$locales_test_number, $ok3);
1914 $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a constant';
1915 $problematical_tests{$locales_test_number} = 1;
1917 report_result($Locale, ++$locales_test_number, $ok4);
1918 $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar';
1919 $problematical_tests{$locales_test_number} = 1;
1921 report_result($Locale, ++$locales_test_number, $ok5);
1922 $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar and an intervening sprintf';
1923 $problematical_tests{$locales_test_number} = 1;
1925 debug "$first_c_test..$locales_test_number: \$c = $c, \$d = $d, Locale = $Locale\n";
1927 report_result($Locale, ++$locales_test_number, $ok6);
1928 $test_names{$locales_test_number} = 'Verify that can assign stringified under inner no-locale block';
1929 my $first_e_test = $locales_test_number;
1931 report_result($Locale, ++$locales_test_number, $ok7);
1932 $test_names{$locales_test_number} = 'Verify that "==" with a scalar still works in inner no locale';
1934 report_result($Locale, ++$locales_test_number, $ok8);
1935 $test_names{$locales_test_number} = 'Verify that "==" with a scalar and an intervening sprintf still works in inner no locale';
1936 $problematical_tests{$locales_test_number} = 1;
1938 debug "$first_e_test..$locales_test_number: \$e = $e, no locale\n";
1940 report_result($Locale, ++$locales_test_number, $ok9);
1941 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a constant';
1942 $problematical_tests{$locales_test_number} = 1;
1943 my $first_f_test = $locales_test_number;
1945 report_result($Locale, ++$locales_test_number, $ok10);
1946 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar';
1947 $problematical_tests{$locales_test_number} = 1;
1949 report_result($Locale, ++$locales_test_number, $ok11);
1950 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar and an intervening sprintf';
1951 $problematical_tests{$locales_test_number} = 1;
1953 report_result($Locale, ++$locales_test_number, $ok12);
1954 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix can participate in an addition and function call as numeric';
1955 $problematical_tests{$locales_test_number} = 1;
1957 report_result($Locale, ++$locales_test_number, $ok13);
1958 $test_names{$locales_test_number} = 'Verify that don\'t get warning under "==" even if radix is not a dot';
1959 $problematical_tests{$locales_test_number} = 1;
1961 report_result($Locale, ++$locales_test_number, $ok14);
1962 $test_names{$locales_test_number} = 'Verify that non-ASCII UTF-8 error messages are in UTF-8';
1964 report_result($Locale, ++$locales_test_number, $ok14_5);
1965 $test_names{$locales_test_number} = '... and are ASCII outside "use locale"';
1967 report_result($Locale, ++$locales_test_number, $ok15);
1968 $test_names{$locales_test_number} = 'Verify that a number with a UTF-8 radix has a UTF-8 stringification';
1970 report_result($Locale, ++$locales_test_number, $ok16);
1971 $test_names{$locales_test_number} = 'Verify that a sprintf of a number with a UTF-8 radix yields UTF-8';
1973 report_result($Locale, ++$locales_test_number, $ok17);
1974 $test_names{$locales_test_number} = 'Verify that a sprintf of a number outside locale scope uses a dot radix';
1976 report_result($Locale, ++$locales_test_number, $ok18);
1977 $test_names{$locales_test_number} = 'Verify that a sprintf of a number back within locale scope uses locale radix';
1979 report_result($Locale, ++$locales_test_number, $ok19);
1980 $test_names{$locales_test_number} = 'Verify that strftime doesn\'t return "%p" in locales where %p is empty';
1982 report_result($Locale, ++$locales_test_number, $ok20);
1983 $test_names{$locales_test_number} = 'Verify that strftime returns date with UTF-8 flag appropriately set';
1984 $problematical_tests{$locales_test_number} = 1; # This is broken in
1987 report_result($Locale, ++$locales_test_number, $ok21);
1988 $test_names{$locales_test_number} = '"$!" is ASCII only outside of locale scope';
1990 debug "$first_f_test..$locales_test_number: \$f = $f, \$g = $g, back to locale = $Locale\n";
1992 # Does taking lc separately differ from taking
1993 # the lc "in-line"? (This was the bug 19990704.002, change #3568.)
1994 # The bug was in the caching of the 'o'-magic.
1995 if (! $is_utf8_locale) {
2001 return $lc0 cmp $lc1;
2005 return lc($_[0]) cmp lc($_[1]);
2012 report_result($Locale, ++$locales_test_number,
2013 lcA($x, $y) == 1 && lcB($x, $y) == 1 ||
2014 lcA($x, $z) == 0 && lcB($x, $z) == 0);
2017 use locale ':not_characters';
2022 return $lc0 cmp $lc1;
2026 return lc($_[0]) cmp lc($_[1]);
2033 report_result($Locale, ++$locales_test_number,
2034 lcC($x, $y) == 1 && lcD($x, $y) == 1 ||
2035 lcC($x, $z) == 0 && lcD($x, $z) == 0);
2037 $test_names{$locales_test_number} = 'Verify "lc(foo) cmp lc(bar)" is the same as using intermediaries for the cmp';
2039 # Does lc of an UPPER (if different from the UPPER) match
2040 # case-insensitively the UPPER, and does the UPPER match
2041 # case-insensitively the lc of the UPPER. And vice versa.
2045 my $re = qr/[\[\(\{\*\+\?\|\^\$\\]/;
2048 ++$locales_test_number;
2049 $test_names{$locales_test_number} = 'Verify case insensitive matching works';
2050 foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) {
2051 if (! $is_utf8_locale) {
2053 next unless uc $y eq $x;
2054 debug_more( "UPPER=", disp_chars(($x)),
2055 "; lc=", disp_chars(($y)), "; ",
2056 "; fc=", disp_chars((fc $x)), "; ",
2057 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
2058 $x =~ /$y/i ? 1 : 0,
2060 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
2061 $y =~ /$x/i ? 1 : 0,
2064 # If $x and $y contain regular expression characters
2065 # AND THEY lowercase (/i) to regular expression characters,
2066 # regcomp() will be mightily confused. No, the \Q doesn't
2067 # help here (maybe regex engine internal lowercasing
2068 # is done after the \Q?) An example of this happening is
2069 # the bg_BG (Bulgarian) locale under EBCDIC (OS/390 USS):
2070 # the chr(173) (the "[") is the lowercase of the chr(235).
2072 # Similarly losing EBCDIC locales include cs_cz, cs_CZ,
2073 # el_gr, el_GR, en_us.IBM-037 (!), en_US.IBM-037 (!),
2074 # et_ee, et_EE, hr_hr, hr_HR, hu_hu, hu_HU, lt_LT,
2075 # mk_mk, mk_MK, nl_nl.IBM-037, nl_NL.IBM-037,
2076 # pl_pl, pl_PL, ro_ro, ro_RO, ru_ru, ru_RU,
2077 # sk_sk, sk_SK, sl_si, sl_SI, tr_tr, tr_TR.
2079 # Similar things can happen even under (bastardised)
2080 # non-EBCDIC locales: in many European countries before the
2081 # advent of ISO 8859-x nationally customised versions of
2082 # ISO 646 were devised, reusing certain punctuation
2083 # characters for modified characters needed by the
2084 # country/language. For example, the "|" might have
2085 # stood for U+00F6 or LATIN SMALL LETTER O WITH DIAERESIS.
2087 if ($x =~ $re || $y =~ $re) {
2088 print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n";
2091 push @f, $x unless $x =~ /$y/i && $y =~ /$x/i;
2093 # fc is not a locale concept, so Perl uses lc for it.
2094 push @f, $x unless lc $x eq fc $x;
2097 use locale ':not_characters';
2099 next unless uc $y eq $x;
2100 debug_more( "UPPER=", disp_chars(($x)),
2101 "; lc=", disp_chars(($y)), "; ",
2102 "; fc=", disp_chars((fc $x)), "; ",
2103 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
2104 $x =~ /$y/i ? 1 : 0,
2106 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
2107 $y =~ /$x/i ? 1 : 0,
2110 push @f, $x unless $x =~ /$y/i && $y =~ /$x/i;
2112 # The places where Unicode's lc is different from fc are
2113 # skipped here by virtue of the 'next unless uc...' line above
2114 push @f, $x unless lc $x eq fc $x;
2118 foreach my $x (sort { ord $a <=> ord $b } keys %lower) {
2119 if (! $is_utf8_locale) {
2121 next unless lc $y eq $x;
2122 debug_more( "lower=", disp_chars(($x)),
2123 "; uc=", disp_chars(($y)), "; ",
2124 "; fc=", disp_chars((fc $x)), "; ",
2125 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
2126 $x =~ /$y/i ? 1 : 0,
2128 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
2129 $y =~ /$x/i ? 1 : 0,
2131 if ($x =~ $re || $y =~ $re) { # See above.
2132 print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n";
2135 push @f, $x unless $x =~ /$y/i && $y =~ /$x/i;
2137 push @f, $x unless lc $x eq fc $x;
2140 use locale ':not_characters';
2142 next unless lc $y eq $x;
2143 debug_more( "lower=", disp_chars(($x)),
2144 "; uc=", disp_chars(($y)), "; ",
2145 "; fc=", disp_chars((fc $x)), "; ",
2146 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
2147 $x =~ /$y/i ? 1 : 0,
2149 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
2150 $y =~ /$x/i ? 1 : 0,
2152 push @f, $x unless $x =~ /$y/i && $y =~ /$x/i;
2154 push @f, $x unless lc $x eq fc $x;
2157 report_multi_result($Locale, $locales_test_number, \@f);
2158 $problematical_tests{$locales_test_number} = 1;
2164 ++$locales_test_number;
2165 $test_names{$locales_test_number} = 'Verify atof with locale radix and negative exponent';
2166 $problematical_tests{$locales_test_number} = 1;
2168 my $radix = POSIX::localeconv()->{decimal_point};
2170 "3.14e+9", "3${radix}14e+9", "3.14e-9", "3${radix}14e-9",
2171 "-3.14e+9", "-3${radix}14e+9", "-3.14e-9", "-3${radix}14e-9",
2174 if (! $is_utf8_locale) {
2176 for my $num (@nums) {
2178 unless sprintf("%g", $num) =~ /3.+14/;
2182 use locale ':not_characters';
2183 for my $num (@nums) {
2185 unless sprintf("%g", $num) =~ /3.+14/;
2189 report_result($Locale, $locales_test_number, @f == 0);
2191 print "# failed $locales_test_number locale '$Locale' numbers @f\n"
2196 my $final_locales_test_number = $locales_test_number;
2198 # Recount the errors.
2200 foreach $test_num ($first_locales_test_number..$final_locales_test_number) {
2201 if (%setlocale_failed) {
2204 elsif ($Problem{$test_num} || !defined $Okay{$test_num} || !@{$Okay{$test_num}}) {
2205 if (defined $not_necessarily_a_problem_test_number
2206 && $test_num == $not_necessarily_a_problem_test_number)
2208 print "# The failure of test $not_necessarily_a_problem_test_number is not necessarily fatal.\n";
2209 print "# It usually indicates a problem in the environment,\n";
2210 print "# not in Perl itself.\n";
2212 if ($Okay{$test_num} && grep { $_ == $test_num } keys %problematical_tests) {
2213 no warnings 'experimental::autoderef';
2214 # Round to nearest .1%
2215 my $percent_fail = (int(.5 + (1000 * scalar(keys $Problem{$test_num})
2216 / scalar(@Locale))))
2218 if ($percent_fail < $acceptable_failure_percentage) {
2220 $test_names{$test_num} .= 'TODO';
2221 print "# ", 100 - $percent_fail, "% of locales pass the following test, so it is likely that the failures\n";
2222 print "# are errors in the locale definitions. The test is marked TODO, as the\n";
2223 print "# problem is not likely to be Perl's\n";
2227 print "# $percent_fail% of locales (",
2228 scalar(keys $Problem{$test_num}),
2231 ") fail the above test (TODO cut-off is ",
2232 $acceptable_failure_percentage,
2238 print "# The code points that had this failure are given above. Look for lines\n";
2239 print "# that match 'failed $test_num'\n";
2242 print "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n";
2243 print "# Then look at that output for lines that match 'failed $test_num'\n";
2247 print "ok $test_num";
2248 if (defined $test_names{$test_num}) {
2249 # If TODO is in the test name, make it thus
2250 my $todo = $test_names{$test_num} =~ s/TODO\s*//;
2251 print " $test_names{$test_num}";
2252 print " # TODO" if $todo;
2257 $test_num = $final_locales_test_number;
2259 unless ( $^O =~ m!^(dragonfly|openbsd|bitrig|mirbsd)$! ) {
2263 local $SIG{__WARN__} = sub {
2264 $warned = $_[0] =~ /uninitialized/;
2266 my $z = "y" . setlocale(&POSIX::LC_ALL, "xyzzy");
2267 ok($warned, "variable set to setlocale(BAD LOCALE) is considered uninitialized");
2270 # Test that tainting and case changing works on utf8 strings. These tests are
2271 # placed last to avoid disturbing the hard-coded test numbers that existed at
2272 # the time these were added above this in this file.
2273 # This also tests that locale overrides unicode_strings in the same scope for
2275 setlocale(&POSIX::LC_ALL, "C");
2278 use feature 'unicode_strings';
2280 foreach my $function ("uc", "ucfirst", "lc", "lcfirst", "fc") {
2281 my @list; # List of code points to test for $function
2283 # Used to calculate the changed case for ASCII characters by using the
2284 # ord, instead of using one of the functions under test.
2285 my $ascii_case_change_delta;
2286 my $above_latin1_case_change_delta; # Same for the specific ords > 255
2289 # We test an ASCII character, which should change case;
2290 # a Latin1 character, which shouldn't change case under this C locale,
2291 # an above-Latin1 character that when the case is changed would cross
2292 # the 255/256 boundary, so doesn't change case
2293 # (the \x{149} is one of these, but changes into 2 characters, the
2294 # first one of which doesn't cross the boundary.
2295 # the final one in each list is an above-Latin1 character whose case
2296 # does change. The code below uses its position in its list as a
2297 # marker to indicate that it, unlike the other code points above
2298 # ASCII, has a successful case change
2300 # All casing operations under locale (but not :not_characters) should
2302 if ($function =~ /^u/) {
2303 @list = ("", "a", "\xe0", "\xff", "\x{fb00}", "\x{149}", "\x{101}");
2304 $ascii_case_change_delta = -32;
2305 $above_latin1_case_change_delta = -1;
2308 @list = ("", "A", "\xC0", "\x{17F}", "\x{100}");
2309 $ascii_case_change_delta = +32;
2310 $above_latin1_case_change_delta = +1;
2312 foreach my $is_utf8_locale (0 .. 1) {
2313 foreach my $j (0 .. $#list) {
2314 my $char = $list[$j];
2316 for my $encoded_in_utf8 (0 .. 1) {
2319 if (! $is_utf8_locale) {
2320 $should_be = ($j == $#list)
2321 ? chr(ord($char) + $above_latin1_case_change_delta)
2322 : (length $char == 0 || ord($char) > 127)
2324 : chr(ord($char) + $ascii_case_change_delta);
2326 # This monstrosity is in order to avoid using an eval,
2327 # which might perturb the results
2328 $changed = ($function eq "uc")
2330 : ($function eq "ucfirst")
2332 : ($function eq "lc")
2334 : ($function eq "lcfirst")
2336 : ($function eq "fc")
2338 : die("Unexpected function \"$function\"");
2344 # For utf8-locales the case changing functions
2345 # should work just like they do outside of locale.
2346 # Can use eval here because not testing it when
2348 $should_be = eval "$function('$char')";
2349 die "Unexpected eval error $@ from 'eval \"$function('$char')\"'" if $@;
2352 use locale ':not_characters';
2353 $changed = ($function eq "uc")
2355 : ($function eq "ucfirst")
2357 : ($function eq "lc")
2359 : ($function eq "lcfirst")
2361 : ($function eq "fc")
2363 : die("Unexpected function \"$function\"");
2365 ok($changed eq $should_be,
2366 "$function(\"$char\") in C locale "
2367 . (($is_utf8_locale)
2368 ? "(use locale ':not_characters'"
2370 . (($encoded_in_utf8)
2371 ? "; encoded in utf8)"
2372 : "; not encoded in utf8)")
2373 . " should be \"$should_be\", got \"$changed\"");
2375 # Tainting shouldn't happen for use locale :not_character
2378 ? check_taint($changed)
2379 : check_taint_not($changed);
2381 # Use UTF-8 next time through the loop
2382 utf8::upgrade($char);
2389 # Give final advice.
2393 foreach ($first_locales_test_number..$final_locales_test_number) {
2395 my @f = sort keys %{ $Problem{$_} };
2396 my $f = join(" ", @f);
2397 $f =~ s/(.{50,60}) /$1\n#\t/g;
2400 "# The locale ", (@f == 1 ? "definition" : "definitions"), "\n#\n",
2402 "# on your system may have errors because the locale test $_\n",
2403 "# \"$test_names{$_}\"\n",
2404 "# failed in ", (@f == 1 ? "that locale" : "those locales"),
2408 # If your users are not using these locales you are safe for the moment,
2409 # but please report this failure first to perlbug\@perl.com using the
2410 # perlbug script (as described in the INSTALL file) so that the exact
2411 # details of the failures can be sorted out first and then your operating
2412 # system supplier can be alerted about these anomalies.
2419 # Tell which locales were okay and which were not.
2424 foreach my $l (@Locale) {
2426 if ($setlocale_failed{$l}) {
2431 ($first_locales_test_number..$final_locales_test_number)
2433 $p++ if $Problem{$t}{$l};
2436 push @s, $l if $p == 0;
2437 push @F, $l unless $p == 0;
2441 my $s = join(" ", @s);
2442 $s =~ s/(.{50,60}) /$1\n#\t/g;
2445 "# The following locales\n#\n",
2447 "# tested okay.\n#\n",
2449 print "# None of your locales were fully okay.\n";
2453 my $F = join(" ", @F);
2454 $F =~ s/(.{50,60}) /$1\n#\t/g;
2458 $details = "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n";
2460 elsif ($debug == 1) {
2461 $details = "# For even more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=2.\n";
2465 "# The following locales\n#\n",
2467 "# had problems.\n#\n",
2470 print "# None of your locales were broken.\n";
2474 print "1..$test_num\n";