This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
lib/locale.t: Untaint before checking if next thing taints
[perl5.git] / lib / locale.t
CommitLineData
8ebc5c01
PP
1#!./perl -wT
2
66cbab2c
KW
3# This tests plain 'use locale' and adorned 'use locale ":not_characters"'
4# Because these pragmas are compile time, and I (khw) am trying to test
5# without using 'eval' as much as possible, which might cloud the issue, the
6# crucial parts of the code are duplicated in a block for each pragma.
7
6c2e653d
KW
8# To make a TODO test, add the string 'TODO' to its %test_names value
9
e3a2734b
KW
10binmode STDOUT, ':utf8';
11binmode STDERR, ':utf8';
12
8ebc5c01
PP
13BEGIN {
14 chdir 't' if -d 't';
20822f61 15 @INC = '../lib';
f9cbebe1 16 unshift @INC, '.';
b002077a 17 require Config; import Config;
97a0514d 18 if (!$Config{d_setlocale} || $Config{ccflags} =~ /\bD?NO_LOCALE\b/) {
b002077a
CS
19 print "1..0\n";
20 exit;
21 }
73fc293b 22 require './loc_tools.pl';
2de3dbcc 23 $| = 1;
8ebc5c01
PP
24}
25
26use strict;
26c1569f 27use feature 'fc';
8ebc5c01 28
92891c66 29# =1 adds debugging output; =2 increases the verbosity somewhat
108a305e 30my $debug = $ENV{PERL_DEBUG_FULL_TEST} // 0;
284102e8 31
6d5d702a
KW
32# Certain tests have been shown to be problematical for a few locales. Don't
33# fail them unless at least this percentage of the tested locales fail.
8321c25f
KW
34# Some Windows machines are defective in every locale but the C, calling \t
35# printable; superscripts to be digits, etc. See
662399e7
KW
36# http://markmail.org/message/5jwam4xsx4amsdnv. Also on AIX machines, many
37# locales call a no-break space a graphic.
df33771d 38# (There aren't 1000 locales currently in existence, so 99.9 works)
662399e7
KW
39my $acceptable_fold_failure_percentage = ($^O =~ / ^ ( MSWin32 | AIX ) $ /ix)
40 ? 99.9
41 : 5;
6d5d702a 42
7493b8f2
KW
43# The list of test numbers of the problematic tests.
44my @problematical_tests;
45
46
db4b7445
A
47use Dumpvalue;
48
49my $dumper = Dumpvalue->new(
50 tick => qq{"},
51 quoteHighBit => 0,
52 unctrl => "quote"
53 );
6be75cd7 54sub debug {
db4b7445
A
55 return unless $debug;
56 my($mess) = join "", @_;
57 chop $mess;
58 print $dumper->stringify($mess,1), "\n";
6be75cd7
JH
59}
60
92891c66
KW
61sub debug_more {
62 return unless $debug > 1;
63 return debug(@_);
64}
65
6be75cd7
JH
66sub debugf {
67 printf @_ if $debug;
68}
69
0e053d1e 70$a = 'abc %';
8ebc5c01 71
c213d471
KW
72my $test_num = 0;
73
8ebc5c01 74sub ok {
c213d471 75 my ($result, $message) = @_;
e3a2734b 76 $message = "" unless defined $message;
8ebc5c01
PP
77
78 print 'not ' unless ($result);
c213d471 79 print "ok " . ++$test_num;
e3a2734b
KW
80 print " $message";
81 print "\n";
8ebc5c01
PP
82}
83
84# First we'll do a lot of taint checking for locales.
85# This is the easiest to test, actually, as any locale,
86# even the default locale will taint under 'use locale'.
87
88sub is_tainted { # hello, camel two.
9f1b1f2d 89 no warnings 'uninitialized' ;
8ebc5c01 90 my $dummy;
ba74571d 91 local $@;
8ebc5c01
PP
92 not eval { $dummy = join("", @_), kill 0; 1 }
93}
94
a9b7c637
KW
95sub check_taint ($;$) {
96 my $message_tail = $_[1] // "";
97 $message_tail = ": $message_tail" if $message_tail;
98 ok is_tainted($_[0]), "verify that is tainted$message_tail";
8ebc5c01
PP
99}
100
a9b7c637
KW
101sub check_taint_not ($;$) {
102 my $message_tail = $_[1] // "";
103 $message_tail = ": $message_tail" if $message_tail;
104 ok((not is_tainted($_[0])), "verify that isn't tainted$message_tail");
8ebc5c01
PP
105}
106
bf3cd0e6
KW
107"\tb\t" =~ /^m?(\s)(.*)\1$/;
108check_taint_not $&, "not tainted outside 'use locale'";
109;
110
8ebc5c01
PP
111use locale; # engage locale and therefore locale taint.
112
36a42ae7 113check_taint_not $a;
8ebc5c01 114
36a42ae7
KW
115check_taint uc($a);
116check_taint "\U$a";
117check_taint ucfirst($a);
118check_taint "\u$a";
119check_taint lc($a);
26c1569f 120check_taint fc($a);
36a42ae7 121check_taint "\L$a";
26c1569f 122check_taint "\F$a";
36a42ae7
KW
123check_taint lcfirst($a);
124check_taint "\l$a";
8ebc5c01 125
36a42ae7
KW
126check_taint_not sprintf('%e', 123.456);
127check_taint_not sprintf('%f', 123.456);
128check_taint_not sprintf('%g', 123.456);
129check_taint_not sprintf('%d', 123.456);
130check_taint_not sprintf('%x', 123.456);
8ebc5c01
PP
131
132$_ = $a; # untaint $_
133
134$_ = uc($a); # taint $_
135
36a42ae7 136check_taint $_;
8ebc5c01
PP
137
138/(\w)/; # taint $&, $`, $', $+, $1.
36a42ae7
KW
139check_taint $&;
140check_taint $`;
141check_taint $';
142check_taint $+;
143check_taint $1;
144check_taint_not $2;
8ebc5c01
PP
145
146/(.)/; # untaint $&, $`, $', $+, $1.
36a42ae7
KW
147check_taint_not $&;
148check_taint_not $`;
149check_taint_not $';
150check_taint_not $+;
151check_taint_not $1;
152check_taint_not $2;
8ebc5c01
PP
153
154/(\W)/; # taint $&, $`, $', $+, $1.
36a42ae7
KW
155check_taint $&;
156check_taint $`;
157check_taint $';
158check_taint $+;
159check_taint $1;
160check_taint_not $2;
8ebc5c01 161
9486279c
KW
162/(.)/; # untaint $&, $`, $', $+, $1.
163check_taint_not $&, '/(.)/';
164check_taint_not $`;
165check_taint_not $';
166check_taint_not $+;
167check_taint_not $1;
168check_taint_not $2;
169
8ebc5c01 170/(\s)/; # taint $&, $`, $', $+, $1.
36a42ae7
KW
171check_taint $&;
172check_taint $`;
173check_taint $';
174check_taint $+;
175check_taint $1;
176check_taint_not $2;
8ebc5c01 177
9486279c
KW
178/(.)/; # untaint $&, $`, $', $+, $1.
179check_taint_not $&, '/(.)/';
180
8ebc5c01 181/(\S)/; # taint $&, $`, $', $+, $1.
36a42ae7
KW
182check_taint $&;
183check_taint $`;
184check_taint $';
185check_taint $+;
186check_taint $1;
187check_taint_not $2;
8ebc5c01
PP
188
189$_ = $a; # untaint $_
190
36a42ae7 191check_taint_not $_;
8ebc5c01
PP
192
193/(b)/; # this must not taint
36a42ae7
KW
194check_taint_not $&;
195check_taint_not $`;
196check_taint_not $';
197check_taint_not $+;
198check_taint_not $1;
199check_taint_not $2;
8ebc5c01
PP
200
201$_ = $a; # untaint $_
202
36a42ae7 203check_taint_not $_;
8ebc5c01
PP
204
205$b = uc($a); # taint $b
206s/(.+)/$b/; # this must taint only the $_
207
36a42ae7
KW
208check_taint $_;
209check_taint_not $&;
210check_taint_not $`;
211check_taint_not $';
212check_taint_not $+;
213check_taint_not $1;
214check_taint_not $2;
8ebc5c01
PP
215
216$_ = $a; # untaint $_
217
218s/(.+)/b/; # this must not taint
36a42ae7
KW
219check_taint_not $_;
220check_taint_not $&;
221check_taint_not $`;
222check_taint_not $';
223check_taint_not $+;
224check_taint_not $1;
225check_taint_not $2;
8ebc5c01
PP
226
227$b = $a; # untaint $b
228
229($b = $a) =~ s/\w/$&/;
36a42ae7
KW
230check_taint $b; # $b should be tainted.
231check_taint_not $a; # $a should be not.
8ebc5c01
PP
232
233$_ = $a; # untaint $_
234
235s/(\w)/\l$1/; # this must taint
36a42ae7
KW
236check_taint $_;
237check_taint $&;
238check_taint $`;
239check_taint $';
240check_taint $+;
241check_taint $1;
242check_taint_not $2;
8ebc5c01
PP
243
244$_ = $a; # untaint $_
245
246s/(\w)/\L$1/; # this must taint
36a42ae7
KW
247check_taint $_;
248check_taint $&;
249check_taint $`;
250check_taint $';
251check_taint $+;
252check_taint $1;
253check_taint_not $2;
8ebc5c01
PP
254
255$_ = $a; # untaint $_
256
257s/(\w)/\u$1/; # this must taint
36a42ae7
KW
258check_taint $_;
259check_taint $&;
260check_taint $`;
261check_taint $';
262check_taint $+;
263check_taint $1;
264check_taint_not $2;
8ebc5c01
PP
265
266$_ = $a; # untaint $_
267
268s/(\w)/\U$1/; # this must taint
36a42ae7
KW
269check_taint $_;
270check_taint $&;
271check_taint $`;
272check_taint $';
273check_taint $+;
274check_taint $1;
275check_taint_not $2;
8ebc5c01 276
faab3281
KW
277# After all this tainting $a should be cool.
278
279check_taint_not $a;
280
b99851e1
KW
281"a" =~ /([a-z])/;
282check_taint_not $1, '"a" =~ /([a-z])/';
283"foo.bar_baz" =~ /^(.*)[._](.*?)$/; # Bug 120675
284check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/';
285
7a9d1c02 286# BE SURE TO COPY ANYTHING YOU ADD to the block below
8ebc5c01 287
66cbab2c
KW
288{ # This is just the previous tests copied here with a different
289 # compile-time pragma.
290
291 use locale ':not_characters'; # engage restricted locale with different
292 # tainting rules
293
294 check_taint_not $a;
295
296 check_taint_not uc($a);
297 check_taint_not "\U$a";
298 check_taint_not ucfirst($a);
299 check_taint_not "\u$a";
300 check_taint_not lc($a);
26c1569f 301 check_taint_not fc($a);
66cbab2c 302 check_taint_not "\L$a";
26c1569f 303 check_taint_not "\F$a";
66cbab2c
KW
304 check_taint_not lcfirst($a);
305 check_taint_not "\l$a";
306
307 check_taint_not sprintf('%e', 123.456);
308 check_taint_not sprintf('%f', 123.456);
309 check_taint_not sprintf('%g', 123.456);
310 check_taint_not sprintf('%d', 123.456);
311 check_taint_not sprintf('%x', 123.456);
312
313 $_ = $a; # untaint $_
314
315 $_ = uc($a); # taint $_
316
317 check_taint_not $_;
318
319 /(\w)/; # taint $&, $`, $', $+, $1.
320 check_taint_not $&;
321 check_taint_not $`;
322 check_taint_not $';
323 check_taint_not $+;
324 check_taint_not $1;
325 check_taint_not $2;
326
327 /(.)/; # untaint $&, $`, $', $+, $1.
328 check_taint_not $&;
329 check_taint_not $`;
330 check_taint_not $';
331 check_taint_not $+;
332 check_taint_not $1;
333 check_taint_not $2;
334
335 /(\W)/; # taint $&, $`, $', $+, $1.
336 check_taint_not $&;
337 check_taint_not $`;
338 check_taint_not $';
339 check_taint_not $+;
340 check_taint_not $1;
341 check_taint_not $2;
342
343 /(\s)/; # taint $&, $`, $', $+, $1.
344 check_taint_not $&;
345 check_taint_not $`;
346 check_taint_not $';
347 check_taint_not $+;
348 check_taint_not $1;
349 check_taint_not $2;
350
351 /(\S)/; # taint $&, $`, $', $+, $1.
352 check_taint_not $&;
353 check_taint_not $`;
354 check_taint_not $';
355 check_taint_not $+;
356 check_taint_not $1;
357 check_taint_not $2;
358
359 $_ = $a; # untaint $_
360
361 check_taint_not $_;
362
363 /(b)/; # this must not taint
364 check_taint_not $&;
365 check_taint_not $`;
366 check_taint_not $';
367 check_taint_not $+;
368 check_taint_not $1;
369 check_taint_not $2;
370
371 $_ = $a; # untaint $_
372
373 check_taint_not $_;
374
375 $b = uc($a); # taint $b
376 s/(.+)/$b/; # this must taint only the $_
377
378 check_taint_not $_;
379 check_taint_not $&;
380 check_taint_not $`;
381 check_taint_not $';
382 check_taint_not $+;
383 check_taint_not $1;
384 check_taint_not $2;
385
386 $_ = $a; # untaint $_
387
388 s/(.+)/b/; # this must not taint
389 check_taint_not $_;
390 check_taint_not $&;
391 check_taint_not $`;
392 check_taint_not $';
393 check_taint_not $+;
394 check_taint_not $1;
395 check_taint_not $2;
396
397 $b = $a; # untaint $b
398
399 ($b = $a) =~ s/\w/$&/;
400 check_taint_not $b; # $b should be tainted.
401 check_taint_not $a; # $a should be not.
402
403 $_ = $a; # untaint $_
404
405 s/(\w)/\l$1/; # this must taint
406 check_taint_not $_;
407 check_taint_not $&;
408 check_taint_not $`;
409 check_taint_not $';
410 check_taint_not $+;
411 check_taint_not $1;
412 check_taint_not $2;
413
414 $_ = $a; # untaint $_
415
416 s/(\w)/\L$1/; # this must taint
417 check_taint_not $_;
418 check_taint_not $&;
419 check_taint_not $`;
420 check_taint_not $';
421 check_taint_not $+;
422 check_taint_not $1;
423 check_taint_not $2;
424
425 $_ = $a; # untaint $_
426
427 s/(\w)/\u$1/; # this must taint
428 check_taint_not $_;
429 check_taint_not $&;
430 check_taint_not $`;
431 check_taint_not $';
432 check_taint_not $+;
433 check_taint_not $1;
434 check_taint_not $2;
435
436 $_ = $a; # untaint $_
437
438 s/(\w)/\U$1/; # this must taint
439 check_taint_not $_;
440 check_taint_not $&;
441 check_taint_not $`;
442 check_taint_not $';
443 check_taint_not $+;
444 check_taint_not $1;
445 check_taint_not $2;
446
447 # After all this tainting $a should be cool.
448
449 check_taint_not $a;
7a9d1c02
KW
450
451 "a" =~ /([a-z])/;
452 check_taint_not $1, '"a" =~ /([a-z])/';
453 "foo.bar_baz" =~ /^(.*)[._](.*?)$/; # Bug 120675
454 check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/';
66cbab2c
KW
455}
456
457# Here are in scope of 'use locale'
458
8ebc5c01
PP
459# I think we've seen quite enough of taint.
460# Let us do some *real* locale work now,
284102e8 461# unless setlocale() is missing (i.e. minitest).
8ebc5c01 462
6cf0b567 463# The test number before our first setlocale()
66330f13 464my $final_without_setlocale = $test_num;
6cf0b567 465
284102e8
JH
466# Find locales.
467
6be75cd7
JH
468debug "# Scanning for locales...\n";
469
9b0711ee
KW
470require POSIX; import POSIX ':locale_h';
471
472my @Locale = find_locales([ &POSIX::LC_CTYPE, &POSIX::LC_ALL ]);
4599a1de 473
887ef7ed
PP
474debug "# Locales =\n";
475for ( @Locale ) {
476 debug "# $_\n";
477}
8ebc5c01 478
d369fd5b
KW
479unless (@Locale) {
480 print "1..$test_num\n";
481 exit;
482}
483
d369fd5b
KW
484
485setlocale(&POSIX::LC_ALL, "C");
486
73fc293b
KW
487my %posixes;
488
284102e8 489my %Problem;
2a680da6
JH
490my %Okay;
491my %Testing;
30032ef4 492my @Added_alpha; # Alphas that aren't in the C locale.
c08acc4c 493my %test_names;
284102e8 494
705af3af 495sub disp_chars {
019bf7dd
KW
496 # This returns a display string denoting the input parameter @_, each
497 # entry of which is a single character in the range 0-255. The first part
498 # of the output is a string of the characters in @_ that are ASCII
499 # graphics, and hence unambiguously displayable. They are given by code
500 # point order. The second part is the remaining code points, the ordinals
501 # of which are each displayed as 2-digit hex. Blanks are inserted so as
502 # to keep anything from the first part looking like a 2-digit hex number.
503
504 no locale;
505 my @chars = sort { ord $a <=> ord $b } @_;
506 my $output = "";
019bf7dd
KW
507 my $range_start;
508 my $start_class;
509 push @chars, chr(258); # This sentinel simplifies the loop termination
510 # logic
511 foreach my $i (0 .. @chars - 1) {
512 my $char = $chars[$i];
513 my $range_end;
514 my $class;
515
516 # We avoid using [:posix:] classes, as these are being tested in this
517 # file. Each equivalence class below is for things that can appear in
518 # a range; those that can't be in a range have class -1. 0 for those
519 # which should be output in hex; and >0 for the other ranges
520 if ($char =~ /[A-Z]/) {
521 $class = 2;
522 }
523 elsif ($char =~ /[a-z]/) {
524 $class = 3;
525 }
526 elsif ($char =~ /[0-9]/) {
527 $class = 4;
528 }
78fbfaf5
KW
529 # Uncomment to get literal punctuation displayed instead of hex
530 #elsif ($char =~ /[[\]!"#\$\%&\'()*+,.\/:\\;<=>?\@\^_`{|}~-]/) {
531 # $class = -1; # Punct never appears in a range
532 #}
019bf7dd
KW
533 else {
534 $class = 0; # Output in hex
535 }
536
537 if (! defined $range_start) {
538 if ($class < 0) {
78fbfaf5 539 $output .= " " . $char;
019bf7dd
KW
540 }
541 else {
542 $range_start = ord $char;
543 $start_class = $class;
544 }
545 } # A range ends if not consecutive, or the class-type changes
546 elsif (ord $char != ($range_end = ord($chars[$i-1])) + 1
547 || $class != $start_class)
548 {
549
550 # Here, the current character is not in the range. This means the
551 # previous character must have been. Output the range up through
552 # that one.
553 my $range_length = $range_end - $range_start + 1;
554 if ($start_class > 0) {
555 $output .= " " . chr($range_start);
556 $output .= "-" . chr($range_end) if $range_length > 1;
557 }
558 else {
78fbfaf5
KW
559 $output .= sprintf(" %02X", $range_start);
560 $output .= sprintf("-%02X", $range_end) if $range_length > 1;
019bf7dd
KW
561 }
562
563 # Handle the new current character, as potentially beginning a new
564 # range
565 undef $range_start;
566 redo;
567 }
568 }
569
570 $output =~ s/^ //;
78fbfaf5 571 return $output;
019bf7dd
KW
572}
573
30032ef4
KW
574sub report_result {
575 my ($Locale, $i, $pass_fail, $message) = @_;
15bbd6a2
KW
576 $message //= "";
577 $message = " ($message)" if $message;
30032ef4 578 unless ($pass_fail) {
2a680da6 579 $Problem{$i}{$Locale} = 1;
baae13cb 580 debug "# failed $i ($test_names{$i}) with locale '$Locale'$message\n";
2a680da6
JH
581 } else {
582 push @{$Okay{$i}}, $Locale;
583 }
584}
585
7c844d17
KW
586sub report_multi_result {
587 my ($Locale, $i, $results_ref) = @_;
588
589 # $results_ref points to an array, each element of which is a character that was
590 # in error for this test numbered '$i'. If empty, the test passed
591
592 my $message = "";
593 if (@$results_ref) {
705af3af 594 $message = join " ", "for", disp_chars(@$results_ref);
7c844d17
KW
595 }
596 report_result($Locale, $i, @$results_ref == 0, $message);
597}
598
c4093d7d
KW
599my $first_locales_test_number = $final_without_setlocale + 1;
600my $locales_test_number;
601my $not_necessarily_a_problem_test_number;
6d5d702a 602my $first_casing_test_number;
c4093d7d
KW
603my %setlocale_failed; # List of locales that setlocale() didn't work on
604
73fc293b 605foreach my $Locale (@Locale) {
c4093d7d 606 $locales_test_number = $first_locales_test_number - 1;
78fbfaf5 607 debug "#\n";
284102e8 608 debug "# Locale = $Locale\n";
284102e8 609
a810e350 610 unless (setlocale(&POSIX::LC_ALL, $Locale)) {
c4093d7d 611 $setlocale_failed{$Locale} = $Locale;
284102e8 612 next;
8ebc5c01 613 }
8ebc5c01 614
31f05a37
KW
615 # We test UTF-8 locales only under ':not_characters'; It is easier to
616 # test them in other test files than here. Non- UTF-8 locales are tested
617 # only under plain 'use locale', as otherwise we would have to convert
618 # everything in them to Unicode.
66cbab2c 619
95eaa1bf
KW
620 my %UPPER = (); # All alpha X for which uc(X) == X and lc(X) != X
621 my %lower = (); # All alpha X for which lc(X) == X and uc(X) != X
622 my %BoThCaSe = (); # All alpha X for which uc(X) == lc(X) == X
66cbab2c 623
ab8b8bcc 624 my $is_utf8_locale = is_locale_utf8($Locale);
b9df08e4
KW
625
626 debug "# is utf8 locale? = $is_utf8_locale\n";
627
628 my $radix = localeconv()->{decimal_point};
629 if ($radix !~ / ^ [[:ascii:]] + $/x) {
630 use bytes;
631 $radix = disp_chars(split "", $radix);
632 }
633 debug "# radix = $radix\n";
634
66cbab2c
KW
635 if (! $is_utf8_locale) {
636 use locale;
fb844330
KW
637 @{$posixes{'word'}} = grep /\w/, map { chr } 0..255;
638 @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255;
639 @{$posixes{'space'}} = grep /\s/, map { chr } 0..255;
640 @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255;
641 @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255;
642 @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255;
643 @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255;
644 @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255;
645 @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255;
646 @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255;
647 @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255;
98ed56e7 648 @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255;
fb844330
KW
649 @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255;
650 @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255;
651 @{$posixes{'cased'}} = grep /[[:upper:]]/i, map {chr } 0..255;
e5272a46 652
71e5cbb3
KW
653 # Sieve the uppercase and the lowercase.
654
fb844330 655 for (@{$posixes{'word'}}) {
71e5cbb3
KW
656 if (/[^\d_]/) { # skip digits and the _
657 if (uc($_) eq $_) {
658 $UPPER{$_} = $_;
659 }
660 if (lc($_) eq $_) {
661 $lower{$_} = $_;
662 }
663 }
664 }
66cbab2c
KW
665 }
666 else {
667 use locale ':not_characters';
fb844330
KW
668 @{$posixes{'word'}} = grep /\w/, map { chr } 0..255;
669 @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255;
670 @{$posixes{'space'}} = grep /\s/, map { chr } 0..255;
671 @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255;
672 @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255;
673 @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255;
674 @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255;
675 @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255;
676 @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255;
677 @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255;
678 @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255;
98ed56e7 679 @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255;
fb844330
KW
680 @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255;
681 @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255;
682 @{$posixes{'cased'}} = grep /[[:upper:]]/i, map {chr } 0..255;
683 for (@{$posixes{'word'}}) {
66cbab2c
KW
684 if (/[^\d_]/) { # skip digits and the _
685 if (uc($_) eq $_) {
686 $UPPER{$_} = $_;
687 }
688 if (lc($_) eq $_) {
689 $lower{$_} = $_;
690 }
691 }
692 }
693 }
a160ac48 694
fb844330
KW
695 # Ordered, where possible, in groups of "this is a subset of the next
696 # one"
705af3af
KW
697 debug "# :upper: = ", disp_chars(@{$posixes{'upper'}}), "\n";
698 debug "# :lower: = ", disp_chars(@{$posixes{'lower'}}), "\n";
699 debug "# :cased: = ", disp_chars(@{$posixes{'cased'}}), "\n";
700 debug "# :alpha: = ", disp_chars(@{$posixes{'alpha'}}), "\n";
701 debug "# :alnum: = ", disp_chars(@{$posixes{'alnum'}}), "\n";
702 debug "# w = ", disp_chars(@{$posixes{'word'}}), "\n";
703 debug "# :graph: = ", disp_chars(@{$posixes{'graph'}}), "\n";
704 debug "# :print: = ", disp_chars(@{$posixes{'print'}}), "\n";
705 debug "# d = ", disp_chars(@{$posixes{'digit'}}), "\n";
706 debug "# :xdigit: = ", disp_chars(@{$posixes{'xdigit'}}), "\n";
707 debug "# :blank: = ", disp_chars(@{$posixes{'blank'}}), "\n";
708 debug "# s = ", disp_chars(@{$posixes{'space'}}), "\n";
709 debug "# :punct: = ", disp_chars(@{$posixes{'punct'}}), "\n";
710 debug "# :cntrl: = ", disp_chars(@{$posixes{'cntrl'}}), "\n";
711 debug "# :ascii: = ", disp_chars(@{$posixes{'ascii'}}), "\n";
a160ac48 712
284102e8 713 foreach (keys %UPPER) {
a160ac48 714
097ee67d 715 $BoThCaSe{$_}++ if exists $lower{$_};
284102e8
JH
716 }
717 foreach (keys %lower) {
097ee67d 718 $BoThCaSe{$_}++ if exists $UPPER{$_};
284102e8 719 }
097ee67d 720 foreach (keys %BoThCaSe) {
284102e8
JH
721 delete $UPPER{$_};
722 delete $lower{$_};
723 }
724
e5cc0528
KW
725 my %Unassigned;
726 foreach my $ord ( 0 .. 255 ) {
727 $Unassigned{chr $ord} = 1;
728 }
729 foreach my $class (keys %posixes) {
730 foreach my $char (@{$posixes{$class}}) {
731 delete $Unassigned{$char};
732 }
733 }
734
b9df08e4
KW
735 debug "# UPPER = ", disp_chars(sort { ord $a <=> ord $b } keys %UPPER), "\n";
736 debug "# lower = ", disp_chars(sort { ord $a <=> ord $b } keys %lower), "\n";
737 debug "# BoThCaSe = ", disp_chars(sort { ord $a <=> ord $b } keys %BoThCaSe), "\n";
705af3af 738 debug "# Unassigned = ", disp_chars(sort { ord $a <=> ord $b } keys %Unassigned), "\n";
284102e8 739
baa71cfd 740 my @failures;
3da38613 741 my @fold_failures;
b9df08e4 742 foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) {
baa71cfd 743 my $ok;
3da38613 744 my $fold_ok;
baa71cfd
KW
745 if ($is_utf8_locale) {
746 use locale ':not_characters';
747 $ok = $x =~ /[[:upper:]]/;
3da38613 748 $fold_ok = $x =~ /[[:lower:]]/i;
baa71cfd
KW
749 }
750 else {
751 use locale;
752 $ok = $x =~ /[[:upper:]]/;
3da38613 753 $fold_ok = $x =~ /[[:lower:]]/i;
baa71cfd
KW
754 }
755 push @failures, $x unless $ok;
3da38613 756 push @fold_failures, $x unless $fold_ok;
baa71cfd 757 }
baa71cfd 758 $locales_test_number++;
6d5d702a 759 $first_casing_test_number = $locales_test_number;
95eaa1bf 760 $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/ matches all alpha X for which uc(X) == X and lc(X) != X';
7c844d17 761 report_multi_result($Locale, $locales_test_number, \@failures);
6d5d702a 762
3da38613 763 $locales_test_number++;
6d5d702a 764
95eaa1bf 765 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i matches all alpha X for which uc(X) == X and lc(X) != X';
7c844d17 766 report_multi_result($Locale, $locales_test_number, \@fold_failures);
baa71cfd 767
baa71cfd 768 undef @failures;
3da38613 769 undef @fold_failures;
baa71cfd 770
b9df08e4 771 foreach my $x (sort { ord $a <=> ord $b } keys %lower) {
baa71cfd 772 my $ok;
3da38613 773 my $fold_ok;
baa71cfd
KW
774 if ($is_utf8_locale) {
775 use locale ':not_characters';
776 $ok = $x =~ /[[:lower:]]/;
3da38613 777 $fold_ok = $x =~ /[[:upper:]]/i;
baa71cfd
KW
778 }
779 else {
780 use locale;
781 $ok = $x =~ /[[:lower:]]/;
3da38613 782 $fold_ok = $x =~ /[[:upper:]]/i;
baa71cfd
KW
783 }
784 push @failures, $x unless $ok;
3da38613 785 push @fold_failures, $x unless $fold_ok;
baa71cfd
KW
786 }
787
788 $locales_test_number++;
95eaa1bf 789 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/ matches all alpha X for which lc(X) == X and uc(X) != X';
7c844d17
KW
790 report_multi_result($Locale, $locales_test_number, \@failures);
791
3da38613 792 $locales_test_number++;
95eaa1bf 793 $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/i matches all alpha X for which lc(X) == X and uc(X) != X';
7c844d17 794 report_multi_result($Locale, $locales_test_number, \@fold_failures);
baa71cfd 795
9445c837
KW
796 { # Find the alphabetic characters that are not considered alphabetics
797 # in the default (C) locale.
8ebc5c01 798
284102e8 799 no locale;
71e5cbb3 800
30032ef4 801 @Added_alpha = ();
5e7a1028 802 for (keys %UPPER, keys %lower, keys %BoThCaSe) {
30032ef4 803 push(@Added_alpha, $_) if (/\W/);
284102e8 804 }
8ebc5c01 805 }
8ebc5c01 806
b9df08e4 807 @Added_alpha = sort { ord $a <=> ord $b } @Added_alpha;
8ebc5c01 808
705af3af 809 debug "# Added_alpha = ", disp_chars(@Added_alpha), "\n";
8ebc5c01 810
db31898d 811 # Cross-check the whole 8-bit character set.
8ebc5c01 812
db31898d
KW
813 ++$locales_test_number;
814 my @f;
815 $test_names{$locales_test_number} = 'Verify that \w and [:word:] are identical';
816 for (map { chr } 0..255) {
817 if ($is_utf8_locale) {
818 use locale ':not_characters';
819 push @f, $_ unless /[[:word:]]/ == /\w/;
820 }
821 else {
822 push @f, $_ unless /[[:word:]]/ == /\w/;
823 }
824 }
825 report_multi_result($Locale, $locales_test_number, \@f);
8ebc5c01 826
db31898d
KW
827 ++$locales_test_number;
828 undef @f;
829 $test_names{$locales_test_number} = 'Verify that \d and [:digit:] are identical';
830 for (map { chr } 0..255) {
831 if ($is_utf8_locale) {
832 use locale ':not_characters';
833 push @f, $_ unless /[[:digit:]]/ == /\d/;
834 }
835 else {
836 push @f, $_ unless /[[:digit:]]/ == /\d/;
837 }
838 }
839 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 840
db31898d
KW
841 ++$locales_test_number;
842 undef @f;
843 $test_names{$locales_test_number} = 'Verify that \s and [:space:] are identical';
844 for (map { chr } 0..255) {
845 if ($is_utf8_locale) {
846 use locale ':not_characters';
847 push @f, $_ unless /[[:space:]]/ == /\s/;
848 }
849 else {
850 push @f, $_ unless /[[:space:]]/ == /\s/;
851 }
852 }
853 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 854
db31898d
KW
855 ++$locales_test_number;
856 undef @f;
857 $test_names{$locales_test_number} = 'Verify that [:posix:] and [:^posix:] are mutually exclusive';
858 for (map { chr } 0..255) {
859 if ($is_utf8_locale) {
860 use locale ':not_characters';
861 push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) ||
862 (/[[:alnum:]]/ xor /[[:^alnum:]]/) ||
863 (/[[:ascii:]]/ xor /[[:^ascii:]]/) ||
864 (/[[:blank:]]/ xor /[[:^blank:]]/) ||
865 (/[[:cntrl:]]/ xor /[[:^cntrl:]]/) ||
866 (/[[:digit:]]/ xor /[[:^digit:]]/) ||
867 (/[[:graph:]]/ xor /[[:^graph:]]/) ||
868 (/[[:lower:]]/ xor /[[:^lower:]]/) ||
869 (/[[:print:]]/ xor /[[:^print:]]/) ||
870 (/[[:space:]]/ xor /[[:^space:]]/) ||
871 (/[[:upper:]]/ xor /[[:^upper:]]/) ||
872 (/[[:word:]]/ xor /[[:^word:]]/) ||
873 (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) ||
874
875 # effectively is what [:cased:] would be if it existed.
876 (/[[:upper:]]/i xor /[[:^upper:]]/i);
877 }
878 else {
879 push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) ||
880 (/[[:alnum:]]/ xor /[[:^alnum:]]/) ||
881 (/[[:ascii:]]/ xor /[[:^ascii:]]/) ||
882 (/[[:blank:]]/ xor /[[:^blank:]]/) ||
883 (/[[:cntrl:]]/ xor /[[:^cntrl:]]/) ||
884 (/[[:digit:]]/ xor /[[:^digit:]]/) ||
885 (/[[:graph:]]/ xor /[[:^graph:]]/) ||
886 (/[[:lower:]]/ xor /[[:^lower:]]/) ||
887 (/[[:print:]]/ xor /[[:^print:]]/) ||
888 (/[[:space:]]/ xor /[[:^space:]]/) ||
889 (/[[:upper:]]/ xor /[[:^upper:]]/) ||
890 (/[[:word:]]/ xor /[[:^word:]]/) ||
891 (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) ||
892 (/[[:upper:]]/i xor /[[:^upper:]]/i);
893 }
894 }
895 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 896
db31898d
KW
897 # The rules for the relationships are given in:
898 # http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
32e8aa3f 899
660c7bcc
KW
900
901 ++$locales_test_number;
902 undef @f;
903 $test_names{$locales_test_number} = 'Verify that [:lower:] contains at least a-z';
904 for ('a' .. 'z') {
905 if ($is_utf8_locale) {
906 use locale ':not_characters';
907 push @f, $_ unless /[[:lower:]]/;
908 }
909 else {
910 push @f, $_ unless /[[:lower:]]/;
911 }
912 }
913 report_multi_result($Locale, $locales_test_number, \@f);
914
db31898d
KW
915 ++$locales_test_number;
916 undef @f;
917 $test_names{$locales_test_number} = 'Verify that [:lower:] is a subset of [:alpha:]';
918 for (map { chr } 0..255) {
919 if ($is_utf8_locale) {
920 use locale ':not_characters';
921 push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/;
922 }
923 else {
924 push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/;
925 }
926 }
927 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 928
db31898d
KW
929 ++$locales_test_number;
930 undef @f;
660c7bcc
KW
931 $test_names{$locales_test_number} = 'Verify that [:upper:] contains at least A-Z';
932 for ('A' .. 'Z') {
933 if ($is_utf8_locale) {
934 use locale ':not_characters';
935 push @f, $_ unless /[[:upper:]]/;
936 }
937 else {
938 push @f, $_ unless /[[:upper:]]/;
939 }
940 }
941 report_multi_result($Locale, $locales_test_number, \@f);
942
943 ++$locales_test_number;
944 undef @f;
db31898d
KW
945 $test_names{$locales_test_number} = 'Verify that [:upper:] is a subset of [:alpha:]';
946 for (map { chr } 0..255) {
947 if ($is_utf8_locale) {
948 use locale ':not_characters';
949 push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/;
950 }
951 else {
952 push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/;
953 }
954 }
955 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 956
db31898d
KW
957 ++$locales_test_number;
958 undef @f;
959 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i is a subset of [:alpha:]';
960 for (map { chr } 0..255) {
961 if ($is_utf8_locale) {
962 use locale ':not_characters';
963 push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/;
964 }
965 else {
966 push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/;
967 }
968 }
969 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 970
db31898d
KW
971 ++$locales_test_number;
972 undef @f;
973 $test_names{$locales_test_number} = 'Verify that [:alpha:] is a subset of [:alnum:]';
974 for (map { chr } 0..255) {
975 if ($is_utf8_locale) {
976 use locale ':not_characters';
977 push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/;
978 }
979 else {
980 push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/;
981 }
982 }
983 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 984
db31898d
KW
985 ++$locales_test_number;
986 undef @f;
660c7bcc
KW
987 $test_names{$locales_test_number} = 'Verify that [:digit:] contains at least 0-9';
988 for ('0' .. '9') {
989 if ($is_utf8_locale) {
990 use locale ':not_characters';
991 push @f, $_ unless /[[:digit:]]/;
992 }
993 else {
994 push @f, $_ unless /[[:digit:]]/;
995 }
996 }
997 report_multi_result($Locale, $locales_test_number, \@f);
998
999 ++$locales_test_number;
1000 undef @f;
db31898d
KW
1001 $test_names{$locales_test_number} = 'Verify that [:digit:] is a subset of [:alnum:]';
1002 for (map { chr } 0..255) {
1003 if ($is_utf8_locale) {
1004 use locale ':not_characters';
1005 push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/;
1006 }
1007 else {
1008 push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/;
1009 }
1010 }
1011 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1012
db31898d
KW
1013 ++$locales_test_number;
1014 undef @f;
aac995d2 1015 $test_names{$locales_test_number} = 'Verify that [:digit:] matches either 10 or 20 code points';
fb844330 1016 report_result($Locale, $locales_test_number, @{$posixes{'digit'}} == 10 || @{$posixes{'digit'}} == 20);
aac995d2
KW
1017
1018 ++$locales_test_number;
1019 undef @f;
660c7bcc
KW
1020 $test_names{$locales_test_number} = 'Verify that if there is a second set of digits in [:digit:], they are consecutive';
1021 if (@{$posixes{'digit'}} == 20) {
1022 my $previous_ord;
fc81f5f2 1023 for (map { chr } 0..255) {
660c7bcc
KW
1024 next unless /[[:digit:]]/;
1025 next if /[0-9]/;
1026 if (defined $previous_ord) {
1027 if ($is_utf8_locale) {
1028 use locale ':not_characters';
1029 push @f, $_ if ord $_ != $previous_ord + 1;
1030 }
1031 else {
1032 push @f, $_ if ord $_ != $previous_ord + 1;
1033 }
fc81f5f2 1034 }
660c7bcc 1035 $previous_ord = ord $_;
db31898d
KW
1036 }
1037 }
1038 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1039
db31898d
KW
1040 ++$locales_test_number;
1041 undef @f;
660c7bcc 1042 $test_names{$locales_test_number} = 'Verify that [:digit:] is a subset of [:xdigit:]';
db31898d
KW
1043 for (map { chr } 0..255) {
1044 if ($is_utf8_locale) {
1045 use locale ':not_characters';
660c7bcc 1046 push @f, $_ if /[[:digit:]]/ and ! /[[:xdigit:]]/;
db31898d
KW
1047 }
1048 else {
660c7bcc 1049 push @f, $_ if /[[:digit:]]/ and ! /[[:xdigit:]]/;
db31898d
KW
1050 }
1051 }
1052 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1053
660c7bcc
KW
1054 ++$locales_test_number;
1055 undef @f;
1056 $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains at least A-F, a-f';
1057 for ('A' .. 'F', 'a' .. 'f') {
1058 if ($is_utf8_locale) {
1059 use locale ':not_characters';
1060 push @f, $_ unless /[[:xdigit:]]/;
1061 }
1062 else {
1063 push @f, $_ unless /[[:xdigit:]]/;
1064 }
1065 }
1066 report_multi_result($Locale, $locales_test_number, \@f);
1067
1068 ++$locales_test_number;
1069 undef @f;
1070 $test_names{$locales_test_number} = 'Verify that any additional members of [:xdigit:], are in groups of 6 consecutive code points';
1071 my $previous_ord;
1072 my $count = 0;
1073 for (map { chr } 0..255) {
1074 next unless /[[:xdigit:]]/;
1075 next if /[[:digit:]]/;
1076 next if /[A-Fa-f]/;
1077 if (defined $previous_ord) {
1078 if ($is_utf8_locale) {
1079 use locale ':not_characters';
1080 push @f, $_ if ord $_ != $previous_ord + 1;
1081 }
1082 else {
1083 push @f, $_ if ord $_ != $previous_ord + 1;
1084 }
1085 }
1086 $count++;
1087 if ($count == 6) {
1088 undef $previous_ord;
1089 }
1090 else {
1091 $previous_ord = ord $_;
1092 }
1093 }
1094 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1095
db31898d
KW
1096 ++$locales_test_number;
1097 undef @f;
1098 $test_names{$locales_test_number} = 'Verify that [:xdigit:] is a subset of [:graph:]';
1099 for (map { chr } 0..255) {
1100 if ($is_utf8_locale) {
1101 use locale ':not_characters';
1102 push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/;
1103 }
1104 else {
1105 push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/;
1106 }
1107 }
1108 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1109
660c7bcc
KW
1110 # Note that xdigit doesn't have to be a subset of alnum
1111
db31898d
KW
1112 ++$locales_test_number;
1113 undef @f;
1114 $test_names{$locales_test_number} = 'Verify that [:punct:] is a subset of [:graph:]';
1115 for (map { chr } 0..255) {
1116 if ($is_utf8_locale) {
1117 use locale ':not_characters';
1118 push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/;
1119 }
1120 else {
1121 push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/;
1122 }
1123 }
1124 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1125
db31898d
KW
1126 ++$locales_test_number;
1127 undef @f;
660c7bcc
KW
1128 $test_names{$locales_test_number} = 'Verify that the space character is not in [:graph:]';
1129 if ($is_utf8_locale) {
1130 use locale ':not_characters';
1131 push @f, " " if " " =~ /[[:graph:]]/;
1132 }
1133 else {
1134 push @f, " " if " " =~ /[[:graph:]]/;
1135 }
1136 report_multi_result($Locale, $locales_test_number, \@f);
1137
1138 ++$locales_test_number;
1139 undef @f;
1140 $test_names{$locales_test_number} = 'Verify that [:space:] contains at least [\f\n\r\t\cK ]';
1141 for (' ', "\f", "\n", "\r", "\t", "\cK") {
1142 if ($is_utf8_locale) {
1143 use locale ':not_characters';
1144 push @f, $_ unless /[[:space:]]/;
1145 }
1146 else {
1147 push @f, $_ unless /[[:space:]]/;
1148 }
1149 }
1150 report_multi_result($Locale, $locales_test_number, \@f);
1151
1152 ++$locales_test_number;
1153 undef @f;
1154 $test_names{$locales_test_number} = 'Verify that [:blank:] contains at least [\t ]';
1155 for (' ', "\t") {
1156 if ($is_utf8_locale) {
1157 use locale ':not_characters';
1158 push @f, $_ unless /[[:blank:]]/;
1159 }
1160 else {
1161 push @f, $_ unless /[[:blank:]]/;
1162 }
1163 }
1164 report_multi_result($Locale, $locales_test_number, \@f);
1165
1166 ++$locales_test_number;
1167 undef @f;
db31898d
KW
1168 $test_names{$locales_test_number} = 'Verify that [:blank:] is a subset of [:space:]';
1169 for (map { chr } 0..255) {
1170 if ($is_utf8_locale) {
1171 use locale ':not_characters';
1172 push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/;
1173 }
1174 else {
1175 push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/;
1176 }
1177 }
1178 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1179
db31898d
KW
1180 ++$locales_test_number;
1181 undef @f;
1182 $test_names{$locales_test_number} = 'Verify that [:graph:] is a subset of [:print:]';
1183 for (map { chr } 0..255) {
1184 if ($is_utf8_locale) {
1185 use locale ':not_characters';
1186 push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/;
1187 }
1188 else {
1189 push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/;
1190 }
1191 }
1192 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1193
db31898d
KW
1194 ++$locales_test_number;
1195 undef @f;
660c7bcc
KW
1196 $test_names{$locales_test_number} = 'Verify that the space character is in [:print:]';
1197 if ($is_utf8_locale) {
1198 use locale ':not_characters';
1199 push @f, " " if " " !~ /[[:print:]]/;
1200 }
1201 else {
1202 push @f, " " if " " !~ /[[:print:]]/;
1203 }
1204 report_multi_result($Locale, $locales_test_number, \@f);
1205
1206 ++$locales_test_number;
1207 undef @f;
db31898d
KW
1208 $test_names{$locales_test_number} = 'Verify that isn\'t both [:cntrl:] and [:print:]';
1209 for (map { chr } 0..255) {
1210 if ($is_utf8_locale) {
1211 use locale ':not_characters';
1212 push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/);
1213 }
1214 else {
1215 push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/);
1216 }
1217 }
1218 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1219
db31898d
KW
1220 ++$locales_test_number;
1221 undef @f;
660c7bcc
KW
1222 $test_names{$locales_test_number} = 'Verify that isn\'t both [:alpha:] and [:digit:]';
1223 for (map { chr } 0..255) {
1224 if ($is_utf8_locale) {
1225 use locale ':not_characters';
1226 push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/;
1227 }
1228 else {
1229 push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/;
1230 }
1231 }
1232 report_multi_result($Locale, $locales_test_number, \@f);
1233
1234 ++$locales_test_number;
1235 undef @f;
db31898d
KW
1236 $test_names{$locales_test_number} = 'Verify that isn\'t both [:alnum:] and [:punct:]';
1237 for (map { chr } 0..255) {
1238 if ($is_utf8_locale) {
1239 use locale ':not_characters';
1240 push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/;
1241 }
1242 else {
1243 push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/;
1244 }
1245 }
1246 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1247
db31898d
KW
1248 ++$locales_test_number;
1249 undef @f;
1250 $test_names{$locales_test_number} = 'Verify that isn\'t both [:xdigit:] and [:punct:]';
1251 for (map { chr } 0..255) {
1252 if ($is_utf8_locale) {
1253 use locale ':not_characters';
1254 push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/);
1255 }
1256 else {
1257 push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/);
1258 }
1259 }
1260 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1261
db31898d
KW
1262 ++$locales_test_number;
1263 undef @f;
1264 $test_names{$locales_test_number} = 'Verify that isn\'t both [:graph:] and [:space:]';
1265 for (map { chr } 0..255) {
1266 if ($is_utf8_locale) {
1267 use locale ':not_characters';
1268 push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/);
1269 }
1270 else {
1271 push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/);
1272 }
1273 }
1274 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1275
7493b8f2
KW
1276 foreach ($first_casing_test_number..$locales_test_number) {
1277 push @problematical_tests, $_;
1278 }
1279
32e8aa3f 1280
db31898d
KW
1281 # Test for read-only scalars' locale vs non-locale comparisons.
1282
1283 {
1284 no locale;
1285 my $ok;
1286 $a = "qwerty";
1287 if ($is_utf8_locale) {
1288 use locale ':not_characters';
1289 $ok = ($a cmp "qwerty") == 0;
1290 }
1291 else {
1292 use locale;
1293 $ok = ($a cmp "qwerty") == 0;
1294 }
1295 report_result($Locale, ++$locales_test_number, $ok);
1296 $test_names{$locales_test_number} = 'Verify that cmp works with a read-only scalar; no- vs locale';
1297 }
8ebc5c01 1298
db31898d
KW
1299 {
1300 my ($from, $to, $lesser, $greater,
1301 @test, %test, $test, $yes, $no, $sign);
284102e8 1302
db31898d
KW
1303 ++$locales_test_number;
1304 $test_names{$locales_test_number} = 'Verify that "le", "ne", etc work';
1305 $not_necessarily_a_problem_test_number = $locales_test_number;
1306 for (0..9) {
1307 # Select a slice.
fb844330
KW
1308 $from = int(($_*@{$posixes{'word'}})/10);
1309 $to = $from + int(@{$posixes{'word'}}/10);
1310 $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}});
1311 $lesser = join('', @{$posixes{'word'}}[$from..$to]);
db31898d
KW
1312 # Select a slice one character on.
1313 $from++; $to++;
fb844330
KW
1314 $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}});
1315 $greater = join('', @{$posixes{'word'}}[$from..$to]);
66cbab2c
KW
1316 if ($is_utf8_locale) {
1317 use locale ':not_characters';
db31898d
KW
1318 ($yes, $no, $sign) = ($lesser lt $greater
1319 ? (" ", "not ", 1)
1320 : ("not ", " ", -1));
66cbab2c
KW
1321 }
1322 else {
1323 use locale;
db31898d
KW
1324 ($yes, $no, $sign) = ($lesser lt $greater
1325 ? (" ", "not ", 1)
1326 : ("not ", " ", -1));
66cbab2c 1327 }
db31898d
KW
1328 # all these tests should FAIL (return 0). Exact lt or gt cannot
1329 # be tested because in some locales, say, eacute and E may test
1330 # equal.
1331 @test =
1332 (
1333 $no.' ($lesser le $greater)', # 1
1334 'not ($lesser ne $greater)', # 2
1335 ' ($lesser eq $greater)', # 3
1336 $yes.' ($lesser ge $greater)', # 4
1337 $yes.' ($lesser ge $greater)', # 5
1338 $yes.' ($greater le $lesser )', # 7
1339 'not ($greater ne $lesser )', # 8
1340 ' ($greater eq $lesser )', # 9
1341 $no.' ($greater ge $lesser )', # 10
1342 'not (($lesser cmp $greater) == -($sign))' # 11
1343 );
1344 @test{@test} = 0 x @test;
1345 $test = 0;
1346 for my $ti (@test) {
66cbab2c
KW
1347 if ($is_utf8_locale) {
1348 use locale ':not_characters';
db31898d 1349 $test{$ti} = eval $ti;
66cbab2c
KW
1350 }
1351 else {
db31898d
KW
1352 # Already in 'use locale';
1353 $test{$ti} = eval $ti;
66cbab2c 1354 }
db31898d
KW
1355 $test ||= $test{$ti}
1356 }
1357 report_result($Locale, $locales_test_number, $test == 0);
1358 if ($test) {
1359 debug "# lesser = '$lesser'\n";
1360 debug "# greater = '$greater'\n";
1361 debug "# lesser cmp greater = ",
1362 $lesser cmp $greater, "\n";
1363 debug "# greater cmp lesser = ",
1364 $greater cmp $lesser, "\n";
1365 debug "# (greater) from = $from, to = $to\n";
1366 for my $ti (@test) {
1367 debugf("# %-40s %-4s", $ti,
1368 $test{$ti} ? 'FAIL' : 'ok');
1369 if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) {
1370 debugf("(%s == %4d)", $1, eval $1);
66cbab2c 1371 }
db31898d
KW
1372 debug "\n#";
1373 }
1374
1375 last;
1376 }
1377 }
1378 }
c4093d7d 1379
66cbab2c
KW
1380 my $ok1;
1381 my $ok2;
1382 my $ok3;
1383 my $ok4;
1384 my $ok5;
1385 my $ok6;
1386 my $ok7;
1387 my $ok8;
1388 my $ok9;
1389 my $ok10;
1390 my $ok11;
1391 my $ok12;
1392 my $ok13;
1500bd91 1393 my $ok14;
28acfe03
KW
1394 my $ok15;
1395 my $ok16;
e46375fa
KW
1396 my $ok17;
1397 my $ok18;
66cbab2c
KW
1398
1399 my $c;
1400 my $d;
1401 my $e;
1402 my $f;
1403 my $g;
e46375fa
KW
1404 my $h;
1405 my $i;
1406 my $j;
66cbab2c
KW
1407
1408 if (! $is_utf8_locale) {
71e5cbb3 1409 use locale;
6be75cd7 1410
71e5cbb3 1411 my ($x, $y) = (1.23, 1.23);
6be75cd7 1412
71e5cbb3
KW
1413 $a = "$x";
1414 printf ''; # printf used to reset locale to "C"
1415 $b = "$y";
1416 $ok1 = $a eq $b;
6be75cd7 1417
71e5cbb3
KW
1418 $c = "$x";
1419 my $z = sprintf ''; # sprintf used to reset locale to "C"
1420 $d = "$y";
1421 $ok2 = $c eq $d;
1422 {
66cbab2c 1423
71e5cbb3
KW
1424 use warnings;
1425 my $w = 0;
1426 local $SIG{__WARN__} =
1427 sub {
1428 print "# @_\n";
1429 $w++;
1430 };
6be75cd7 1431
71e5cbb3
KW
1432 # The == (among other ops) used to warn for locales
1433 # that had something else than "." as the radix character.
6be75cd7 1434
71e5cbb3
KW
1435 $ok3 = $c == 1.23;
1436 $ok4 = $c == $x;
1437 $ok5 = $c == $d;
1438 {
1439 no locale;
66cbab2c 1440
b79536ea 1441 $e = "$x";
71e5cbb3
KW
1442
1443 $ok6 = $e == 1.23;
1444 $ok7 = $e == $x;
1445 $ok8 = $e == $c;
1446 }
66cbab2c 1447
71e5cbb3
KW
1448 $f = "1.23";
1449 $g = 2.34;
e46375fa
KW
1450 $h = 1.5;
1451 $i = 1.25;
1452 $j = "$h:$i";
66cbab2c 1453
71e5cbb3
KW
1454 $ok9 = $f == 1.23;
1455 $ok10 = $f == $x;
1456 $ok11 = $f == $c;
1457 $ok12 = abs(($f + $g) - 3.57) < 0.01;
1458 $ok13 = $w == 0;
28acfe03 1459 $ok14 = $ok15 = $ok16 = 1; # Skip for non-utf8 locales
71e5cbb3 1460 }
e46375fa
KW
1461 {
1462 no locale;
1463 $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i);
1464 }
1465 $ok18 = $j eq sprintf("%g:%g", $h, $i);
66cbab2c
KW
1466 }
1467 else {
1468 use locale ':not_characters';
1469
1470 my ($x, $y) = (1.23, 1.23);
1471 $a = "$x";
1472 printf ''; # printf used to reset locale to "C"
1473 $b = "$y";
1474 $ok1 = $a eq $b;
1475
1476 $c = "$x";
1477 my $z = sprintf ''; # sprintf used to reset locale to "C"
1478 $d = "$y";
1479 $ok2 = $c eq $d;
1480 {
1481 use warnings;
1482 my $w = 0;
1483 local $SIG{__WARN__} =
1484 sub {
1485 print "# @_\n";
1486 $w++;
1487 };
1488 $ok3 = $c == 1.23;
1489 $ok4 = $c == $x;
1490 $ok5 = $c == $d;
1491 {
1492 no locale;
b79536ea 1493 $e = "$x";
66cbab2c
KW
1494
1495 $ok6 = $e == 1.23;
1496 $ok7 = $e == $x;
1497 $ok8 = $e == $c;
1498 }
1499
1500 $f = "1.23";
1501 $g = 2.34;
e46375fa
KW
1502 $h = 1.5;
1503 $i = 1.25;
1504 $j = "$h:$i";
66cbab2c
KW
1505
1506 $ok9 = $f == 1.23;
1507 $ok10 = $f == $x;
1508 $ok11 = $f == $c;
1509 $ok12 = abs(($f + $g) - 3.57) < 0.01;
1510 $ok13 = $w == 0;
1500bd91
KW
1511
1512 # Look for non-ASCII error messages, and verify that the first
1513 # such is in UTF-8 (the others almost certainly will be like the
1514 # first).
1515 $ok14 = 1;
1516 foreach my $err (keys %!) {
1517 use Errno;
1518 $! = eval "&Errno::$err"; # Convert to strerror() output
1519 my $strerror = "$!";
1520 if ("$strerror" =~ /\P{ASCII}/) {
2ef12107 1521 $ok14 = utf8::is_utf8($strerror);
1500bd91
KW
1522 last;
1523 }
1524 }
28acfe03
KW
1525
1526 # Similarly, we verify that a non-ASCII radix is in UTF-8. This
1527 # also catches if there is a disparity between sprintf and
1528 # stringification.
1529
1530 my $string_g = "$g";
2ef12107 1531 my $sprintf_g = sprintf("%g", $g);
28acfe03 1532
2ef12107
KW
1533 $ok15 = $string_g =~ / ^ \p{ASCII}+ $ /x || utf8::is_utf8($string_g);
1534 $ok16 = $sprintf_g eq $string_g;
66cbab2c 1535 }
e46375fa
KW
1536 {
1537 no locale;
1538 $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i);
1539 }
1540 $ok18 = $j eq sprintf("%g:%g", $h, $i);
66cbab2c
KW
1541 }
1542
30032ef4 1543 report_result($Locale, ++$locales_test_number, $ok1);
66cbab2c
KW
1544 $test_names{$locales_test_number} = 'Verify that an intervening printf doesn\'t change assignment results';
1545 my $first_a_test = $locales_test_number;
1546
1547 debug "# $first_a_test..$locales_test_number: \$a = $a, \$b = $b, Locale = $Locale\n";
1548
30032ef4 1549 report_result($Locale, ++$locales_test_number, $ok2);
66cbab2c
KW
1550 $test_names{$locales_test_number} = 'Verify that an intervening sprintf doesn\'t change assignment results';
1551
1552 my $first_c_test = $locales_test_number;
1553
30032ef4 1554 report_result($Locale, ++$locales_test_number, $ok3);
71e5cbb3 1555 $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a constant';
6be75cd7 1556
30032ef4 1557 report_result($Locale, ++$locales_test_number, $ok4);
71e5cbb3 1558 $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar';
66cbab2c 1559
30032ef4 1560 report_result($Locale, ++$locales_test_number, $ok5);
71e5cbb3 1561 $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar and an intervening sprintf';
66cbab2c 1562
71e5cbb3 1563 debug "# $first_c_test..$locales_test_number: \$c = $c, \$d = $d, Locale = $Locale\n";
66cbab2c 1564
30032ef4 1565 report_result($Locale, ++$locales_test_number, $ok6);
b79536ea 1566 $test_names{$locales_test_number} = 'Verify that can assign stringified under inner no-locale block';
71e5cbb3 1567 my $first_e_test = $locales_test_number;
6be75cd7 1568
30032ef4 1569 report_result($Locale, ++$locales_test_number, $ok7);
71e5cbb3 1570 $test_names{$locales_test_number} = 'Verify that "==" with a scalar still works in inner no locale';
66cbab2c 1571
30032ef4 1572 report_result($Locale, ++$locales_test_number, $ok8);
71e5cbb3 1573 $test_names{$locales_test_number} = 'Verify that "==" with a scalar and an intervening sprintf still works in inner no locale';
c4093d7d 1574
71e5cbb3 1575 debug "# $first_e_test..$locales_test_number: \$e = $e, no locale\n";
2a680da6 1576
30032ef4 1577 report_result($Locale, ++$locales_test_number, $ok9);
71e5cbb3
KW
1578 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a constant';
1579 my $first_f_test = $locales_test_number;
6be75cd7 1580
30032ef4 1581 report_result($Locale, ++$locales_test_number, $ok10);
71e5cbb3 1582 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar';
66cbab2c 1583
30032ef4 1584 report_result($Locale, ++$locales_test_number, $ok11);
71e5cbb3 1585 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar and an intervening sprintf';
906f284f 1586
30032ef4 1587 report_result($Locale, ++$locales_test_number, $ok12);
71e5cbb3 1588 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix can participate in an addition and function call as numeric';
c4093d7d 1589
30032ef4 1590 report_result($Locale, ++$locales_test_number, $ok13);
71e5cbb3 1591 $test_names{$locales_test_number} = 'Verify that don\'t get warning under "==" even if radix is not a dot';
c4093d7d 1592
30032ef4 1593 report_result($Locale, ++$locales_test_number, $ok14);
1500bd91
KW
1594 $test_names{$locales_test_number} = 'Verify that non-ASCII UTF-8 error messages are in UTF-8';
1595
30032ef4 1596 report_result($Locale, ++$locales_test_number, $ok15);
28acfe03
KW
1597 $test_names{$locales_test_number} = 'Verify that a number with a UTF-8 radix has a UTF-8 stringification';
1598
30032ef4 1599 report_result($Locale, ++$locales_test_number, $ok16);
28acfe03
KW
1600 $test_names{$locales_test_number} = 'Verify that a sprintf of a number with a UTF-8 radix yields UTF-8';
1601
e46375fa
KW
1602 report_result($Locale, ++$locales_test_number, $ok17);
1603 $test_names{$locales_test_number} = 'Verify that a sprintf of a number outside locale scope uses a dot radix';
1604
1605 report_result($Locale, ++$locales_test_number, $ok18);
1606 $test_names{$locales_test_number} = 'Verify that a sprintf of a number back within locale scope uses locale radix';
1607
71e5cbb3 1608 debug "# $first_f_test..$locales_test_number: \$f = $f, \$g = $g, back to locale = $Locale\n";
906f284f 1609
26d80d95
LC
1610 # Does taking lc separately differ from taking
1611 # the lc "in-line"? (This was the bug 19990704.002, change #3568.)
1612 # The bug was in the caching of the 'o'-magic.
66cbab2c 1613 if (! $is_utf8_locale) {
2a680da6 1614 use locale;
6be75cd7 1615
2a680da6
JH
1616 sub lcA {
1617 my $lc0 = lc $_[0];
1618 my $lc1 = lc $_[1];
1619 return $lc0 cmp $lc1;
1620 }
6be75cd7 1621
2a680da6
JH
1622 sub lcB {
1623 return lc($_[0]) cmp lc($_[1]);
1624 }
6be75cd7 1625
2a680da6
JH
1626 my $x = "ab";
1627 my $y = "aa";
1628 my $z = "AB";
6be75cd7 1629
30032ef4 1630 report_result($Locale, ++$locales_test_number,
2a680da6
JH
1631 lcA($x, $y) == 1 && lcB($x, $y) == 1 ||
1632 lcA($x, $z) == 0 && lcB($x, $z) == 0);
6be75cd7 1633 }
66cbab2c
KW
1634 else {
1635 use locale ':not_characters';
1636
1637 sub lcC {
1638 my $lc0 = lc $_[0];
1639 my $lc1 = lc $_[1];
1640 return $lc0 cmp $lc1;
1641 }
1642
1643 sub lcD {
1644 return lc($_[0]) cmp lc($_[1]);
1645 }
1646
1647 my $x = "ab";
1648 my $y = "aa";
1649 my $z = "AB";
1650
30032ef4 1651 report_result($Locale, ++$locales_test_number,
66cbab2c
KW
1652 lcC($x, $y) == 1 && lcD($x, $y) == 1 ||
1653 lcC($x, $z) == 0 && lcD($x, $z) == 0);
1654 }
1655 $test_names{$locales_test_number} = 'Verify "lc(foo) cmp lc(bar)" is the same as using intermediaries for the cmp';
d8093b23 1656
26d80d95
LC
1657 # Does lc of an UPPER (if different from the UPPER) match
1658 # case-insensitively the UPPER, and does the UPPER match
1659 # case-insensitively the lc of the UPPER. And vice versa.
3ba0e062 1660 {
ef4a39e5
JH
1661 use locale;
1662 no utf8;
1663 my $re = qr/[\[\(\{\*\+\?\|\^\$\\]/;
1664
1665 my @f = ();
c4093d7d 1666 ++$locales_test_number;
c08acc4c 1667 $test_names{$locales_test_number} = 'Verify case insensitive matching works';
b9df08e4 1668 foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) {
66cbab2c 1669 if (! $is_utf8_locale) {
71e5cbb3
KW
1670 my $y = lc $x;
1671 next unless uc $y eq $x;
92891c66
KW
1672 debug_more( "# UPPER=", disp_chars(($x)),
1673 "; lc=", disp_chars(($y)), "; ",
4eac893c 1674 "; fc=", disp_chars((fc $x)), "; ",
92891c66
KW
1675 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
1676 $x =~ /$y/i ? 1 : 0,
1677 "; ",
1678 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
1679 $y =~ /$x/i ? 1 : 0,
1680 "\n");
71e5cbb3
KW
1681 #
1682 # If $x and $y contain regular expression characters
1683 # AND THEY lowercase (/i) to regular expression characters,
1684 # regcomp() will be mightily confused. No, the \Q doesn't
1685 # help here (maybe regex engine internal lowercasing
1686 # is done after the \Q?) An example of this happening is
1687 # the bg_BG (Bulgarian) locale under EBCDIC (OS/390 USS):
1688 # the chr(173) (the "[") is the lowercase of the chr(235).
1689 #
1690 # Similarly losing EBCDIC locales include cs_cz, cs_CZ,
1691 # el_gr, el_GR, en_us.IBM-037 (!), en_US.IBM-037 (!),
1692 # et_ee, et_EE, hr_hr, hr_HR, hu_hu, hu_HU, lt_LT,
1693 # mk_mk, mk_MK, nl_nl.IBM-037, nl_NL.IBM-037,
1694 # pl_pl, pl_PL, ro_ro, ro_RO, ru_ru, ru_RU,
1695 # sk_sk, sk_SK, sl_si, sl_SI, tr_tr, tr_TR.
1696 #
1697 # Similar things can happen even under (bastardised)
1698 # non-EBCDIC locales: in many European countries before the
1699 # advent of ISO 8859-x nationally customised versions of
1700 # ISO 646 were devised, reusing certain punctuation
1701 # characters for modified characters needed by the
1702 # country/language. For example, the "|" might have
1703 # stood for U+00F6 or LATIN SMALL LETTER O WITH DIAERESIS.
1704 #
1705 if ($x =~ $re || $y =~ $re) {
1706 print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n";
1707 next;
1708 }
69d77f94 1709 push @f, $x unless $x =~ /$y/i && $y =~ /$x/i;
26c1569f
KW
1710
1711 # fc is not a locale concept, so Perl uses lc for it.
1712 push @f, $x unless lc $x eq fc $x;
66cbab2c
KW
1713 }
1714 else {
1715 use locale ':not_characters';
1716 my $y = lc $x;
1717 next unless uc $y eq $x;
92891c66
KW
1718 debug_more( "# UPPER=", disp_chars(($x)),
1719 "; lc=", disp_chars(($y)), "; ",
4eac893c 1720 "; fc=", disp_chars((fc $x)), "; ",
92891c66
KW
1721 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
1722 $x =~ /$y/i ? 1 : 0,
1723 "; ",
1724 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
1725 $y =~ /$x/i ? 1 : 0,
1726 "\n");
66cbab2c 1727
66cbab2c 1728 push @f, $x unless $x =~ /$y/i && $y =~ /$x/i;
26c1569f
KW
1729
1730 # The places where Unicode's lc is different from fc are
1731 # skipped here by virtue of the 'next unless uc...' line above
1732 push @f, $x unless lc $x eq fc $x;
66cbab2c 1733 }
c00ff1c7 1734 }
ef4a39e5 1735
b9df08e4 1736 foreach my $x (sort { ord $a <=> ord $b } keys %lower) {
66cbab2c 1737 if (! $is_utf8_locale) {
71e5cbb3
KW
1738 my $y = uc $x;
1739 next unless lc $y eq $x;
92891c66
KW
1740 debug_more( "# lower=", disp_chars(($x)),
1741 "; uc=", disp_chars(($y)), "; ",
4eac893c 1742 "; fc=", disp_chars((fc $x)), "; ",
92891c66
KW
1743 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
1744 $x =~ /$y/i ? 1 : 0,
1745 "; ",
1746 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
1747 $y =~ /$x/i ? 1 : 0,
1748 "\n");
71e5cbb3
KW
1749 if ($x =~ $re || $y =~ $re) { # See above.
1750 print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n";
1751 next;
1752 }
69d77f94 1753 push @f, $x unless $x =~ /$y/i && $y =~ /$x/i;
26c1569f
KW
1754
1755 push @f, $x unless lc $x eq fc $x;
66cbab2c
KW
1756 }
1757 else {
1758 use locale ':not_characters';
1759 my $y = uc $x;
1760 next unless lc $y eq $x;
92891c66
KW
1761 debug_more( "# lower=", disp_chars(($x)),
1762 "; uc=", disp_chars(($y)), "; ",
4eac893c 1763 "; fc=", disp_chars((fc $x)), "; ",
92891c66
KW
1764 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
1765 $x =~ /$y/i ? 1 : 0,
1766 "; ",
1767 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
1768 $y =~ /$x/i ? 1 : 0,
1769 "\n");
66cbab2c 1770 push @f, $x unless $x =~ /$y/i && $y =~ /$x/i;
26c1569f
KW
1771
1772 push @f, $x unless lc $x eq fc $x;
66cbab2c 1773 }
c00ff1c7 1774 }
7c844d17 1775 report_multi_result($Locale, $locales_test_number, \@f);
7493b8f2 1776 push @problematical_tests, $locales_test_number;
d8093b23 1777 }
78787052
JL
1778
1779 # [perl #109318]
1780 {
1781 my @f = ();
1782 ++$locales_test_number;
1783 $test_names{$locales_test_number} = 'Verify atof with locale radix and negative exponent';
1784
1785 my $radix = POSIX::localeconv()->{decimal_point};
1786 my @nums = (
1787 "3.14e+9", "3${radix}14e+9", "3.14e-9", "3${radix}14e-9",
1788 "-3.14e+9", "-3${radix}14e+9", "-3.14e-9", "-3${radix}14e-9",
1789 );
1790
1791 if (! $is_utf8_locale) {
1792 use locale;
1793 for my $num (@nums) {
1794 push @f, $num
1795 unless sprintf("%g", $num) =~ /3.+14/;
1796 }
1797 }
1798 else {
1799 use locale ':not_characters';
1800 for my $num (@nums) {
1801 push @f, $num
1802 unless sprintf("%g", $num) =~ /3.+14/;
1803 }
1804 }
1805
30032ef4 1806 report_result($Locale, $locales_test_number, @f == 0);
78787052
JL
1807 if (@f) {
1808 print "# failed $locales_test_number locale '$Locale' numbers @f\n"
1809 }
1810 }
8ebc5c01 1811}
284102e8 1812
c4093d7d 1813my $final_locales_test_number = $locales_test_number;
6cf0b567 1814
2a680da6
JH
1815# Recount the errors.
1816
0d3899b1 1817foreach $test_num ($first_locales_test_number..$final_locales_test_number) {
c4093d7d
KW
1818 if (%setlocale_failed) {
1819 print "not ";
1820 }
0d3899b1 1821 elsif ($Problem{$test_num} || !defined $Okay{$test_num} || !@{$Okay{$test_num}}) {
c4093d7d 1822 if (defined $not_necessarily_a_problem_test_number
0d3899b1 1823 && $test_num == $not_necessarily_a_problem_test_number)
c4093d7d
KW
1824 {
1825 print "# The failure of test $not_necessarily_a_problem_test_number is not necessarily fatal.\n";
b4e009be 1826 print "# It usually indicates a problem in the environment,\n";
284102e8
JH
1827 print "# not in Perl itself.\n";
1828 }
7493b8f2 1829 if ($Okay{$test_num} && grep { $_ == $test_num } @problematical_tests) {
d401967c 1830 no warnings 'experimental::autoderef';
0a974e2d 1831 # Round to nearest .1%
0d3899b1 1832 my $percent_fail = (int(.5 + (1000 * scalar(keys $Problem{$test_num})
0a974e2d
KW
1833 / scalar(@Locale))))
1834 / 10;
f5627fc1
KW
1835 if (! $debug && $percent_fail < $acceptable_fold_failure_percentage)
1836 {
0d3899b1 1837 $test_names{$test_num} .= 'TODO';
6d5d702a
KW
1838 print "# ", 100 - $percent_fail, "% of locales pass the following test, so it is likely that the failures\n";
1839 print "# are errors in the locale definitions. The test is marked TODO, as the\n";
1840 print "# problem is not likely to be Perl's\n";
1841 }
1842 }
f5627fc1
KW
1843 print "#\n";
1844 if ($debug) {
1845 print "# The code points that had this failure are given above. Look for lines\n";
0d3899b1 1846 print "# that match 'failed $test_num'\n";
f5627fc1
KW
1847 }
1848 else {
1849 print "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n";
0d3899b1 1850 print "# Then look at that output for lines that match 'failed $test_num'\n";
108a305e 1851 }
284102e8 1852 print "not ";
8ebc5c01 1853 }
0d3899b1
KW
1854 print "ok $test_num";
1855 if (defined $test_names{$test_num}) {
6c2e653d 1856 # If TODO is in the test name, make it thus
0d3899b1
KW
1857 my $todo = $test_names{$test_num} =~ s/TODO\s*//;
1858 print " $test_names{$test_num}";
6c2e653d
KW
1859 print " # TODO" if $todo;
1860 }
c4093d7d 1861 print "\n";
8ebc5c01 1862}
fb73857a 1863
c4093d7d 1864$test_num = $final_locales_test_number;
c213d471 1865
4bc67723 1866unless ( $^O =~ m!^(dragonfly|openbsd|bitrig|mirbsd)$! ) {
083ace8f 1867 # perl #115808
fbd840df
KW
1868 use warnings;
1869 my $warned = 0;
1870 local $SIG{__WARN__} = sub {
1871 $warned = $_[0] =~ /uninitialized/;
1872 };
1873 my $z = "y" . setlocale(&POSIX::LC_ALL, "xyzzy");
1874 ok($warned, "variable set to setlocale(BAD LOCALE) is considered uninitialized");
1875}
1876
094a2f8c 1877# Test that tainting and case changing works on utf8 strings. These tests are
1f5852c9
KW
1878# placed last to avoid disturbing the hard-coded test numbers that existed at
1879# the time these were added above this in this file.
0099bb8d
KW
1880# This also tests that locale overrides unicode_strings in the same scope for
1881# non-utf8 strings.
a810e350 1882setlocale(&POSIX::LC_ALL, "C");
094a2f8c
KW
1883{
1884 use locale;
0099bb8d 1885 use feature 'unicode_strings';
094a2f8c 1886
26c1569f 1887 foreach my $function ("uc", "ucfirst", "lc", "lcfirst", "fc") {
094a2f8c
KW
1888 my @list; # List of code points to test for $function
1889
1890 # Used to calculate the changed case for ASCII characters by using the
1891 # ord, instead of using one of the functions under test.
1892 my $ascii_case_change_delta;
1893 my $above_latin1_case_change_delta; # Same for the specific ords > 255
1894 # that we use
1895
445bf929 1896 # We test an ASCII character, which should change case;
094a2f8c 1897 # a Latin1 character, which shouldn't change case under this C locale,
094a2f8c 1898 # an above-Latin1 character that when the case is changed would cross
445bf929
KW
1899 # the 255/256 boundary, so doesn't change case
1900 # (the \x{149} is one of these, but changes into 2 characters, the
094a2f8c
KW
1901 # first one of which doesn't cross the boundary.
1902 # the final one in each list is an above-Latin1 character whose case
445bf929
KW
1903 # does change. The code below uses its position in its list as a
1904 # marker to indicate that it, unlike the other code points above
1905 # ASCII, has a successful case change
1906 #
1907 # All casing operations under locale (but not :not_characters) should
1908 # taint
094a2f8c 1909 if ($function =~ /^u/) {
094a2f8c
KW
1910 @list = ("", "a", "\xe0", "\xff", "\x{fb00}", "\x{149}", "\x{101}");
1911 $ascii_case_change_delta = -32;
1912 $above_latin1_case_change_delta = -1;
1913 }
1914 else {
1ca267a5 1915 @list = ("", "A", "\xC0", "\x{17F}", "\x{100}");
094a2f8c
KW
1916 $ascii_case_change_delta = +32;
1917 $above_latin1_case_change_delta = +1;
1918 }
66cbab2c 1919 foreach my $is_utf8_locale (0 .. 1) {
71e5cbb3
KW
1920 foreach my $j (0 .. $#list) {
1921 my $char = $list[$j];
0099bb8d
KW
1922
1923 for my $encoded_in_utf8 (0 .. 1) {
faf0c248
KW
1924 my $should_be;
1925 my $changed;
1926 if (! $is_utf8_locale) {
1927 $should_be = ($j == $#list)
1928 ? chr(ord($char) + $above_latin1_case_change_delta)
1929 : (length $char == 0 || ord($char) > 127)
1930 ? $char
1931 : chr(ord($char) + $ascii_case_change_delta);
1932
1933 # This monstrosity is in order to avoid using an eval,
1934 # which might perturb the results
1935 $changed = ($function eq "uc")
1936 ? uc($char)
1937 : ($function eq "ucfirst")
1938 ? ucfirst($char)
1939 : ($function eq "lc")
1940 ? lc($char)
1941 : ($function eq "lcfirst")
1942 ? lcfirst($char)
26c1569f
KW
1943 : ($function eq "fc")
1944 ? fc($char)
faf0c248
KW
1945 : die("Unexpected function \"$function\"");
1946 }
1947 else {
1948 {
1949 no locale;
71e5cbb3 1950
faf0c248
KW
1951 # For utf8-locales the case changing functions
1952 # should work just like they do outside of locale.
1953 # Can use eval here because not testing it when
1954 # not in locale.
1955 $should_be = eval "$function('$char')";
1956 die "Unexpected eval error $@ from 'eval \"$function('$char')\"'" if $@;
71e5cbb3 1957
faf0c248
KW
1958 }
1959 use locale ':not_characters';
1960 $changed = ($function eq "uc")
1961 ? uc($char)
1962 : ($function eq "ucfirst")
1963 ? ucfirst($char)
1964 : ($function eq "lc")
1965 ? lc($char)
1966 : ($function eq "lcfirst")
1967 ? lcfirst($char)
26c1569f
KW
1968 : ($function eq "fc")
1969 ? fc($char)
faf0c248 1970 : die("Unexpected function \"$function\"");
71e5cbb3 1971 }
faf0c248
KW
1972 ok($changed eq $should_be,
1973 "$function(\"$char\") in C locale "
1974 . (($is_utf8_locale)
1975 ? "(use locale ':not_characters'"
1976 : "(use locale")
1977 . (($encoded_in_utf8)
1978 ? "; encoded in utf8)"
1979 : "; not encoded in utf8)")
1980 . " should be \"$should_be\", got \"$changed\"");
1981
445bf929
KW
1982 # Tainting shouldn't happen for use locale :not_character
1983 # (a utf8 locale)
1984 (! $is_utf8_locale)
faf0c248
KW
1985 ? check_taint($changed)
1986 : check_taint_not($changed);
1987
1988 # Use UTF-8 next time through the loop
1989 utf8::upgrade($char);
0099bb8d 1990 }
66cbab2c 1991 }
094a2f8c
KW
1992 }
1993 }
1994}
1995
1bfe8fea
KW
1996# Give final advice.
1997
1998my $didwarn = 0;
1999
2000foreach ($first_locales_test_number..$final_locales_test_number) {
2001 if ($Problem{$_}) {
2002 my @f = sort keys %{ $Problem{$_} };
2003 my $f = join(" ", @f);
2004 $f =~ s/(.{50,60}) /$1\n#\t/g;
2005 print
2006 "#\n",
2007 "# The locale ", (@f == 1 ? "definition" : "definitions"), "\n#\n",
2008 "#\t", $f, "\n#\n",
2009 "# on your system may have errors because the locale test $_\n",
2010 "# \"$test_names{$_}\"\n",
2011 "# failed in ", (@f == 1 ? "that locale" : "those locales"),
2012 ".\n";
2013 print <<EOW;
2014#
2015# If your users are not using these locales you are safe for the moment,
2016# but please report this failure first to perlbug\@perl.com using the
2017# perlbug script (as described in the INSTALL file) so that the exact
2018# details of the failures can be sorted out first and then your operating
2019# system supplier can be alerted about these anomalies.
2020#
2021EOW
2022 $didwarn = 1;
2023 }
2024}
2025
2026# Tell which locales were okay and which were not.
2027
2028if ($didwarn) {
2029 my (@s, @F);
2030
2031 foreach my $l (@Locale) {
2032 my $p = 0;
2033 if ($setlocale_failed{$l}) {
2034 $p++;
2035 }
2036 else {
2037 foreach my $t
2038 ($first_locales_test_number..$final_locales_test_number)
2039 {
2040 $p++ if $Problem{$t}{$l};
2041 }
2042 }
2043 push @s, $l if $p == 0;
2044 push @F, $l unless $p == 0;
2045 }
2046
2047 if (@s) {
2048 my $s = join(" ", @s);
2049 $s =~ s/(.{50,60}) /$1\n#\t/g;
2050
2051 warn
2052 "# The following locales\n#\n",
2053 "#\t", $s, "\n#\n",
2054 "# tested okay.\n#\n",
2055 } else {
2056 warn "# None of your locales were fully okay.\n";
2057 }
2058
2059 if (@F) {
2060 my $F = join(" ", @F);
2061 $F =~ s/(.{50,60}) /$1\n#\t/g;
2062
c7e7f6b7
KW
2063 my $details = "";
2064 unless ($debug) {
2065 $details = "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n";
2066 }
2067 elsif ($debug == 1) {
2068 $details = "# For even more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=2.\n";
2069 }
2070
1bfe8fea
KW
2071 warn
2072 "# The following locales\n#\n",
2073 "#\t", $F, "\n#\n",
2074 "# had problems.\n#\n",
c7e7f6b7 2075 $details;
1bfe8fea
KW
2076 } else {
2077 warn "# None of your locales were broken.\n";
2078 }
2079}
2080
fdf053ee 2081print "1..$test_num\n";
906f284f 2082
90248788 2083# eof