This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
lib/locale.t: White space only
[perl5.git] / lib / locale.t
CommitLineData
8ebc5c01
PP
1#!./perl -wT
2
66cbab2c
KW
3# This tests plain 'use locale' and adorned 'use locale ":not_characters"'
4# Because these pragmas are compile time, and I (khw) am trying to test
5# without using 'eval' as much as possible, which might cloud the issue, the
6# crucial parts of the code are duplicated in a block for each pragma.
7
6c2e653d
KW
8# To make a TODO test, add the string 'TODO' to its %test_names value
9
e3a2734b
KW
10binmode STDOUT, ':utf8';
11binmode STDERR, ':utf8';
12
8ebc5c01
PP
13BEGIN {
14 chdir 't' if -d 't';
20822f61 15 @INC = '../lib';
f9cbebe1 16 unshift @INC, '.';
b002077a 17 require Config; import Config;
97a0514d 18 if (!$Config{d_setlocale} || $Config{ccflags} =~ /\bD?NO_LOCALE\b/) {
b002077a
CS
19 print "1..0\n";
20 exit;
21 }
73fc293b 22 require './loc_tools.pl';
2de3dbcc 23 $| = 1;
8ebc5c01
PP
24}
25
26use strict;
26c1569f 27use feature 'fc';
8ebc5c01 28
92891c66 29# =1 adds debugging output; =2 increases the verbosity somewhat
108a305e 30my $debug = $ENV{PERL_DEBUG_FULL_TEST} // 0;
284102e8 31
6d5d702a
KW
32# Certain tests have been shown to be problematical for a few locales. Don't
33# fail them unless at least this percentage of the tested locales fail.
4874c8de 34# On AIX machines, many locales call a no-break space a graphic.
df33771d 35# (There aren't 1000 locales currently in existence, so 99.9 works)
4874c8de 36my $acceptable_failure_percentage = ($^O =~ / ^ ( AIX ) $ /ix)
31f658ea
KW
37 ? 99.9
38 : 5;
6d5d702a 39
7493b8f2 40# The list of test numbers of the problematic tests.
5b1f18a7 41my %problematical_tests;
7493b8f2
KW
42
43
db4b7445
A
44use Dumpvalue;
45
46my $dumper = Dumpvalue->new(
47 tick => qq{"},
48 quoteHighBit => 0,
49 unctrl => "quote"
50 );
6be75cd7 51sub debug {
db4b7445 52 return unless $debug;
46973bb2 53 my($mess) = join "", '# ', @_;
db4b7445
A
54 chop $mess;
55 print $dumper->stringify($mess,1), "\n";
6be75cd7
JH
56}
57
92891c66
KW
58sub debug_more {
59 return unless $debug > 1;
60 return debug(@_);
61}
62
6be75cd7
JH
63sub debugf {
64 printf @_ if $debug;
65}
66
40205cab 67$a = 'abc %9';
8ebc5c01 68
c213d471
KW
69my $test_num = 0;
70
8ebc5c01 71sub ok {
c213d471 72 my ($result, $message) = @_;
e3a2734b 73 $message = "" unless defined $message;
8ebc5c01
PP
74
75 print 'not ' unless ($result);
c213d471 76 print "ok " . ++$test_num;
e3a2734b
KW
77 print " $message";
78 print "\n";
8ebc5c01
PP
79}
80
81# First we'll do a lot of taint checking for locales.
82# This is the easiest to test, actually, as any locale,
83# even the default locale will taint under 'use locale'.
84
85sub is_tainted { # hello, camel two.
9f1b1f2d 86 no warnings 'uninitialized' ;
8ebc5c01 87 my $dummy;
ba74571d 88 local $@;
8ebc5c01
PP
89 not eval { $dummy = join("", @_), kill 0; 1 }
90}
91
a9b7c637
KW
92sub check_taint ($;$) {
93 my $message_tail = $_[1] // "";
2575405e
KW
94
95 # Extra blanks are so aligns with taint_not output
96 $message_tail = ": $message_tail" if $message_tail;
a9b7c637 97 ok is_tainted($_[0]), "verify that is tainted$message_tail";
8ebc5c01
PP
98}
99
a9b7c637
KW
100sub check_taint_not ($;$) {
101 my $message_tail = $_[1] // "";
2575405e 102 $message_tail = ": $message_tail" if $message_tail;
a9b7c637 103 ok((not is_tainted($_[0])), "verify that isn't tainted$message_tail");
8ebc5c01
PP
104}
105
bf3cd0e6
KW
106"\tb\t" =~ /^m?(\s)(.*)\1$/;
107check_taint_not $&, "not tainted outside 'use locale'";
108;
109
8ebc5c01
PP
110use locale; # engage locale and therefore locale taint.
111
b6fb4de7
KW
112# BE SURE TO COPY ANYTHING YOU ADD to these tests to the block below for
113# ":notcharacters"
2575405e
KW
114
115check_taint_not $a, '$a';
116
117check_taint uc($a), 'uc($a)';
118check_taint "\U$a", '"\U$a"';
119check_taint ucfirst($a), 'ucfirst($a)';
120check_taint "\u$a", '"\u$a"';
121check_taint lc($a), 'lc($a)';
122check_taint fc($a), 'fc($a)';
123check_taint "\L$a", '"\L$a"';
124check_taint "\F$a", '"\F$a"';
125check_taint lcfirst($a), 'lcfirst($a)';
126check_taint "\l$a", '"\l$a"';
127
128check_taint_not sprintf('%e', 123.456), "sprintf('%e', 123.456)";
129check_taint_not sprintf('%f', 123.456), "sprintf('%f', 123.456)";
130check_taint_not sprintf('%g', 123.456), "sprintf('%g', 123.456)";
131check_taint_not sprintf('%d', 123.456), "sprintf('%d', 123.456)";
132check_taint_not sprintf('%x', 123.456), "sprintf('%x', 123.456)";
8ebc5c01
PP
133
134$_ = $a; # untaint $_
135
136$_ = uc($a); # taint $_
137
2575405e 138check_taint $_, '$_ = uc($a)';
8ebc5c01
PP
139
140/(\w)/; # taint $&, $`, $', $+, $1.
2575405e 141check_taint $&, "\$& from /(\\w)/";
fdf73a7f
KW
142check_taint $`, "\t\$`";
143check_taint $', "\t\$'";
144check_taint $+, "\t\$+";
145check_taint $1, "\t\$1";
146check_taint_not $2, "\t\$2";
8ebc5c01
PP
147
148/(.)/; # untaint $&, $`, $', $+, $1.
2575405e 149check_taint_not $&, "\$& from /(.)/";
fdf73a7f
KW
150check_taint_not $`, "\t\$`";
151check_taint_not $', "\t\$'";
152check_taint_not $+, "\t\$+";
153check_taint_not $1, "\t\$1";
154check_taint_not $2, "\t\$2";
8ebc5c01
PP
155
156/(\W)/; # taint $&, $`, $', $+, $1.
2575405e 157check_taint $&, "\$& from /(\\W)/";
fdf73a7f
KW
158check_taint $`, "\t\$`";
159check_taint $', "\t\$'";
160check_taint $+, "\t\$+";
161check_taint $1, "\t\$1";
162check_taint_not $2, "\t\$2";
8ebc5c01 163
9486279c 164/(.)/; # untaint $&, $`, $', $+, $1.
2575405e 165check_taint_not $&, "\$& from /(.)/";
fdf73a7f
KW
166check_taint_not $`, "\t\$`";
167check_taint_not $', "\t\$'";
168check_taint_not $+, "\t\$+";
169check_taint_not $1, "\t\$1";
170check_taint_not $2, "\t\$2";
9486279c 171
8ebc5c01 172/(\s)/; # taint $&, $`, $', $+, $1.
2575405e 173check_taint $&, "\$& from /(\\s)/";
fdf73a7f
KW
174check_taint $`, "\t\$`";
175check_taint $', "\t\$'";
176check_taint $+, "\t\$+";
177check_taint $1, "\t\$1";
178check_taint_not $2, "\t\$2";
8ebc5c01 179
9486279c 180/(.)/; # untaint $&, $`, $', $+, $1.
2575405e 181check_taint_not $&, "\$& from /(.)/";
9486279c 182
8ebc5c01 183/(\S)/; # taint $&, $`, $', $+, $1.
2575405e 184check_taint $&, "\$& from /(\\S)/";
fdf73a7f
KW
185check_taint $`, "\t\$`";
186check_taint $', "\t\$'";
187check_taint $+, "\t\$+";
188check_taint $1, "\t\$1";
189check_taint_not $2, "\t\$2";
8ebc5c01 190
63baef57 191/(.)/; # untaint $&, $`, $', $+, $1.
2575405e 192check_taint_not $&, "\$& from /(.)/";
63baef57 193
40205cab
KW
194"0" =~ /(\d)/; # taint $&, $`, $', $+, $1.
195check_taint $&, "\$& from /(\\d)/";
196check_taint $`, "\t\$`";
197check_taint $', "\t\$'";
198check_taint $+, "\t\$+";
199check_taint $1, "\t\$1";
200check_taint_not $2, "\t\$2";
201
202/(.)/; # untaint $&, $`, $', $+, $1.
203check_taint_not $&, "\$& from /(.)/";
204
205/(\D)/; # taint $&, $`, $', $+, $1.
206check_taint $&, "\$& from /(\\D)/";
207check_taint $`, "\t\$`";
208check_taint $', "\t\$'";
209check_taint $+, "\t\$+";
210check_taint $1, "\t\$1";
211check_taint_not $2, "\t\$2";
212
213/(.)/; # untaint $&, $`, $', $+, $1.
214check_taint_not $&, "\$& from /(.)/";
215
216/([[:alnum:]])/; # taint $&, $`, $', $+, $1.
217check_taint $&, "\$& from /([[:alnum:]])/";
218check_taint $`, "\t\$`";
219check_taint $', "\t\$'";
220check_taint $+, "\t\$+";
221check_taint $1, "\t\$1";
222check_taint_not $2, "\t\$2";
223
224/(.)/; # untaint $&, $`, $', $+, $1.
225check_taint_not $&, "\$& from /(.)/";
226
227/([[:^alnum:]])/; # taint $&, $`, $', $+, $1.
228check_taint $&, "\$& from /([[:^alnum:]])/";
229check_taint $`, "\t\$`";
230check_taint $', "\t\$'";
231check_taint $+, "\t\$+";
232check_taint $1, "\t\$1";
233check_taint_not $2, "\t\$2";
234
63baef57 235"a" =~ /(a)|(\w)/; # taint $&, $`, $', $+, $1.
2575405e 236check_taint $&, "\$& from /(a)|(\\w)/";
63baef57
KW
237check_taint $`, "\t\$`";
238check_taint $', "\t\$'";
239check_taint $+, "\t\$+";
240check_taint $1, "\t\$1";
2575405e
KW
241ok($1 eq 'a', ("\t" x 5) . "\$1 is 'a'");
242ok(! defined $2, ("\t" x 5) . "\$2 is undefined");
63baef57
KW
243check_taint_not $2, "\t\$2";
244check_taint_not $3, "\t\$3";
245
246/(.)/; # untaint $&, $`, $', $+, $1.
2575405e 247check_taint_not $&, "\$& from /(.)/";
63baef57
KW
248
249"\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i; # no tainting because no locale dependence
2575405e 250check_taint_not $&, "\$& from /(\\N{CYRILLIC CAPITAL LETTER A})/i";
63baef57
KW
251check_taint_not $`, "\t\$`";
252check_taint_not $', "\t\$'";
253check_taint_not $+, "\t\$+";
254check_taint_not $1, "\t\$1";
2575405e 255ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\t\$1 is 'small cyrillic a'");
63baef57
KW
256check_taint_not $2, "\t\$2";
257
258/(.)/; # untaint $&, $`, $', $+, $1.
2575405e 259check_taint_not $&, "\$& from /./";
63baef57 260
40205cab
KW
261"(\N{KELVIN SIGN})" =~ /(\N{KELVIN SIGN})/i; # taints because depends on locale
262check_taint $&, "\$& from /(\\N{KELVIN SIGN})/i";
263check_taint $`, "\t\$`";
264check_taint $', "\t\$'";
265check_taint $+, "\t\$+";
266check_taint $1, "\t\$1";
267check_taint_not $2, "\t\$2";
268
63baef57 269/(.)/; # untaint $&, $`, $', $+, $1.
2575405e 270check_taint_not $&, "\$& from /(.)/";
63baef57
KW
271
272"a:" =~ /(.)\b(.)/; # taint $&, $`, $', $+, $1.
2575405e 273check_taint $&, "\$& from /(.)\\b(.)/";
63baef57
KW
274check_taint $`, "\t\$`";
275check_taint $', "\t\$'";
276check_taint $+, "\t\$+";
277check_taint $1, "\t\$1";
278check_taint $2, "\t\$2";
279check_taint_not $3, "\t\$3";
280
281/(.)/; # untaint $&, $`, $', $+, $1.
2575405e 282check_taint_not $&, "\$& from /./";
63baef57
KW
283
284"aa" =~ /(.)\B(.)/; # taint $&, $`, $', $+, $1.
2575405e 285check_taint $&, "\$& from /(.)\\B(.)/";
63baef57
KW
286check_taint $`, "\t\$`";
287check_taint $', "\t\$'";
288check_taint $+, "\t\$+";
289check_taint $1, "\t\$1";
290check_taint $2, "\t\$2";
291check_taint_not $3, "\t\$3";
292
293/(.)/; # untaint $&, $`, $', $+, $1.
2575405e 294check_taint_not $&, "\$& from /./";
63baef57
KW
295
296"aaa" =~ /(.).(\1)/i; # notaint because not locale dependent
2575405e 297check_taint_not $&, "\$ & from /(.).(\\1)/";
63baef57
KW
298check_taint_not $`, "\t\$`";
299check_taint_not $', "\t\$'";
300check_taint_not $+, "\t\$+";
301check_taint_not $1, "\t\$1";
302check_taint_not $2, "\t\$2";
2575405e 303check_taint_not $3, "\t\$3";
63baef57
KW
304
305/(.)/; # untaint $&, $`, $', $+, $1.
2575405e 306check_taint_not $&, "\$ & from /./";
63baef57 307
8ebc5c01
PP
308$_ = $a; # untaint $_
309
2575405e 310check_taint_not $_, 'untainting $_ works';
8ebc5c01
PP
311
312/(b)/; # this must not taint
2575405e 313check_taint_not $&, "\$ & from /(b)/";
fdf73a7f
KW
314check_taint_not $`, "\t\$`";
315check_taint_not $', "\t\$'";
316check_taint_not $+, "\t\$+";
317check_taint_not $1, "\t\$1";
318check_taint_not $2, "\t\$2";
8ebc5c01
PP
319
320$_ = $a; # untaint $_
321
2575405e 322check_taint_not $_, 'untainting $_ works';
8ebc5c01
PP
323
324$b = uc($a); # taint $b
325s/(.+)/$b/; # this must taint only the $_
326
2575405e 327check_taint $_, '$_ (wasn\'t tainted) from s/(.+)/$b/ where $b is tainted';
fdf73a7f
KW
328check_taint_not $&, "\t\$&";
329check_taint_not $`, "\t\$`";
330check_taint_not $', "\t\$'";
331check_taint_not $+, "\t\$+";
332check_taint_not $1, "\t\$1";
333check_taint_not $2, "\t\$2";
8ebc5c01
PP
334
335$_ = $a; # untaint $_
336
337s/(.+)/b/; # this must not taint
2575405e 338check_taint_not $_, '$_ (wasn\'t tainted) from s/(.+)/b/';
fdf73a7f
KW
339check_taint_not $&, "\t\$&";
340check_taint_not $`, "\t\$`";
341check_taint_not $', "\t\$'";
342check_taint_not $+, "\t\$+";
343check_taint_not $1, "\t\$1";
344check_taint_not $2, "\t\$2";
8ebc5c01
PP
345
346$b = $a; # untaint $b
347
348($b = $a) =~ s/\w/$&/;
2575405e
KW
349check_taint $b, '$b from ($b = $a) =~ s/\w/$&/'; # $b should be tainted.
350check_taint_not $a, '$a from ($b = $a) =~ s/\w/$&/'; # $a should be not.
8ebc5c01
PP
351
352$_ = $a; # untaint $_
353
354s/(\w)/\l$1/; # this must taint
2575405e 355check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\l$1/,'; # this must taint
fdf73a7f
KW
356check_taint $&, "\t\$&";
357check_taint $`, "\t\$`";
358check_taint $', "\t\$'";
359check_taint $+, "\t\$+";
360check_taint $1, "\t\$1";
361check_taint_not $2, "\t\$2";
8ebc5c01
PP
362
363$_ = $a; # untaint $_
364
365s/(\w)/\L$1/; # this must taint
2575405e 366check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\L$1/,';
fdf73a7f
KW
367check_taint $&, "\t\$&";
368check_taint $`, "\t\$`";
369check_taint $', "\t\$'";
370check_taint $+, "\t\$+";
371check_taint $1, "\t\$1";
372check_taint_not $2, "\t\$2";
8ebc5c01
PP
373
374$_ = $a; # untaint $_
375
376s/(\w)/\u$1/; # this must taint
2575405e 377check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\u$1/';
fdf73a7f
KW
378check_taint $&, "\t\$&";
379check_taint $`, "\t\$`";
380check_taint $', "\t\$'";
381check_taint $+, "\t\$+";
382check_taint $1, "\t\$1";
383check_taint_not $2, "\t\$2";
8ebc5c01
PP
384
385$_ = $a; # untaint $_
386
387s/(\w)/\U$1/; # this must taint
2575405e 388check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\U$1/';
fdf73a7f
KW
389check_taint $&, "\t\$&";
390check_taint $`, "\t\$`";
391check_taint $', "\t\$'";
392check_taint $+, "\t\$+";
393check_taint $1, "\t\$1";
394check_taint_not $2, "\t\$2";
8ebc5c01 395
faab3281
KW
396# After all this tainting $a should be cool.
397
2575405e 398check_taint_not $a, '$a still not tainted';
faab3281 399
b99851e1
KW
400"a" =~ /([a-z])/;
401check_taint_not $1, '"a" =~ /([a-z])/';
402"foo.bar_baz" =~ /^(.*)[._](.*?)$/; # Bug 120675
403check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/';
404
7a9d1c02 405# BE SURE TO COPY ANYTHING YOU ADD to the block below
8ebc5c01 406
66cbab2c
KW
407{ # This is just the previous tests copied here with a different
408 # compile-time pragma.
409
410 use locale ':not_characters'; # engage restricted locale with different
411 # tainting rules
b6fb4de7
KW
412 check_taint_not $a, '$a';
413
414 check_taint_not uc($a), 'uc($a)';
415 check_taint_not "\U$a", '"\U$a"';
416 check_taint_not ucfirst($a), 'ucfirst($a)';
417 check_taint_not "\u$a", '"\u$a"';
418 check_taint_not lc($a), 'lc($a)';
419 check_taint_not fc($a), 'fc($a)';
420 check_taint_not "\L$a", '"\L$a"';
421 check_taint_not "\F$a", '"\F$a"';
422 check_taint_not lcfirst($a), 'lcfirst($a)';
423 check_taint_not "\l$a", '"\l$a"';
424
425 check_taint_not sprintf('%e', 123.456), "sprintf('%e', 123.456)";
426 check_taint_not sprintf('%f', 123.456), "sprintf('%f', 123.456)";
427 check_taint_not sprintf('%g', 123.456), "sprintf('%g', 123.456)";
428 check_taint_not sprintf('%d', 123.456), "sprintf('%d', 123.456)";
429 check_taint_not sprintf('%x', 123.456), "sprintf('%x', 123.456)";
66cbab2c
KW
430
431 $_ = $a; # untaint $_
432
b6fb4de7 433 $_ = uc($a);
66cbab2c 434
b6fb4de7 435 check_taint_not $_, '$_ = uc($a)';
66cbab2c 436
b6fb4de7
KW
437 /(\w)/;
438 check_taint_not $&, "\$& from /(\\w)/";
439 check_taint_not $`, "\t\$`";
440 check_taint_not $', "\t\$'";
441 check_taint_not $+, "\t\$+";
442 check_taint_not $1, "\t\$1";
443 check_taint_not $2, "\t\$2";
66cbab2c
KW
444
445 /(.)/; # untaint $&, $`, $', $+, $1.
b6fb4de7
KW
446 check_taint_not $&, "\$& from /(.)/";
447 check_taint_not $`, "\t\$`";
448 check_taint_not $', "\t\$'";
449 check_taint_not $+, "\t\$+";
450 check_taint_not $1, "\t\$1";
451 check_taint_not $2, "\t\$2";
452
453 /(\W)/;
454 check_taint_not $&, "\$& from /(\\W)/";
455 check_taint_not $`, "\t\$`";
456 check_taint_not $', "\t\$'";
457 check_taint_not $+, "\t\$+";
458 check_taint_not $1, "\t\$1";
459 check_taint_not $2, "\t\$2";
66cbab2c 460
b6fb4de7
KW
461 /(.)/; # untaint $&, $`, $', $+, $1.
462 check_taint_not $&, "\$& from /(.)/";
463 check_taint_not $`, "\t\$`";
464 check_taint_not $', "\t\$'";
465 check_taint_not $+, "\t\$+";
466 check_taint_not $1, "\t\$1";
467 check_taint_not $2, "\t\$2";
468
469 /(\s)/;
470 check_taint_not $&, "\$& from /(\\s)/";
471 check_taint_not $`, "\t\$`";
472 check_taint_not $', "\t\$'";
473 check_taint_not $+, "\t\$+";
474 check_taint_not $1, "\t\$1";
475 check_taint_not $2, "\t\$2";
66cbab2c 476
b6fb4de7
KW
477 /(.)/; # untaint $&, $`, $', $+, $1.
478 check_taint_not $&, "\$& from /(.)/";
66cbab2c 479
b6fb4de7
KW
480 /(\S)/;
481 check_taint_not $&, "\$& from /(\\S)/";
482 check_taint_not $`, "\t\$`";
483 check_taint_not $', "\t\$'";
484 check_taint_not $+, "\t\$+";
485 check_taint_not $1, "\t\$1";
486 check_taint_not $2, "\t\$2";
66cbab2c 487
b6fb4de7
KW
488 /(.)/; # untaint $&, $`, $', $+, $1.
489 check_taint_not $&, "\$& from /(.)/";
490
491 "0" =~ /(\d)/;
492 check_taint_not $&, "\$& from /(\\d)/";
493 check_taint_not $`, "\t\$`";
494 check_taint_not $', "\t\$'";
495 check_taint_not $+, "\t\$+";
496 check_taint_not $1, "\t\$1";
497 check_taint_not $2, "\t\$2";
498
499 /(.)/; # untaint $&, $`, $', $+, $1.
500 check_taint_not $&, "\$& from /(.)/";
66cbab2c 501
b6fb4de7
KW
502 /(\D)/;
503 check_taint_not $&, "\$& from /(\\D)/";
504 check_taint_not $`, "\t\$`";
505 check_taint_not $', "\t\$'";
506 check_taint_not $+, "\t\$+";
507 check_taint_not $1, "\t\$1";
508 check_taint_not $2, "\t\$2";
66cbab2c 509
b6fb4de7
KW
510 /(.)/; # untaint $&, $`, $', $+, $1.
511 check_taint_not $&, "\$& from /(.)/";
512
513 /([[:alnum:]])/;
514 check_taint_not $&, "\$& from /([[:alnum:]])/";
515 check_taint_not $`, "\t\$`";
516 check_taint_not $', "\t\$'";
517 check_taint_not $+, "\t\$+";
518 check_taint_not $1, "\t\$1";
519 check_taint_not $2, "\t\$2";
520
521 /(.)/; # untaint $&, $`, $', $+, $1.
522 check_taint_not $&, "\$& from /(.)/";
523
524 /([[:^alnum:]])/;
525 check_taint_not $&, "\$& from /([[:^alnum:]])/";
526 check_taint_not $`, "\t\$`";
527 check_taint_not $', "\t\$'";
528 check_taint_not $+, "\t\$+";
529 check_taint_not $1, "\t\$1";
530 check_taint_not $2, "\t\$2";
531
532 "a" =~ /(a)|(\w)/;
533 check_taint_not $&, "\$& from /(a)|(\\w)/";
534 check_taint_not $`, "\t\$`";
535 check_taint_not $', "\t\$'";
536 check_taint_not $+, "\t\$+";
537 check_taint_not $1, "\t\$1";
538 ok($1 eq 'a', ("\t" x 5) . "\$1 is 'a'");
539 ok(! defined $2, ("\t" x 5) . "\$2 is undefined");
540 check_taint_not $2, "\t\$2";
541 check_taint_not $3, "\t\$3";
542
543 /(.)/; # untaint $&, $`, $', $+, $1.
544 check_taint_not $&, "\$& from /(.)/";
545
546 "\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i;
547 check_taint_not $&, "\$& from /(\\N{CYRILLIC CAPITAL LETTER A})/i";
548 check_taint_not $`, "\t\$`";
549 check_taint_not $', "\t\$'";
550 check_taint_not $+, "\t\$+";
551 check_taint_not $1, "\t\$1";
552 ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\t\$1 is 'small cyrillic a'");
553 check_taint_not $2, "\t\$2";
554
555 /(.)/; # untaint $&, $`, $', $+, $1.
556 check_taint_not $&, "\$& from /./";
66cbab2c 557
b6fb4de7
KW
558 "(\N{KELVIN SIGN})" =~ /(\N{KELVIN SIGN})/i;
559 check_taint_not $&, "\$& from /(\\N{KELVIN SIGN})/i";
560 check_taint_not $`, "\t\$`";
561 check_taint_not $', "\t\$'";
562 check_taint_not $+, "\t\$+";
563 check_taint_not $1, "\t\$1";
564 check_taint_not $2, "\t\$2";
565
566 /(.)/; # untaint $&, $`, $', $+, $1.
567 check_taint_not $&, "\$& from /(.)/";
568
569 "a:" =~ /(.)\b(.)/;
570 check_taint_not $&, "\$& from /(.)\\b(.)/";
571 check_taint_not $`, "\t\$`";
572 check_taint_not $', "\t\$'";
573 check_taint_not $+, "\t\$+";
574 check_taint_not $1, "\t\$1";
575 check_taint_not $2, "\t\$2";
576 check_taint_not $3, "\t\$3";
577
578 /(.)/; # untaint $&, $`, $', $+, $1.
579 check_taint_not $&, "\$& from /./";
580
581 "aa" =~ /(.)\B(.)/;
582 check_taint_not $&, "\$& from /(.)\\B(.)/";
583 check_taint_not $`, "\t\$`";
584 check_taint_not $', "\t\$'";
585 check_taint_not $+, "\t\$+";
586 check_taint_not $1, "\t\$1";
587 check_taint_not $2, "\t\$2";
588 check_taint_not $3, "\t\$3";
589
590 /(.)/; # untaint $&, $`, $', $+, $1.
591 check_taint_not $&, "\$& from /./";
592
593 "aaa" =~ /(.).(\1)/i; # notaint because not locale dependent
594 check_taint_not $&, "\$ & from /(.).(\\1)/";
595 check_taint_not $`, "\t\$`";
596 check_taint_not $', "\t\$'";
597 check_taint_not $+, "\t\$+";
598 check_taint_not $1, "\t\$1";
599 check_taint_not $2, "\t\$2";
600 check_taint_not $3, "\t\$3";
601
602 /(.)/; # untaint $&, $`, $', $+, $1.
603 check_taint_not $&, "\$ & from /./";
604
605 $_ = $a; # untaint $_
606
607 check_taint_not $_, 'untainting $_ works';
608
609 /(b)/;
610 check_taint_not $&, "\$ & from /(b)/";
611 check_taint_not $`, "\t\$`";
612 check_taint_not $', "\t\$'";
613 check_taint_not $+, "\t\$+";
614 check_taint_not $1, "\t\$1";
615 check_taint_not $2, "\t\$2";
66cbab2c
KW
616
617 $_ = $a; # untaint $_
618
b6fb4de7
KW
619 check_taint_not $_, 'untainting $_ works';
620
621 s/(.+)/b/;
622 check_taint_not $_, '$_ (wasn\'t tainted) from s/(.+)/b/';
623 check_taint_not $&, "\t\$&";
624 check_taint_not $`, "\t\$`";
625 check_taint_not $', "\t\$'";
626 check_taint_not $+, "\t\$+";
627 check_taint_not $1, "\t\$1";
628 check_taint_not $2, "\t\$2";
66cbab2c
KW
629
630 $b = $a; # untaint $b
631
632 ($b = $a) =~ s/\w/$&/;
b6fb4de7
KW
633 check_taint_not $b, '$b from ($b = $a) =~ s/\w/$&/';
634 check_taint_not $a, '$a from ($b = $a) =~ s/\w/$&/';
66cbab2c
KW
635
636 $_ = $a; # untaint $_
637
b6fb4de7
KW
638 s/(\w)/\l$1/;
639 check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\l$1/,'; # this must taint
640 check_taint_not $&, "\t\$&";
641 check_taint_not $`, "\t\$`";
642 check_taint_not $', "\t\$'";
643 check_taint_not $+, "\t\$+";
644 check_taint_not $1, "\t\$1";
645 check_taint_not $2, "\t\$2";
66cbab2c
KW
646
647 $_ = $a; # untaint $_
648
b6fb4de7
KW
649 s/(\w)/\L$1/;
650 check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\L$1/,';
651 check_taint_not $&, "\t\$&";
652 check_taint_not $`, "\t\$`";
653 check_taint_not $', "\t\$'";
654 check_taint_not $+, "\t\$+";
655 check_taint_not $1, "\t\$1";
656 check_taint_not $2, "\t\$2";
66cbab2c
KW
657
658 $_ = $a; # untaint $_
659
b6fb4de7
KW
660 s/(\w)/\u$1/;
661 check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\u$1/';
662 check_taint_not $&, "\t\$&";
663 check_taint_not $`, "\t\$`";
664 check_taint_not $', "\t\$'";
665 check_taint_not $+, "\t\$+";
666 check_taint_not $1, "\t\$1";
667 check_taint_not $2, "\t\$2";
66cbab2c
KW
668
669 $_ = $a; # untaint $_
670
b6fb4de7
KW
671 s/(\w)/\U$1/;
672 check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\U$1/';
673 check_taint_not $&, "\t\$&";
674 check_taint_not $`, "\t\$`";
675 check_taint_not $', "\t\$'";
676 check_taint_not $+, "\t\$+";
677 check_taint_not $1, "\t\$1";
678 check_taint_not $2, "\t\$2";
66cbab2c
KW
679
680 # After all this tainting $a should be cool.
681
b6fb4de7 682 check_taint_not $a, '$a still not tainted';
7a9d1c02
KW
683
684 "a" =~ /([a-z])/;
685 check_taint_not $1, '"a" =~ /([a-z])/';
686 "foo.bar_baz" =~ /^(.*)[._](.*?)$/; # Bug 120675
687 check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/';
b6fb4de7 688
66cbab2c
KW
689}
690
691# Here are in scope of 'use locale'
692
8ebc5c01
PP
693# I think we've seen quite enough of taint.
694# Let us do some *real* locale work now,
284102e8 695# unless setlocale() is missing (i.e. minitest).
8ebc5c01 696
6cf0b567 697# The test number before our first setlocale()
66330f13 698my $final_without_setlocale = $test_num;
6cf0b567 699
284102e8
JH
700# Find locales.
701
46973bb2 702debug "Scanning for locales...\n";
6be75cd7 703
9b0711ee
KW
704require POSIX; import POSIX ':locale_h';
705
3d9e170f 706my @Locale = find_locales([ &POSIX::LC_CTYPE, &POSIX::LC_NUMERIC, &POSIX::LC_ALL ]);
4599a1de 707
46973bb2 708debug "Locales =\n";
887ef7ed 709for ( @Locale ) {
46973bb2 710 debug "$_\n";
887ef7ed 711}
8ebc5c01 712
d369fd5b
KW
713unless (@Locale) {
714 print "1..$test_num\n";
715 exit;
716}
717
d369fd5b
KW
718
719setlocale(&POSIX::LC_ALL, "C");
720
73fc293b
KW
721my %posixes;
722
284102e8 723my %Problem;
2a680da6
JH
724my %Okay;
725my %Testing;
30032ef4 726my @Added_alpha; # Alphas that aren't in the C locale.
c08acc4c 727my %test_names;
284102e8 728
705af3af 729sub disp_chars {
019bf7dd
KW
730 # This returns a display string denoting the input parameter @_, each
731 # entry of which is a single character in the range 0-255. The first part
732 # of the output is a string of the characters in @_ that are ASCII
733 # graphics, and hence unambiguously displayable. They are given by code
734 # point order. The second part is the remaining code points, the ordinals
735 # of which are each displayed as 2-digit hex. Blanks are inserted so as
736 # to keep anything from the first part looking like a 2-digit hex number.
737
738 no locale;
739 my @chars = sort { ord $a <=> ord $b } @_;
740 my $output = "";
019bf7dd
KW
741 my $range_start;
742 my $start_class;
743 push @chars, chr(258); # This sentinel simplifies the loop termination
744 # logic
745 foreach my $i (0 .. @chars - 1) {
746 my $char = $chars[$i];
747 my $range_end;
748 my $class;
749
750 # We avoid using [:posix:] classes, as these are being tested in this
751 # file. Each equivalence class below is for things that can appear in
752 # a range; those that can't be in a range have class -1. 0 for those
753 # which should be output in hex; and >0 for the other ranges
754 if ($char =~ /[A-Z]/) {
755 $class = 2;
756 }
757 elsif ($char =~ /[a-z]/) {
758 $class = 3;
759 }
760 elsif ($char =~ /[0-9]/) {
761 $class = 4;
762 }
78fbfaf5
KW
763 # Uncomment to get literal punctuation displayed instead of hex
764 #elsif ($char =~ /[[\]!"#\$\%&\'()*+,.\/:\\;<=>?\@\^_`{|}~-]/) {
765 # $class = -1; # Punct never appears in a range
766 #}
019bf7dd
KW
767 else {
768 $class = 0; # Output in hex
769 }
770
771 if (! defined $range_start) {
772 if ($class < 0) {
78fbfaf5 773 $output .= " " . $char;
019bf7dd
KW
774 }
775 else {
776 $range_start = ord $char;
777 $start_class = $class;
778 }
779 } # A range ends if not consecutive, or the class-type changes
780 elsif (ord $char != ($range_end = ord($chars[$i-1])) + 1
781 || $class != $start_class)
782 {
783
784 # Here, the current character is not in the range. This means the
785 # previous character must have been. Output the range up through
786 # that one.
787 my $range_length = $range_end - $range_start + 1;
788 if ($start_class > 0) {
789 $output .= " " . chr($range_start);
790 $output .= "-" . chr($range_end) if $range_length > 1;
791 }
792 else {
78fbfaf5
KW
793 $output .= sprintf(" %02X", $range_start);
794 $output .= sprintf("-%02X", $range_end) if $range_length > 1;
019bf7dd
KW
795 }
796
797 # Handle the new current character, as potentially beginning a new
798 # range
799 undef $range_start;
800 redo;
801 }
802 }
803
804 $output =~ s/^ //;
78fbfaf5 805 return $output;
019bf7dd
KW
806}
807
010b53a6
KW
808sub disp_str ($) {
809 my $string = shift;
810
811 # Displays the string unambiguously. ASCII printables are always output
812 # as-is, though perhaps separated by blanks from other characters. If
813 # entirely printable ASCII, just returns the string. Otherwise if valid
814 # UTF-8 it uses the character names for non-printable-ASCII. Otherwise it
815 # outputs hex for each non-ASCII-printable byte.
816
817 return $string if $string =~ / ^ [[:print:]]* $/xa;
818
819 my $result = "";
820 my $prev_was_punct = 1; # Beginning is considered punct
821 if (utf8::valid($string) && utf8::is_utf8($string)) {
822 use charnames ();
823 foreach my $char (split "", $string) {
824
825 # Keep punctuation adjacent to other characters; otherwise
826 # separate them with a blank
827 if ($char =~ /[[:punct:]]/a) {
828 $result .= $char;
829 $prev_was_punct = 1;
830 }
831 elsif ($char =~ /[[:print:]]/a) {
832 $result .= " " unless $prev_was_punct;
833 $result .= $char;
834 $prev_was_punct = 0;
835 }
836 else {
837 $result .= " " unless $prev_was_punct;
838 $result .= charnames::viacode(ord $char);
839 $prev_was_punct = 0;
840 }
841 }
842 }
843 else {
844 use bytes;
845 foreach my $char (split "", $string) {
846 if ($char =~ /[[:punct:]]/a) {
847 $result .= $char;
848 $prev_was_punct = 1;
849 }
850 elsif ($char =~ /[[:print:]]/a) {
851 $result .= " " unless $prev_was_punct;
852 $result .= $char;
853 $prev_was_punct = 0;
854 }
855 else {
856 $result .= " " unless $prev_was_punct;
857 $result .= sprintf("%02X", ord $char);
858 $prev_was_punct = 0;
859 }
860 }
861 }
862
863 return $result;
864}
865
30032ef4
KW
866sub report_result {
867 my ($Locale, $i, $pass_fail, $message) = @_;
15bbd6a2
KW
868 $message //= "";
869 $message = " ($message)" if $message;
30032ef4 870 unless ($pass_fail) {
2a680da6 871 $Problem{$i}{$Locale} = 1;
46973bb2 872 debug "failed $i ($test_names{$i}) with locale '$Locale'$message\n";
2a680da6
JH
873 } else {
874 push @{$Okay{$i}}, $Locale;
875 }
876}
877
7c844d17
KW
878sub report_multi_result {
879 my ($Locale, $i, $results_ref) = @_;
880
881 # $results_ref points to an array, each element of which is a character that was
882 # in error for this test numbered '$i'. If empty, the test passed
883
884 my $message = "";
885 if (@$results_ref) {
705af3af 886 $message = join " ", "for", disp_chars(@$results_ref);
7c844d17
KW
887 }
888 report_result($Locale, $i, @$results_ref == 0, $message);
889}
890
c4093d7d
KW
891my $first_locales_test_number = $final_without_setlocale + 1;
892my $locales_test_number;
893my $not_necessarily_a_problem_test_number;
6d5d702a 894my $first_casing_test_number;
c4093d7d
KW
895my %setlocale_failed; # List of locales that setlocale() didn't work on
896
73fc293b 897foreach my $Locale (@Locale) {
c4093d7d 898 $locales_test_number = $first_locales_test_number - 1;
46973bb2
KW
899 debug "\n";
900 debug "Locale = $Locale\n";
284102e8 901
a810e350 902 unless (setlocale(&POSIX::LC_ALL, $Locale)) {
c4093d7d 903 $setlocale_failed{$Locale} = $Locale;
284102e8 904 next;
8ebc5c01 905 }
8ebc5c01 906
31f05a37
KW
907 # We test UTF-8 locales only under ':not_characters'; It is easier to
908 # test them in other test files than here. Non- UTF-8 locales are tested
909 # only under plain 'use locale', as otherwise we would have to convert
910 # everything in them to Unicode.
66cbab2c 911
95eaa1bf
KW
912 my %UPPER = (); # All alpha X for which uc(X) == X and lc(X) != X
913 my %lower = (); # All alpha X for which lc(X) == X and uc(X) != X
914 my %BoThCaSe = (); # All alpha X for which uc(X) == lc(X) == X
66cbab2c 915
ab8b8bcc 916 my $is_utf8_locale = is_locale_utf8($Locale);
b9df08e4 917
46973bb2 918 debug "is utf8 locale? = $is_utf8_locale\n";
b9df08e4
KW
919
920 my $radix = localeconv()->{decimal_point};
921 if ($radix !~ / ^ [[:ascii:]] + $/x) {
922 use bytes;
923 $radix = disp_chars(split "", $radix);
924 }
46973bb2 925 debug "radix = $radix\n";
b9df08e4 926
66cbab2c
KW
927 if (! $is_utf8_locale) {
928 use locale;
fb844330
KW
929 @{$posixes{'word'}} = grep /\w/, map { chr } 0..255;
930 @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255;
931 @{$posixes{'space'}} = grep /\s/, map { chr } 0..255;
932 @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255;
933 @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255;
934 @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255;
935 @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255;
936 @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255;
937 @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255;
938 @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255;
939 @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255;
98ed56e7 940 @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255;
fb844330
KW
941 @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255;
942 @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255;
943 @{$posixes{'cased'}} = grep /[[:upper:]]/i, map {chr } 0..255;
e5272a46 944
71e5cbb3
KW
945 # Sieve the uppercase and the lowercase.
946
fb844330 947 for (@{$posixes{'word'}}) {
71e5cbb3
KW
948 if (/[^\d_]/) { # skip digits and the _
949 if (uc($_) eq $_) {
950 $UPPER{$_} = $_;
951 }
952 if (lc($_) eq $_) {
953 $lower{$_} = $_;
954 }
955 }
956 }
66cbab2c
KW
957 }
958 else {
959 use locale ':not_characters';
fb844330
KW
960 @{$posixes{'word'}} = grep /\w/, map { chr } 0..255;
961 @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255;
962 @{$posixes{'space'}} = grep /\s/, map { chr } 0..255;
963 @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255;
964 @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255;
965 @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255;
966 @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255;
967 @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255;
968 @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255;
969 @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255;
970 @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255;
98ed56e7 971 @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255;
fb844330
KW
972 @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255;
973 @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255;
974 @{$posixes{'cased'}} = grep /[[:upper:]]/i, map {chr } 0..255;
975 for (@{$posixes{'word'}}) {
66cbab2c
KW
976 if (/[^\d_]/) { # skip digits and the _
977 if (uc($_) eq $_) {
978 $UPPER{$_} = $_;
979 }
980 if (lc($_) eq $_) {
981 $lower{$_} = $_;
982 }
983 }
984 }
985 }
a160ac48 986
fb844330
KW
987 # Ordered, where possible, in groups of "this is a subset of the next
988 # one"
46973bb2
KW
989 debug ":upper: = ", disp_chars(@{$posixes{'upper'}}), "\n";
990 debug ":lower: = ", disp_chars(@{$posixes{'lower'}}), "\n";
991 debug ":cased: = ", disp_chars(@{$posixes{'cased'}}), "\n";
992 debug ":alpha: = ", disp_chars(@{$posixes{'alpha'}}), "\n";
993 debug ":alnum: = ", disp_chars(@{$posixes{'alnum'}}), "\n";
994 debug " w = ", disp_chars(@{$posixes{'word'}}), "\n";
995 debug ":graph: = ", disp_chars(@{$posixes{'graph'}}), "\n";
996 debug ":print: = ", disp_chars(@{$posixes{'print'}}), "\n";
997 debug " d = ", disp_chars(@{$posixes{'digit'}}), "\n";
998 debug ":xdigit: = ", disp_chars(@{$posixes{'xdigit'}}), "\n";
999 debug ":blank: = ", disp_chars(@{$posixes{'blank'}}), "\n";
1000 debug " s = ", disp_chars(@{$posixes{'space'}}), "\n";
1001 debug ":punct: = ", disp_chars(@{$posixes{'punct'}}), "\n";
1002 debug ":cntrl: = ", disp_chars(@{$posixes{'cntrl'}}), "\n";
1003 debug ":ascii: = ", disp_chars(@{$posixes{'ascii'}}), "\n";
a160ac48 1004
284102e8 1005 foreach (keys %UPPER) {
a160ac48 1006
097ee67d 1007 $BoThCaSe{$_}++ if exists $lower{$_};
284102e8
JH
1008 }
1009 foreach (keys %lower) {
097ee67d 1010 $BoThCaSe{$_}++ if exists $UPPER{$_};
284102e8 1011 }
097ee67d 1012 foreach (keys %BoThCaSe) {
284102e8
JH
1013 delete $UPPER{$_};
1014 delete $lower{$_};
1015 }
1016
e5cc0528
KW
1017 my %Unassigned;
1018 foreach my $ord ( 0 .. 255 ) {
1019 $Unassigned{chr $ord} = 1;
1020 }
1021 foreach my $class (keys %posixes) {
1022 foreach my $char (@{$posixes{$class}}) {
1023 delete $Unassigned{$char};
1024 }
1025 }
1026
46973bb2
KW
1027 debug "UPPER = ", disp_chars(sort { ord $a <=> ord $b } keys %UPPER), "\n";
1028 debug "lower = ", disp_chars(sort { ord $a <=> ord $b } keys %lower), "\n";
1029 debug "BoThCaSe = ", disp_chars(sort { ord $a <=> ord $b } keys %BoThCaSe), "\n";
1030 debug "Unassigned = ", disp_chars(sort { ord $a <=> ord $b } keys %Unassigned), "\n";
284102e8 1031
baa71cfd 1032 my @failures;
3da38613 1033 my @fold_failures;
b9df08e4 1034 foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) {
baa71cfd 1035 my $ok;
3da38613 1036 my $fold_ok;
baa71cfd
KW
1037 if ($is_utf8_locale) {
1038 use locale ':not_characters';
1039 $ok = $x =~ /[[:upper:]]/;
3da38613 1040 $fold_ok = $x =~ /[[:lower:]]/i;
baa71cfd
KW
1041 }
1042 else {
1043 use locale;
1044 $ok = $x =~ /[[:upper:]]/;
3da38613 1045 $fold_ok = $x =~ /[[:lower:]]/i;
baa71cfd
KW
1046 }
1047 push @failures, $x unless $ok;
3da38613 1048 push @fold_failures, $x unless $fold_ok;
baa71cfd 1049 }
baa71cfd 1050 $locales_test_number++;
6d5d702a 1051 $first_casing_test_number = $locales_test_number;
95eaa1bf 1052 $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/ matches all alpha X for which uc(X) == X and lc(X) != X';
7c844d17 1053 report_multi_result($Locale, $locales_test_number, \@failures);
6d5d702a 1054
3da38613 1055 $locales_test_number++;
6d5d702a 1056
95eaa1bf 1057 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i matches all alpha X for which uc(X) == X and lc(X) != X';
7c844d17 1058 report_multi_result($Locale, $locales_test_number, \@fold_failures);
baa71cfd 1059
baa71cfd 1060 undef @failures;
3da38613 1061 undef @fold_failures;
baa71cfd 1062
b9df08e4 1063 foreach my $x (sort { ord $a <=> ord $b } keys %lower) {
baa71cfd 1064 my $ok;
3da38613 1065 my $fold_ok;
baa71cfd
KW
1066 if ($is_utf8_locale) {
1067 use locale ':not_characters';
1068 $ok = $x =~ /[[:lower:]]/;
3da38613 1069 $fold_ok = $x =~ /[[:upper:]]/i;
baa71cfd
KW
1070 }
1071 else {
1072 use locale;
1073 $ok = $x =~ /[[:lower:]]/;
3da38613 1074 $fold_ok = $x =~ /[[:upper:]]/i;
baa71cfd
KW
1075 }
1076 push @failures, $x unless $ok;
3da38613 1077 push @fold_failures, $x unless $fold_ok;
baa71cfd
KW
1078 }
1079
1080 $locales_test_number++;
95eaa1bf 1081 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/ matches all alpha X for which lc(X) == X and uc(X) != X';
7c844d17
KW
1082 report_multi_result($Locale, $locales_test_number, \@failures);
1083
3da38613 1084 $locales_test_number++;
95eaa1bf 1085 $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/i matches all alpha X for which lc(X) == X and uc(X) != X';
7c844d17 1086 report_multi_result($Locale, $locales_test_number, \@fold_failures);
baa71cfd 1087
9445c837
KW
1088 { # Find the alphabetic characters that are not considered alphabetics
1089 # in the default (C) locale.
8ebc5c01 1090
284102e8 1091 no locale;
71e5cbb3 1092
30032ef4 1093 @Added_alpha = ();
5e7a1028 1094 for (keys %UPPER, keys %lower, keys %BoThCaSe) {
30032ef4 1095 push(@Added_alpha, $_) if (/\W/);
284102e8 1096 }
8ebc5c01 1097 }
8ebc5c01 1098
b9df08e4 1099 @Added_alpha = sort { ord $a <=> ord $b } @Added_alpha;
8ebc5c01 1100
46973bb2 1101 debug "Added_alpha = ", disp_chars(@Added_alpha), "\n";
8ebc5c01 1102
db31898d 1103 # Cross-check the whole 8-bit character set.
8ebc5c01 1104
db31898d
KW
1105 ++$locales_test_number;
1106 my @f;
1107 $test_names{$locales_test_number} = 'Verify that \w and [:word:] are identical';
1108 for (map { chr } 0..255) {
1109 if ($is_utf8_locale) {
1110 use locale ':not_characters';
1111 push @f, $_ unless /[[:word:]]/ == /\w/;
1112 }
1113 else {
1114 push @f, $_ unless /[[:word:]]/ == /\w/;
1115 }
1116 }
1117 report_multi_result($Locale, $locales_test_number, \@f);
8ebc5c01 1118
db31898d
KW
1119 ++$locales_test_number;
1120 undef @f;
1121 $test_names{$locales_test_number} = 'Verify that \d and [:digit:] are identical';
1122 for (map { chr } 0..255) {
1123 if ($is_utf8_locale) {
1124 use locale ':not_characters';
1125 push @f, $_ unless /[[:digit:]]/ == /\d/;
1126 }
1127 else {
1128 push @f, $_ unless /[[:digit:]]/ == /\d/;
1129 }
1130 }
1131 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1132
db31898d
KW
1133 ++$locales_test_number;
1134 undef @f;
1135 $test_names{$locales_test_number} = 'Verify that \s and [:space:] are identical';
1136 for (map { chr } 0..255) {
1137 if ($is_utf8_locale) {
1138 use locale ':not_characters';
1139 push @f, $_ unless /[[:space:]]/ == /\s/;
1140 }
1141 else {
1142 push @f, $_ unless /[[:space:]]/ == /\s/;
1143 }
1144 }
1145 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1146
db31898d
KW
1147 ++$locales_test_number;
1148 undef @f;
1149 $test_names{$locales_test_number} = 'Verify that [:posix:] and [:^posix:] are mutually exclusive';
1150 for (map { chr } 0..255) {
1151 if ($is_utf8_locale) {
1152 use locale ':not_characters';
1153 push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) ||
1154 (/[[:alnum:]]/ xor /[[:^alnum:]]/) ||
1155 (/[[:ascii:]]/ xor /[[:^ascii:]]/) ||
1156 (/[[:blank:]]/ xor /[[:^blank:]]/) ||
1157 (/[[:cntrl:]]/ xor /[[:^cntrl:]]/) ||
1158 (/[[:digit:]]/ xor /[[:^digit:]]/) ||
1159 (/[[:graph:]]/ xor /[[:^graph:]]/) ||
1160 (/[[:lower:]]/ xor /[[:^lower:]]/) ||
1161 (/[[:print:]]/ xor /[[:^print:]]/) ||
1162 (/[[:space:]]/ xor /[[:^space:]]/) ||
1163 (/[[:upper:]]/ xor /[[:^upper:]]/) ||
1164 (/[[:word:]]/ xor /[[:^word:]]/) ||
1165 (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) ||
1166
1167 # effectively is what [:cased:] would be if it existed.
1168 (/[[:upper:]]/i xor /[[:^upper:]]/i);
1169 }
1170 else {
1171 push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) ||
1172 (/[[:alnum:]]/ xor /[[:^alnum:]]/) ||
1173 (/[[:ascii:]]/ xor /[[:^ascii:]]/) ||
1174 (/[[:blank:]]/ xor /[[:^blank:]]/) ||
1175 (/[[:cntrl:]]/ xor /[[:^cntrl:]]/) ||
1176 (/[[:digit:]]/ xor /[[:^digit:]]/) ||
1177 (/[[:graph:]]/ xor /[[:^graph:]]/) ||
1178 (/[[:lower:]]/ xor /[[:^lower:]]/) ||
1179 (/[[:print:]]/ xor /[[:^print:]]/) ||
1180 (/[[:space:]]/ xor /[[:^space:]]/) ||
1181 (/[[:upper:]]/ xor /[[:^upper:]]/) ||
1182 (/[[:word:]]/ xor /[[:^word:]]/) ||
1183 (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) ||
1184 (/[[:upper:]]/i xor /[[:^upper:]]/i);
1185 }
1186 }
1187 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1188
db31898d
KW
1189 # The rules for the relationships are given in:
1190 # http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
32e8aa3f 1191
660c7bcc
KW
1192
1193 ++$locales_test_number;
1194 undef @f;
1195 $test_names{$locales_test_number} = 'Verify that [:lower:] contains at least a-z';
1196 for ('a' .. 'z') {
1197 if ($is_utf8_locale) {
1198 use locale ':not_characters';
1199 push @f, $_ unless /[[:lower:]]/;
1200 }
1201 else {
1202 push @f, $_ unless /[[:lower:]]/;
1203 }
1204 }
1205 report_multi_result($Locale, $locales_test_number, \@f);
1206
db31898d
KW
1207 ++$locales_test_number;
1208 undef @f;
1209 $test_names{$locales_test_number} = 'Verify that [:lower:] is a subset of [:alpha:]';
1210 for (map { chr } 0..255) {
1211 if ($is_utf8_locale) {
1212 use locale ':not_characters';
1213 push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/;
1214 }
1215 else {
1216 push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/;
1217 }
1218 }
1219 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1220
db31898d
KW
1221 ++$locales_test_number;
1222 undef @f;
660c7bcc
KW
1223 $test_names{$locales_test_number} = 'Verify that [:upper:] contains at least A-Z';
1224 for ('A' .. 'Z') {
1225 if ($is_utf8_locale) {
1226 use locale ':not_characters';
1227 push @f, $_ unless /[[:upper:]]/;
1228 }
1229 else {
1230 push @f, $_ unless /[[:upper:]]/;
1231 }
1232 }
1233 report_multi_result($Locale, $locales_test_number, \@f);
1234
1235 ++$locales_test_number;
1236 undef @f;
db31898d
KW
1237 $test_names{$locales_test_number} = 'Verify that [:upper:] is a subset of [:alpha:]';
1238 for (map { chr } 0..255) {
1239 if ($is_utf8_locale) {
1240 use locale ':not_characters';
1241 push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/;
1242 }
1243 else {
1244 push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/;
1245 }
1246 }
1247 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1248
db31898d
KW
1249 ++$locales_test_number;
1250 undef @f;
1251 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i is a subset of [:alpha:]';
1252 for (map { chr } 0..255) {
1253 if ($is_utf8_locale) {
1254 use locale ':not_characters';
1255 push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/;
1256 }
1257 else {
1258 push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/;
1259 }
1260 }
1261 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1262
db31898d
KW
1263 ++$locales_test_number;
1264 undef @f;
1265 $test_names{$locales_test_number} = 'Verify that [:alpha:] is a subset of [:alnum:]';
1266 for (map { chr } 0..255) {
1267 if ($is_utf8_locale) {
1268 use locale ':not_characters';
1269 push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/;
1270 }
1271 else {
1272 push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/;
1273 }
1274 }
1275 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1276
db31898d
KW
1277 ++$locales_test_number;
1278 undef @f;
660c7bcc
KW
1279 $test_names{$locales_test_number} = 'Verify that [:digit:] contains at least 0-9';
1280 for ('0' .. '9') {
1281 if ($is_utf8_locale) {
1282 use locale ':not_characters';
1283 push @f, $_ unless /[[:digit:]]/;
1284 }
1285 else {
1286 push @f, $_ unless /[[:digit:]]/;
1287 }
1288 }
1289 report_multi_result($Locale, $locales_test_number, \@f);
1290
1291 ++$locales_test_number;
1292 undef @f;
db31898d
KW
1293 $test_names{$locales_test_number} = 'Verify that [:digit:] is a subset of [:alnum:]';
1294 for (map { chr } 0..255) {
1295 if ($is_utf8_locale) {
1296 use locale ':not_characters';
1297 push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/;
1298 }
1299 else {
1300 push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/;
1301 }
1302 }
1303 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1304
db31898d
KW
1305 ++$locales_test_number;
1306 undef @f;
aac995d2 1307 $test_names{$locales_test_number} = 'Verify that [:digit:] matches either 10 or 20 code points';
fb844330 1308 report_result($Locale, $locales_test_number, @{$posixes{'digit'}} == 10 || @{$posixes{'digit'}} == 20);
aac995d2
KW
1309
1310 ++$locales_test_number;
1311 undef @f;
660c7bcc
KW
1312 $test_names{$locales_test_number} = 'Verify that if there is a second set of digits in [:digit:], they are consecutive';
1313 if (@{$posixes{'digit'}} == 20) {
1314 my $previous_ord;
fc81f5f2 1315 for (map { chr } 0..255) {
660c7bcc
KW
1316 next unless /[[:digit:]]/;
1317 next if /[0-9]/;
1318 if (defined $previous_ord) {
1319 if ($is_utf8_locale) {
1320 use locale ':not_characters';
1321 push @f, $_ if ord $_ != $previous_ord + 1;
1322 }
1323 else {
1324 push @f, $_ if ord $_ != $previous_ord + 1;
1325 }
fc81f5f2 1326 }
660c7bcc 1327 $previous_ord = ord $_;
db31898d
KW
1328 }
1329 }
1330 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1331
db31898d
KW
1332 ++$locales_test_number;
1333 undef @f;
d458c02c
KW
1334 my @xdigit_digits; # :digit: & :xdigit:
1335 $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains one or two blocks of 10 consecutive [:digit:] chars';
db31898d
KW
1336 for (map { chr } 0..255) {
1337 if ($is_utf8_locale) {
1338 use locale ':not_characters';
d458c02c
KW
1339 # For utf8 locales, we actually use a stricter test: that :digit:
1340 # is a subset of :xdigit:, as we know that only 0-9 should match
1341 push @f, $_ if /[[:digit:]]/ and ! /[[:xdigit:]]/;
db31898d
KW
1342 }
1343 else {
d458c02c 1344 push @xdigit_digits, $_ if /[[:digit:]]/ and /[[:xdigit:]]/;
db31898d
KW
1345 }
1346 }
d458c02c
KW
1347 if (! $is_utf8_locale) {
1348
1349 # For non-utf8 locales, @xdigit_digits is a list of the characters
1350 # that are both :xdigit: and :digit:. Because :digit: is stored in
1351 # increasing code point order (unless the tests above failed),
1352 # @xdigit_digits is as well. There should be exactly 10 or
1353 # 20 of these.
1354 if (@xdigit_digits != 10 && @xdigit_digits != 20) {
1355 @f = @xdigit_digits;
1356 }
1357 else {
1358
1359 # Look for contiguity in the series, adding any wrong ones to @f
1360 my @temp = @xdigit_digits;
1361 while (@temp > 1) {
1362 push @f, $temp[1] if ($temp[0] != $temp[1] - 1)
1363
1364 # Skip this test for the 0th character of
1365 # the second block of 10, as it won't be
1366 # contiguous with the previous block
1367 && (! defined $xdigit_digits[10]
1368 || $temp[1] != $xdigit_digits[10]);
1369 shift @temp;
1370 }
1371 }
1372 }
1373
db31898d 1374 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1375
660c7bcc
KW
1376 ++$locales_test_number;
1377 undef @f;
1378 $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains at least A-F, a-f';
1379 for ('A' .. 'F', 'a' .. 'f') {
1380 if ($is_utf8_locale) {
1381 use locale ':not_characters';
1382 push @f, $_ unless /[[:xdigit:]]/;
1383 }
1384 else {
1385 push @f, $_ unless /[[:xdigit:]]/;
1386 }
1387 }
1388 report_multi_result($Locale, $locales_test_number, \@f);
1389
1390 ++$locales_test_number;
1391 undef @f;
1392 $test_names{$locales_test_number} = 'Verify that any additional members of [:xdigit:], are in groups of 6 consecutive code points';
1393 my $previous_ord;
1394 my $count = 0;
d458c02c
KW
1395 for my $chr (map { chr } 0..255) {
1396 next unless $chr =~ /[[:xdigit:]]/;
1397 if ($is_utf8_locale) {
1398 next if $chr =~ /[[:digit:]]/;
1399 }
1400 else {
1401 next if grep { $chr eq $_ } @xdigit_digits;
1402 }
1403 next if $chr =~ /[A-Fa-f]/;
660c7bcc
KW
1404 if (defined $previous_ord) {
1405 if ($is_utf8_locale) {
1406 use locale ':not_characters';
d458c02c 1407 push @f, $chr if ord $chr != $previous_ord + 1;
660c7bcc
KW
1408 }
1409 else {
d458c02c 1410 push @f, $chr if ord $chr != $previous_ord + 1;
660c7bcc
KW
1411 }
1412 }
1413 $count++;
1414 if ($count == 6) {
1415 undef $previous_ord;
1416 }
1417 else {
d458c02c 1418 $previous_ord = ord $chr;
660c7bcc
KW
1419 }
1420 }
1421 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1422
db31898d
KW
1423 ++$locales_test_number;
1424 undef @f;
1425 $test_names{$locales_test_number} = 'Verify that [:xdigit:] is a subset of [:graph:]';
1426 for (map { chr } 0..255) {
1427 if ($is_utf8_locale) {
1428 use locale ':not_characters';
1429 push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/;
1430 }
1431 else {
1432 push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/;
1433 }
1434 }
1435 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1436
660c7bcc
KW
1437 # Note that xdigit doesn't have to be a subset of alnum
1438
db31898d
KW
1439 ++$locales_test_number;
1440 undef @f;
1441 $test_names{$locales_test_number} = 'Verify that [:punct:] is a subset of [:graph:]';
1442 for (map { chr } 0..255) {
1443 if ($is_utf8_locale) {
1444 use locale ':not_characters';
1445 push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/;
1446 }
1447 else {
1448 push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/;
1449 }
1450 }
1451 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1452
db31898d
KW
1453 ++$locales_test_number;
1454 undef @f;
660c7bcc
KW
1455 $test_names{$locales_test_number} = 'Verify that the space character is not in [:graph:]';
1456 if ($is_utf8_locale) {
1457 use locale ':not_characters';
1458 push @f, " " if " " =~ /[[:graph:]]/;
1459 }
1460 else {
1461 push @f, " " if " " =~ /[[:graph:]]/;
1462 }
1463 report_multi_result($Locale, $locales_test_number, \@f);
1464
1465 ++$locales_test_number;
1466 undef @f;
1467 $test_names{$locales_test_number} = 'Verify that [:space:] contains at least [\f\n\r\t\cK ]';
1468 for (' ', "\f", "\n", "\r", "\t", "\cK") {
1469 if ($is_utf8_locale) {
1470 use locale ':not_characters';
1471 push @f, $_ unless /[[:space:]]/;
1472 }
1473 else {
1474 push @f, $_ unless /[[:space:]]/;
1475 }
1476 }
1477 report_multi_result($Locale, $locales_test_number, \@f);
1478
1479 ++$locales_test_number;
1480 undef @f;
1481 $test_names{$locales_test_number} = 'Verify that [:blank:] contains at least [\t ]';
1482 for (' ', "\t") {
1483 if ($is_utf8_locale) {
1484 use locale ':not_characters';
1485 push @f, $_ unless /[[:blank:]]/;
1486 }
1487 else {
1488 push @f, $_ unless /[[:blank:]]/;
1489 }
1490 }
1491 report_multi_result($Locale, $locales_test_number, \@f);
1492
1493 ++$locales_test_number;
1494 undef @f;
db31898d
KW
1495 $test_names{$locales_test_number} = 'Verify that [:blank:] is a subset of [:space:]';
1496 for (map { chr } 0..255) {
1497 if ($is_utf8_locale) {
1498 use locale ':not_characters';
1499 push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/;
1500 }
1501 else {
1502 push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/;
1503 }
1504 }
1505 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1506
db31898d
KW
1507 ++$locales_test_number;
1508 undef @f;
1509 $test_names{$locales_test_number} = 'Verify that [:graph:] is a subset of [:print:]';
1510 for (map { chr } 0..255) {
1511 if ($is_utf8_locale) {
1512 use locale ':not_characters';
1513 push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/;
1514 }
1515 else {
1516 push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/;
1517 }
1518 }
1519 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1520
db31898d
KW
1521 ++$locales_test_number;
1522 undef @f;
660c7bcc
KW
1523 $test_names{$locales_test_number} = 'Verify that the space character is in [:print:]';
1524 if ($is_utf8_locale) {
1525 use locale ':not_characters';
1526 push @f, " " if " " !~ /[[:print:]]/;
1527 }
1528 else {
1529 push @f, " " if " " !~ /[[:print:]]/;
1530 }
1531 report_multi_result($Locale, $locales_test_number, \@f);
1532
1533 ++$locales_test_number;
1534 undef @f;
db31898d
KW
1535 $test_names{$locales_test_number} = 'Verify that isn\'t both [:cntrl:] and [:print:]';
1536 for (map { chr } 0..255) {
1537 if ($is_utf8_locale) {
1538 use locale ':not_characters';
1539 push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/);
1540 }
1541 else {
1542 push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/);
1543 }
1544 }
1545 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1546
db31898d
KW
1547 ++$locales_test_number;
1548 undef @f;
660c7bcc
KW
1549 $test_names{$locales_test_number} = 'Verify that isn\'t both [:alpha:] and [:digit:]';
1550 for (map { chr } 0..255) {
1551 if ($is_utf8_locale) {
1552 use locale ':not_characters';
1553 push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/;
1554 }
1555 else {
1556 push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/;
1557 }
1558 }
1559 report_multi_result($Locale, $locales_test_number, \@f);
1560
1561 ++$locales_test_number;
1562 undef @f;
db31898d
KW
1563 $test_names{$locales_test_number} = 'Verify that isn\'t both [:alnum:] and [:punct:]';
1564 for (map { chr } 0..255) {
1565 if ($is_utf8_locale) {
1566 use locale ':not_characters';
1567 push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/;
1568 }
1569 else {
1570 push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/;
1571 }
1572 }
1573 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1574
db31898d
KW
1575 ++$locales_test_number;
1576 undef @f;
1577 $test_names{$locales_test_number} = 'Verify that isn\'t both [:xdigit:] and [:punct:]';
1578 for (map { chr } 0..255) {
1579 if ($is_utf8_locale) {
1580 use locale ':not_characters';
1581 push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/);
1582 }
1583 else {
1584 push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/);
1585 }
1586 }
1587 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1588
db31898d
KW
1589 ++$locales_test_number;
1590 undef @f;
1591 $test_names{$locales_test_number} = 'Verify that isn\'t both [:graph:] and [:space:]';
1592 for (map { chr } 0..255) {
1593 if ($is_utf8_locale) {
1594 use locale ':not_characters';
1595 push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/);
1596 }
1597 else {
1598 push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/);
1599 }
1600 }
1601 report_multi_result($Locale, $locales_test_number, \@f);
32e8aa3f 1602
7493b8f2 1603 foreach ($first_casing_test_number..$locales_test_number) {
5b1f18a7 1604 $problematical_tests{$_} = 1;
7493b8f2
KW
1605 }
1606
32e8aa3f 1607
db31898d
KW
1608 # Test for read-only scalars' locale vs non-locale comparisons.
1609
1610 {
1611 no locale;
1612 my $ok;
1613 $a = "qwerty";
1614 if ($is_utf8_locale) {
1615 use locale ':not_characters';
1616 $ok = ($a cmp "qwerty") == 0;
1617 }
1618 else {
1619 use locale;
1620 $ok = ($a cmp "qwerty") == 0;
1621 }
1622 report_result($Locale, ++$locales_test_number, $ok);
1623 $test_names{$locales_test_number} = 'Verify that cmp works with a read-only scalar; no- vs locale';
1624 }
8ebc5c01 1625
db31898d
KW
1626 {
1627 my ($from, $to, $lesser, $greater,
1628 @test, %test, $test, $yes, $no, $sign);
284102e8 1629
db31898d
KW
1630 ++$locales_test_number;
1631 $test_names{$locales_test_number} = 'Verify that "le", "ne", etc work';
1632 $not_necessarily_a_problem_test_number = $locales_test_number;
1633 for (0..9) {
1634 # Select a slice.
fb844330
KW
1635 $from = int(($_*@{$posixes{'word'}})/10);
1636 $to = $from + int(@{$posixes{'word'}}/10);
1637 $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}});
1638 $lesser = join('', @{$posixes{'word'}}[$from..$to]);
db31898d
KW
1639 # Select a slice one character on.
1640 $from++; $to++;
fb844330
KW
1641 $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}});
1642 $greater = join('', @{$posixes{'word'}}[$from..$to]);
66cbab2c
KW
1643 if ($is_utf8_locale) {
1644 use locale ':not_characters';
db31898d
KW
1645 ($yes, $no, $sign) = ($lesser lt $greater
1646 ? (" ", "not ", 1)
1647 : ("not ", " ", -1));
66cbab2c
KW
1648 }
1649 else {
1650 use locale;
db31898d
KW
1651 ($yes, $no, $sign) = ($lesser lt $greater
1652 ? (" ", "not ", 1)
1653 : ("not ", " ", -1));
66cbab2c 1654 }
db31898d
KW
1655 # all these tests should FAIL (return 0). Exact lt or gt cannot
1656 # be tested because in some locales, say, eacute and E may test
1657 # equal.
1658 @test =
1659 (
1660 $no.' ($lesser le $greater)', # 1
1661 'not ($lesser ne $greater)', # 2
1662 ' ($lesser eq $greater)', # 3
1663 $yes.' ($lesser ge $greater)', # 4
1664 $yes.' ($lesser ge $greater)', # 5
1665 $yes.' ($greater le $lesser )', # 7
1666 'not ($greater ne $lesser )', # 8
1667 ' ($greater eq $lesser )', # 9
1668 $no.' ($greater ge $lesser )', # 10
1669 'not (($lesser cmp $greater) == -($sign))' # 11
1670 );
1671 @test{@test} = 0 x @test;
1672 $test = 0;
1673 for my $ti (@test) {
66cbab2c
KW
1674 if ($is_utf8_locale) {
1675 use locale ':not_characters';
db31898d 1676 $test{$ti} = eval $ti;
66cbab2c
KW
1677 }
1678 else {
db31898d
KW
1679 # Already in 'use locale';
1680 $test{$ti} = eval $ti;
66cbab2c 1681 }
db31898d
KW
1682 $test ||= $test{$ti}
1683 }
1684 report_result($Locale, $locales_test_number, $test == 0);
1685 if ($test) {
46973bb2
KW
1686 debug "lesser = '$lesser'\n";
1687 debug "greater = '$greater'\n";
1688 debug "lesser cmp greater = ",
db31898d 1689 $lesser cmp $greater, "\n";
46973bb2 1690 debug "greater cmp lesser = ",
db31898d 1691 $greater cmp $lesser, "\n";
46973bb2 1692 debug "(greater) from = $from, to = $to\n";
db31898d
KW
1693 for my $ti (@test) {
1694 debugf("# %-40s %-4s", $ti,
1695 $test{$ti} ? 'FAIL' : 'ok');
1696 if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) {
1697 debugf("(%s == %4d)", $1, eval $1);
66cbab2c 1698 }
46973bb2 1699 debugf("\n#");
db31898d
KW
1700 }
1701
1702 last;
1703 }
1704 }
1705 }
c4093d7d 1706
66cbab2c
KW
1707 my $ok1;
1708 my $ok2;
1709 my $ok3;
1710 my $ok4;
1711 my $ok5;
1712 my $ok6;
1713 my $ok7;
1714 my $ok8;
1715 my $ok9;
1716 my $ok10;
1717 my $ok11;
1718 my $ok12;
1719 my $ok13;
1500bd91 1720 my $ok14;
2c6ee1a7 1721 my $ok14_5;
28acfe03
KW
1722 my $ok15;
1723 my $ok16;
e46375fa
KW
1724 my $ok17;
1725 my $ok18;
f406a445 1726 my $ok19;
9717af6d 1727 my $ok20;
8ee4b769 1728 my $ok21;
66cbab2c
KW
1729
1730 my $c;
1731 my $d;
1732 my $e;
1733 my $f;
1734 my $g;
e46375fa
KW
1735 my $h;
1736 my $i;
1737 my $j;
66cbab2c
KW
1738
1739 if (! $is_utf8_locale) {
71e5cbb3 1740 use locale;
6be75cd7 1741
71e5cbb3 1742 my ($x, $y) = (1.23, 1.23);
6be75cd7 1743
71e5cbb3
KW
1744 $a = "$x";
1745 printf ''; # printf used to reset locale to "C"
1746 $b = "$y";
1747 $ok1 = $a eq $b;
6be75cd7 1748
71e5cbb3
KW
1749 $c = "$x";
1750 my $z = sprintf ''; # sprintf used to reset locale to "C"
1751 $d = "$y";
1752 $ok2 = $c eq $d;
1753 {
66cbab2c 1754
71e5cbb3
KW
1755 use warnings;
1756 my $w = 0;
1757 local $SIG{__WARN__} =
1758 sub {
1759 print "# @_\n";
1760 $w++;
1761 };
6be75cd7 1762
71e5cbb3
KW
1763 # The == (among other ops) used to warn for locales
1764 # that had something else than "." as the radix character.
6be75cd7 1765
71e5cbb3
KW
1766 $ok3 = $c == 1.23;
1767 $ok4 = $c == $x;
1768 $ok5 = $c == $d;
1769 {
1770 no locale;
66cbab2c 1771
b79536ea 1772 $e = "$x";
71e5cbb3
KW
1773
1774 $ok6 = $e == 1.23;
1775 $ok7 = $e == $x;
1776 $ok8 = $e == $c;
1777 }
66cbab2c 1778
71e5cbb3
KW
1779 $f = "1.23";
1780 $g = 2.34;
e46375fa
KW
1781 $h = 1.5;
1782 $i = 1.25;
1783 $j = "$h:$i";
66cbab2c 1784
71e5cbb3
KW
1785 $ok9 = $f == 1.23;
1786 $ok10 = $f == $x;
1787 $ok11 = $f == $c;
1788 $ok12 = abs(($f + $g) - 3.57) < 0.01;
1789 $ok13 = $w == 0;
2c6ee1a7 1790 $ok14 = $ok14_5 = $ok15 = $ok16 = 1; # Skip for non-utf8 locales
71e5cbb3 1791 }
e46375fa
KW
1792 {
1793 no locale;
1794 $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i);
1795 }
1796 $ok18 = $j eq sprintf("%g:%g", $h, $i);
66cbab2c
KW
1797 }
1798 else {
1799 use locale ':not_characters';
1800
1801 my ($x, $y) = (1.23, 1.23);
1802 $a = "$x";
1803 printf ''; # printf used to reset locale to "C"
1804 $b = "$y";
1805 $ok1 = $a eq $b;
1806
1807 $c = "$x";
1808 my $z = sprintf ''; # sprintf used to reset locale to "C"
1809 $d = "$y";
1810 $ok2 = $c eq $d;
1811 {
1812 use warnings;
1813 my $w = 0;
1814 local $SIG{__WARN__} =
1815 sub {
1816 print "# @_\n";
1817 $w++;
1818 };
1819 $ok3 = $c == 1.23;
1820 $ok4 = $c == $x;
1821 $ok5 = $c == $d;
1822 {
1823 no locale;
b79536ea 1824 $e = "$x";
66cbab2c
KW
1825
1826 $ok6 = $e == 1.23;
1827 $ok7 = $e == $x;
1828 $ok8 = $e == $c;
1829 }
1830
1831 $f = "1.23";
1832 $g = 2.34;
e46375fa
KW
1833 $h = 1.5;
1834 $i = 1.25;
1835 $j = "$h:$i";
66cbab2c
KW
1836
1837 $ok9 = $f == 1.23;
1838 $ok10 = $f == $x;
1839 $ok11 = $f == $c;
1840 $ok12 = abs(($f + $g) - 3.57) < 0.01;
1841 $ok13 = $w == 0;
1500bd91
KW
1842
1843 # Look for non-ASCII error messages, and verify that the first
5320b60d 1844 # such is in UTF-8 (the others almost certainly will be like the
5af2752a 1845 # first). This is only done if the current locale has LC_MESSAGES
1500bd91 1846 $ok14 = 1;
2c6ee1a7 1847 $ok14_5 = 1;
5af2752a 1848 if (setlocale(&POSIX::LC_MESSAGES, $Locale)) {
9bc33472
KW
1849 foreach my $err (keys %!) {
1850 use Errno;
1851 $! = eval "&Errno::$err"; # Convert to strerror() output
1852 my $strerror = "$!";
1853 if ("$strerror" =~ /\P{ASCII}/) {
1854 $ok14 = utf8::is_utf8($strerror);
1855 no locale;
1856 $ok14_5 = "$!" !~ /\P{ASCII}/;
1857 last;
1858 }
1500bd91
KW
1859 }
1860 }
28acfe03
KW
1861
1862 # Similarly, we verify that a non-ASCII radix is in UTF-8. This
1863 # also catches if there is a disparity between sprintf and
1864 # stringification.
1865
1866 my $string_g = "$g";
2ef12107 1867 my $sprintf_g = sprintf("%g", $g);
28acfe03 1868
2ef12107
KW
1869 $ok15 = $string_g =~ / ^ \p{ASCII}+ $ /x || utf8::is_utf8($string_g);
1870 $ok16 = $sprintf_g eq $string_g;
66cbab2c 1871 }
e46375fa
KW
1872 {
1873 no locale;
1874 $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i);
1875 }
1876 $ok18 = $j eq sprintf("%g:%g", $h, $i);
57ba90dd
KW
1877 }
1878
d6bfff6d
KW
1879 $ok19 = $ok20 = 1;
1880 if (setlocale(&POSIX::LC_TIME, $Locale)) { # These tests aren't affected by
1881 # :not_characters
57ba90dd
KW
1882 my @times = CORE::localtime();
1883
1884 use locale;
1885 $ok19 = POSIX::strftime("%p", @times) ne "%p"; # [perl #119425]
dc5a2383
KW
1886 my $date = POSIX::strftime("'%A' '%B' '%Z' '%p'", @times);
1887 debug("'Day' 'Month' 'TZ' 'am/pm' = ", disp_str($date));
57ba90dd
KW
1888
1889 # If there is any non-ascii, it better be UTF-8 in a UTF-8 locale, and
1890 # not UTF-8 if the locale isn't UTF-8.
1891 $ok20 = $date =~ / ^ \p{ASCII}+ $ /x
1892 || $is_utf8_locale == utf8::is_utf8($date);
66cbab2c
KW
1893 }
1894
8ee4b769
KW
1895 $ok21 = 1;
1896 foreach my $err (keys %!) {
1897 no locale;
1898 use Errno;
1899 $! = eval "&Errno::$err"; # Convert to strerror() output
1900 my $strerror = "$!";
1901 if ("$strerror" =~ /\P{ASCII}/) {
1902 $ok21 = 0;
1903 last;
1904 }
1905 }
1906
30032ef4 1907 report_result($Locale, ++$locales_test_number, $ok1);
66cbab2c
KW
1908 $test_names{$locales_test_number} = 'Verify that an intervening printf doesn\'t change assignment results';
1909 my $first_a_test = $locales_test_number;
1910
46973bb2 1911 debug "$first_a_test..$locales_test_number: \$a = $a, \$b = $b, Locale = $Locale\n";
66cbab2c 1912
30032ef4 1913 report_result($Locale, ++$locales_test_number, $ok2);
66cbab2c
KW
1914 $test_names{$locales_test_number} = 'Verify that an intervening sprintf doesn\'t change assignment results';
1915
1916 my $first_c_test = $locales_test_number;
1917
30032ef4 1918 report_result($Locale, ++$locales_test_number, $ok3);
71e5cbb3 1919 $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a constant';
b057411d 1920 $problematical_tests{$locales_test_number} = 1;
6be75cd7 1921
30032ef4 1922 report_result($Locale, ++$locales_test_number, $ok4);
71e5cbb3 1923 $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar';
b057411d 1924 $problematical_tests{$locales_test_number} = 1;
66cbab2c 1925
30032ef4 1926 report_result($Locale, ++$locales_test_number, $ok5);
71e5cbb3 1927 $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar and an intervening sprintf';
b057411d 1928 $problematical_tests{$locales_test_number} = 1;
66cbab2c 1929
46973bb2 1930 debug "$first_c_test..$locales_test_number: \$c = $c, \$d = $d, Locale = $Locale\n";
66cbab2c 1931
30032ef4 1932 report_result($Locale, ++$locales_test_number, $ok6);
b79536ea 1933 $test_names{$locales_test_number} = 'Verify that can assign stringified under inner no-locale block';
71e5cbb3 1934 my $first_e_test = $locales_test_number;
6be75cd7 1935
30032ef4 1936 report_result($Locale, ++$locales_test_number, $ok7);
71e5cbb3 1937 $test_names{$locales_test_number} = 'Verify that "==" with a scalar still works in inner no locale';
66cbab2c 1938
30032ef4 1939 report_result($Locale, ++$locales_test_number, $ok8);
71e5cbb3 1940 $test_names{$locales_test_number} = 'Verify that "==" with a scalar and an intervening sprintf still works in inner no locale';
b057411d 1941 $problematical_tests{$locales_test_number} = 1;
c4093d7d 1942
46973bb2 1943 debug "$first_e_test..$locales_test_number: \$e = $e, no locale\n";
2a680da6 1944
30032ef4 1945 report_result($Locale, ++$locales_test_number, $ok9);
71e5cbb3 1946 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a constant';
b057411d 1947 $problematical_tests{$locales_test_number} = 1;
71e5cbb3 1948 my $first_f_test = $locales_test_number;
6be75cd7 1949
30032ef4 1950 report_result($Locale, ++$locales_test_number, $ok10);
71e5cbb3 1951 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar';
b057411d 1952 $problematical_tests{$locales_test_number} = 1;
66cbab2c 1953
30032ef4 1954 report_result($Locale, ++$locales_test_number, $ok11);
71e5cbb3 1955 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar and an intervening sprintf';
b057411d 1956 $problematical_tests{$locales_test_number} = 1;
906f284f 1957
30032ef4 1958 report_result($Locale, ++$locales_test_number, $ok12);
71e5cbb3 1959 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix can participate in an addition and function call as numeric';
b057411d 1960 $problematical_tests{$locales_test_number} = 1;
c4093d7d 1961
30032ef4 1962 report_result($Locale, ++$locales_test_number, $ok13);
71e5cbb3 1963 $test_names{$locales_test_number} = 'Verify that don\'t get warning under "==" even if radix is not a dot';
b057411d 1964 $problematical_tests{$locales_test_number} = 1;
c4093d7d 1965
30032ef4 1966 report_result($Locale, ++$locales_test_number, $ok14);
5320b60d 1967 $test_names{$locales_test_number} = 'Verify that non-ASCII UTF-8 error messages are in UTF-8';
1500bd91 1968
2c6ee1a7
KW
1969 report_result($Locale, ++$locales_test_number, $ok14_5);
1970 $test_names{$locales_test_number} = '... and are ASCII outside "use locale"';
1971
30032ef4 1972 report_result($Locale, ++$locales_test_number, $ok15);
28acfe03
KW
1973 $test_names{$locales_test_number} = 'Verify that a number with a UTF-8 radix has a UTF-8 stringification';
1974
30032ef4 1975 report_result($Locale, ++$locales_test_number, $ok16);
28acfe03
KW
1976 $test_names{$locales_test_number} = 'Verify that a sprintf of a number with a UTF-8 radix yields UTF-8';
1977
e46375fa
KW
1978 report_result($Locale, ++$locales_test_number, $ok17);
1979 $test_names{$locales_test_number} = 'Verify that a sprintf of a number outside locale scope uses a dot radix';
1980
1981 report_result($Locale, ++$locales_test_number, $ok18);
1982 $test_names{$locales_test_number} = 'Verify that a sprintf of a number back within locale scope uses locale radix';
1983
f406a445
KW
1984 report_result($Locale, ++$locales_test_number, $ok19);
1985 $test_names{$locales_test_number} = 'Verify that strftime doesn\'t return "%p" in locales where %p is empty';
1986
9717af6d
KW
1987 report_result($Locale, ++$locales_test_number, $ok20);
1988 $test_names{$locales_test_number} = 'Verify that strftime returns date with UTF-8 flag appropriately set';
6a78954a
KW
1989 $problematical_tests{$locales_test_number} = 1; # This is broken in
1990 # OS X 10.9.3
9717af6d 1991
8ee4b769
KW
1992 report_result($Locale, ++$locales_test_number, $ok21);
1993 $test_names{$locales_test_number} = '"$!" is ASCII only outside of locale scope';
1994
46973bb2 1995 debug "$first_f_test..$locales_test_number: \$f = $f, \$g = $g, back to locale = $Locale\n";
906f284f 1996
26d80d95
LC
1997 # Does taking lc separately differ from taking
1998 # the lc "in-line"? (This was the bug 19990704.002, change #3568.)
1999 # The bug was in the caching of the 'o'-magic.
66cbab2c 2000 if (! $is_utf8_locale) {
2a680da6 2001 use locale;
6be75cd7 2002
2a680da6
JH
2003 sub lcA {
2004 my $lc0 = lc $_[0];
2005 my $lc1 = lc $_[1];
2006 return $lc0 cmp $lc1;
2007 }
6be75cd7 2008
2a680da6
JH
2009 sub lcB {
2010 return lc($_[0]) cmp lc($_[1]);
2011 }
6be75cd7 2012
2a680da6
JH
2013 my $x = "ab";
2014 my $y = "aa";
2015 my $z = "AB";
6be75cd7 2016
30032ef4 2017 report_result($Locale, ++$locales_test_number,
2a680da6
JH
2018 lcA($x, $y) == 1 && lcB($x, $y) == 1 ||
2019 lcA($x, $z) == 0 && lcB($x, $z) == 0);
6be75cd7 2020 }
66cbab2c
KW
2021 else {
2022 use locale ':not_characters';
2023
2024 sub lcC {
2025 my $lc0 = lc $_[0];
2026 my $lc1 = lc $_[1];
2027 return $lc0 cmp $lc1;
2028 }
2029
2030 sub lcD {
2031 return lc($_[0]) cmp lc($_[1]);
2032 }
2033
2034 my $x = "ab";
2035 my $y = "aa";
2036 my $z = "AB";
2037
30032ef4 2038 report_result($Locale, ++$locales_test_number,
66cbab2c
KW
2039 lcC($x, $y) == 1 && lcD($x, $y) == 1 ||
2040 lcC($x, $z) == 0 && lcD($x, $z) == 0);
2041 }
2042 $test_names{$locales_test_number} = 'Verify "lc(foo) cmp lc(bar)" is the same as using intermediaries for the cmp';
d8093b23 2043
26d80d95
LC
2044 # Does lc of an UPPER (if different from the UPPER) match
2045 # case-insensitively the UPPER, and does the UPPER match
2046 # case-insensitively the lc of the UPPER. And vice versa.
3ba0e062 2047 {
ef4a39e5
JH
2048 use locale;
2049 no utf8;
2050 my $re = qr/[\[\(\{\*\+\?\|\^\$\\]/;
2051
2052 my @f = ();
c4093d7d 2053 ++$locales_test_number;
c08acc4c 2054 $test_names{$locales_test_number} = 'Verify case insensitive matching works';
b9df08e4 2055 foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) {
66cbab2c 2056 if (! $is_utf8_locale) {
71e5cbb3
KW
2057 my $y = lc $x;
2058 next unless uc $y eq $x;
46973bb2 2059 debug_more( "UPPER=", disp_chars(($x)),
92891c66 2060 "; lc=", disp_chars(($y)), "; ",
4eac893c 2061 "; fc=", disp_chars((fc $x)), "; ",
92891c66
KW
2062 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
2063 $x =~ /$y/i ? 1 : 0,
2064 "; ",
2065 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
2066 $y =~ /$x/i ? 1 : 0,
2067 "\n");
71e5cbb3
KW
2068 #
2069 # If $x and $y contain regular expression characters
2070 # AND THEY lowercase (/i) to regular expression characters,
2071 # regcomp() will be mightily confused. No, the \Q doesn't
2072 # help here (maybe regex engine internal lowercasing
2073 # is done after the \Q?) An example of this happening is
2074 # the bg_BG (Bulgarian) locale under EBCDIC (OS/390 USS):
2075 # the chr(173) (the "[") is the lowercase of the chr(235).
2076 #
2077 # Similarly losing EBCDIC locales include cs_cz, cs_CZ,
2078 # el_gr, el_GR, en_us.IBM-037 (!), en_US.IBM-037 (!),
2079 # et_ee, et_EE, hr_hr, hr_HR, hu_hu, hu_HU, lt_LT,
2080 # mk_mk, mk_MK, nl_nl.IBM-037, nl_NL.IBM-037,
2081 # pl_pl, pl_PL, ro_ro, ro_RO, ru_ru, ru_RU,
2082 # sk_sk, sk_SK, sl_si, sl_SI, tr_tr, tr_TR.
2083 #
2084 # Similar things can happen even under (bastardised)
2085 # non-EBCDIC locales: in many European countries before the
2086 # advent of ISO 8859-x nationally customised versions of
2087 # ISO 646 were devised, reusing certain punctuation
2088 # characters for modified characters needed by the
2089 # country/language. For example, the "|" might have
2090 # stood for U+00F6 or LATIN SMALL LETTER O WITH DIAERESIS.
2091 #
2092 if ($x =~ $re || $y =~ $re) {
2093 print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n";
2094 next;
2095 }
69d77f94 2096 push @f, $x unless $x =~ /$y/i && $y =~ /$x/i;
26c1569f
KW
2097
2098 # fc is not a locale concept, so Perl uses lc for it.
2099 push @f, $x unless lc $x eq fc $x;
66cbab2c
KW
2100 }
2101 else {
2102 use locale ':not_characters';
2103 my $y = lc $x;
2104 next unless uc $y eq $x;
46973bb2 2105 debug_more( "UPPER=", disp_chars(($x)),
92891c66 2106 "; lc=", disp_chars(($y)), "; ",
4eac893c 2107 "; fc=", disp_chars((fc $x)), "; ",
92891c66
KW
2108 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
2109 $x =~ /$y/i ? 1 : 0,
2110 "; ",
2111 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
2112 $y =~ /$x/i ? 1 : 0,
2113 "\n");
66cbab2c 2114
66cbab2c 2115 push @f, $x unless $x =~ /$y/i && $y =~ /$x/i;
26c1569f
KW
2116
2117 # The places where Unicode's lc is different from fc are
2118 # skipped here by virtue of the 'next unless uc...' line above
2119 push @f, $x unless lc $x eq fc $x;
66cbab2c 2120 }
c00ff1c7 2121 }
ef4a39e5 2122
b9df08e4 2123 foreach my $x (sort { ord $a <=> ord $b } keys %lower) {
66cbab2c 2124 if (! $is_utf8_locale) {
71e5cbb3
KW
2125 my $y = uc $x;
2126 next unless lc $y eq $x;
46973bb2 2127 debug_more( "lower=", disp_chars(($x)),
92891c66 2128 "; uc=", disp_chars(($y)), "; ",
4eac893c 2129 "; fc=", disp_chars((fc $x)), "; ",
92891c66
KW
2130 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
2131 $x =~ /$y/i ? 1 : 0,
2132 "; ",
2133 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
2134 $y =~ /$x/i ? 1 : 0,
2135 "\n");
71e5cbb3
KW
2136 if ($x =~ $re || $y =~ $re) { # See above.
2137 print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n";
2138 next;
2139 }
69d77f94 2140 push @f, $x unless $x =~ /$y/i && $y =~ /$x/i;
26c1569f
KW
2141
2142 push @f, $x unless lc $x eq fc $x;
66cbab2c
KW
2143 }
2144 else {
2145 use locale ':not_characters';
2146 my $y = uc $x;
2147 next unless lc $y eq $x;
46973bb2 2148 debug_more( "lower=", disp_chars(($x)),
92891c66 2149 "; uc=", disp_chars(($y)), "; ",
4eac893c 2150 "; fc=", disp_chars((fc $x)), "; ",
92891c66
KW
2151 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
2152 $x =~ /$y/i ? 1 : 0,
2153 "; ",
2154 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
2155 $y =~ /$x/i ? 1 : 0,
2156 "\n");
66cbab2c 2157 push @f, $x unless $x =~ /$y/i && $y =~ /$x/i;
26c1569f
KW
2158
2159 push @f, $x unless lc $x eq fc $x;
66cbab2c 2160 }
c00ff1c7 2161 }
7c844d17 2162 report_multi_result($Locale, $locales_test_number, \@f);
5b1f18a7 2163 $problematical_tests{$locales_test_number} = 1;
d8093b23 2164 }
78787052
JL
2165
2166 # [perl #109318]
2167 {
2168 my @f = ();
2169 ++$locales_test_number;
2170 $test_names{$locales_test_number} = 'Verify atof with locale radix and negative exponent';
b057411d 2171 $problematical_tests{$locales_test_number} = 1;
78787052
JL
2172
2173 my $radix = POSIX::localeconv()->{decimal_point};
2174 my @nums = (
2175 "3.14e+9", "3${radix}14e+9", "3.14e-9", "3${radix}14e-9",
2176 "-3.14e+9", "-3${radix}14e+9", "-3.14e-9", "-3${radix}14e-9",
2177 );
2178
2179 if (! $is_utf8_locale) {
2180 use locale;
2181 for my $num (@nums) {
2182 push @f, $num
2183 unless sprintf("%g", $num) =~ /3.+14/;
2184 }
2185 }
2186 else {
2187 use locale ':not_characters';
2188 for my $num (@nums) {
2189 push @f, $num
2190 unless sprintf("%g", $num) =~ /3.+14/;
2191 }
2192 }
2193
30032ef4 2194 report_result($Locale, $locales_test_number, @f == 0);
78787052
JL
2195 if (@f) {
2196 print "# failed $locales_test_number locale '$Locale' numbers @f\n"
2197 }
2198 }
8ebc5c01 2199}
284102e8 2200
c4093d7d 2201my $final_locales_test_number = $locales_test_number;
6cf0b567 2202
2a680da6
JH
2203# Recount the errors.
2204
0d3899b1 2205foreach $test_num ($first_locales_test_number..$final_locales_test_number) {
c4093d7d
KW
2206 if (%setlocale_failed) {
2207 print "not ";
2208 }
0d3899b1 2209 elsif ($Problem{$test_num} || !defined $Okay{$test_num} || !@{$Okay{$test_num}}) {
c4093d7d 2210 if (defined $not_necessarily_a_problem_test_number
0d3899b1 2211 && $test_num == $not_necessarily_a_problem_test_number)
c4093d7d
KW
2212 {
2213 print "# The failure of test $not_necessarily_a_problem_test_number is not necessarily fatal.\n";
b4e009be 2214 print "# It usually indicates a problem in the environment,\n";
284102e8
JH
2215 print "# not in Perl itself.\n";
2216 }
5b1f18a7 2217 if ($Okay{$test_num} && grep { $_ == $test_num } keys %problematical_tests) {
d401967c 2218 no warnings 'experimental::autoderef';
0a974e2d 2219 # Round to nearest .1%
0d3899b1 2220 my $percent_fail = (int(.5 + (1000 * scalar(keys $Problem{$test_num})
0a974e2d
KW
2221 / scalar(@Locale))))
2222 / 10;
a20d09fc
KW
2223 if ($percent_fail < $acceptable_failure_percentage) {
2224 if (! $debug) {
2225 $test_names{$test_num} .= 'TODO';
2226 print "# ", 100 - $percent_fail, "% of locales pass the following test, so it is likely that the failures\n";
2227 print "# are errors in the locale definitions. The test is marked TODO, as the\n";
2228 print "# problem is not likely to be Perl's\n";
2229 }
2230 }
2231 elsif ($debug) {
2232 print "# $percent_fail% of locales (",
2233 scalar(keys $Problem{$test_num}),
2234 " of ",
2235 scalar(@Locale),
2236 ") fail the following test\n";
6d5d702a
KW
2237 }
2238 }
f5627fc1
KW
2239 print "#\n";
2240 if ($debug) {
2241 print "# The code points that had this failure are given above. Look for lines\n";
0d3899b1 2242 print "# that match 'failed $test_num'\n";
f5627fc1
KW
2243 }
2244 else {
2245 print "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n";
0d3899b1 2246 print "# Then look at that output for lines that match 'failed $test_num'\n";
108a305e 2247 }
284102e8 2248 print "not ";
8ebc5c01 2249 }
0d3899b1
KW
2250 print "ok $test_num";
2251 if (defined $test_names{$test_num}) {
6c2e653d 2252 # If TODO is in the test name, make it thus
0d3899b1
KW
2253 my $todo = $test_names{$test_num} =~ s/TODO\s*//;
2254 print " $test_names{$test_num}";
6c2e653d
KW
2255 print " # TODO" if $todo;
2256 }
c4093d7d 2257 print "\n";
8ebc5c01 2258}
fb73857a 2259
c4093d7d 2260$test_num = $final_locales_test_number;
c213d471 2261
4bc67723 2262unless ( $^O =~ m!^(dragonfly|openbsd|bitrig|mirbsd)$! ) {
083ace8f 2263 # perl #115808
fbd840df
KW
2264 use warnings;
2265 my $warned = 0;
2266 local $SIG{__WARN__} = sub {
2267 $warned = $_[0] =~ /uninitialized/;
2268 };
2269 my $z = "y" . setlocale(&POSIX::LC_ALL, "xyzzy");
2270 ok($warned, "variable set to setlocale(BAD LOCALE) is considered uninitialized");
2271}
2272
094a2f8c 2273# Test that tainting and case changing works on utf8 strings. These tests are
1f5852c9
KW
2274# placed last to avoid disturbing the hard-coded test numbers that existed at
2275# the time these were added above this in this file.
0099bb8d
KW
2276# This also tests that locale overrides unicode_strings in the same scope for
2277# non-utf8 strings.
a810e350 2278setlocale(&POSIX::LC_ALL, "C");
094a2f8c
KW
2279{
2280 use locale;
0099bb8d 2281 use feature 'unicode_strings';
094a2f8c 2282
26c1569f 2283 foreach my $function ("uc", "ucfirst", "lc", "lcfirst", "fc") {
094a2f8c
KW
2284 my @list; # List of code points to test for $function
2285
2286 # Used to calculate the changed case for ASCII characters by using the
2287 # ord, instead of using one of the functions under test.
2288 my $ascii_case_change_delta;
2289 my $above_latin1_case_change_delta; # Same for the specific ords > 255
2290 # that we use
2291
445bf929 2292 # We test an ASCII character, which should change case;
094a2f8c 2293 # a Latin1 character, which shouldn't change case under this C locale,
094a2f8c 2294 # an above-Latin1 character that when the case is changed would cross
445bf929
KW
2295 # the 255/256 boundary, so doesn't change case
2296 # (the \x{149} is one of these, but changes into 2 characters, the
094a2f8c
KW
2297 # first one of which doesn't cross the boundary.
2298 # the final one in each list is an above-Latin1 character whose case
445bf929
KW
2299 # does change. The code below uses its position in its list as a
2300 # marker to indicate that it, unlike the other code points above
2301 # ASCII, has a successful case change
2302 #
2303 # All casing operations under locale (but not :not_characters) should
2304 # taint
094a2f8c 2305 if ($function =~ /^u/) {
094a2f8c
KW
2306 @list = ("", "a", "\xe0", "\xff", "\x{fb00}", "\x{149}", "\x{101}");
2307 $ascii_case_change_delta = -32;
2308 $above_latin1_case_change_delta = -1;
2309 }
2310 else {
1ca267a5 2311 @list = ("", "A", "\xC0", "\x{17F}", "\x{100}");
094a2f8c
KW
2312 $ascii_case_change_delta = +32;
2313 $above_latin1_case_change_delta = +1;
2314 }
66cbab2c 2315 foreach my $is_utf8_locale (0 .. 1) {
71e5cbb3
KW
2316 foreach my $j (0 .. $#list) {
2317 my $char = $list[$j];
0099bb8d
KW
2318
2319 for my $encoded_in_utf8 (0 .. 1) {
faf0c248
KW
2320 my $should_be;
2321 my $changed;
2322 if (! $is_utf8_locale) {
2323 $should_be = ($j == $#list)
2324 ? chr(ord($char) + $above_latin1_case_change_delta)
2325 : (length $char == 0 || ord($char) > 127)
2326 ? $char
2327 : chr(ord($char) + $ascii_case_change_delta);
2328
2329 # This monstrosity is in order to avoid using an eval,
2330 # which might perturb the results
2331 $changed = ($function eq "uc")
2332 ? uc($char)
2333 : ($function eq "ucfirst")
2334 ? ucfirst($char)
2335 : ($function eq "lc")
2336 ? lc($char)
2337 : ($function eq "lcfirst")
2338 ? lcfirst($char)
26c1569f
KW
2339 : ($function eq "fc")
2340 ? fc($char)
faf0c248
KW
2341 : die("Unexpected function \"$function\"");
2342 }
2343 else {
2344 {
2345 no locale;
71e5cbb3 2346
faf0c248
KW
2347 # For utf8-locales the case changing functions
2348 # should work just like they do outside of locale.
2349 # Can use eval here because not testing it when
2350 # not in locale.
2351 $should_be = eval "$function('$char')";
2352 die "Unexpected eval error $@ from 'eval \"$function('$char')\"'" if $@;
71e5cbb3 2353
faf0c248
KW
2354 }
2355 use locale ':not_characters';
2356 $changed = ($function eq "uc")
2357 ? uc($char)
2358 : ($function eq "ucfirst")
2359 ? ucfirst($char)
2360 : ($function eq "lc")
2361 ? lc($char)
2362 : ($function eq "lcfirst")
2363 ? lcfirst($char)
26c1569f
KW
2364 : ($function eq "fc")
2365 ? fc($char)
faf0c248 2366 : die("Unexpected function \"$function\"");
71e5cbb3 2367 }
faf0c248
KW
2368 ok($changed eq $should_be,
2369 "$function(\"$char\") in C locale "
2370 . (($is_utf8_locale)
2371 ? "(use locale ':not_characters'"
2372 : "(use locale")
2373 . (($encoded_in_utf8)
2374 ? "; encoded in utf8)"
2375 : "; not encoded in utf8)")
2376 . " should be \"$should_be\", got \"$changed\"");
2377
445bf929
KW
2378 # Tainting shouldn't happen for use locale :not_character
2379 # (a utf8 locale)
2380 (! $is_utf8_locale)
faf0c248
KW
2381 ? check_taint($changed)
2382 : check_taint_not($changed);
2383
2384 # Use UTF-8 next time through the loop
2385 utf8::upgrade($char);
0099bb8d 2386 }
66cbab2c 2387 }
094a2f8c
KW
2388 }
2389 }
2390}
2391
1bfe8fea
KW
2392# Give final advice.
2393
2394my $didwarn = 0;
2395
2396foreach ($first_locales_test_number..$final_locales_test_number) {
2397 if ($Problem{$_}) {
2398 my @f = sort keys %{ $Problem{$_} };
2399 my $f = join(" ", @f);
2400 $f =~ s/(.{50,60}) /$1\n#\t/g;
2401 print
2402 "#\n",
2403 "# The locale ", (@f == 1 ? "definition" : "definitions"), "\n#\n",
2404 "#\t", $f, "\n#\n",
2405 "# on your system may have errors because the locale test $_\n",
2406 "# \"$test_names{$_}\"\n",
2407 "# failed in ", (@f == 1 ? "that locale" : "those locales"),
2408 ".\n";
2409 print <<EOW;
2410#
2411# If your users are not using these locales you are safe for the moment,
2412# but please report this failure first to perlbug\@perl.com using the
2413# perlbug script (as described in the INSTALL file) so that the exact
2414# details of the failures can be sorted out first and then your operating
2415# system supplier can be alerted about these anomalies.
2416#
2417EOW
2418 $didwarn = 1;
2419 }
2420}
2421
2422# Tell which locales were okay and which were not.
2423
2424if ($didwarn) {
2425 my (@s, @F);
2426
2427 foreach my $l (@Locale) {
2428 my $p = 0;
2429 if ($setlocale_failed{$l}) {
2430 $p++;
2431 }
2432 else {
2433 foreach my $t
2434 ($first_locales_test_number..$final_locales_test_number)
2435 {
2436 $p++ if $Problem{$t}{$l};
2437 }
2438 }
2439 push @s, $l if $p == 0;
2440 push @F, $l unless $p == 0;
2441 }
2442
2443 if (@s) {
2444 my $s = join(" ", @s);
2445 $s =~ s/(.{50,60}) /$1\n#\t/g;
2446
a5cf558a 2447 print
1bfe8fea
KW
2448 "# The following locales\n#\n",
2449 "#\t", $s, "\n#\n",
2450 "# tested okay.\n#\n",
2451 } else {
a5cf558a 2452 print "# None of your locales were fully okay.\n";
1bfe8fea
KW
2453 }
2454
2455 if (@F) {
2456 my $F = join(" ", @F);
2457 $F =~ s/(.{50,60}) /$1\n#\t/g;
2458
c7e7f6b7
KW
2459 my $details = "";
2460 unless ($debug) {
2461 $details = "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n";
2462 }
2463 elsif ($debug == 1) {
2464 $details = "# For even more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=2.\n";
2465 }
2466
a5cf558a 2467 print
1bfe8fea
KW
2468 "# The following locales\n#\n",
2469 "#\t", $F, "\n#\n",
2470 "# had problems.\n#\n",
c7e7f6b7 2471 $details;
1bfe8fea 2472 } else {
a5cf558a 2473 print "# None of your locales were broken.\n";
1bfe8fea
KW
2474 }
2475}
2476
fdf053ee 2477print "1..$test_num\n";
906f284f 2478
90248788 2479# eof