Commit | Line | Data |
---|---|---|
8ebc5c01 | 1 | #!./perl -wT |
2 | ||
66cbab2c KW |
3 | # This tests plain 'use locale' and adorned 'use locale ":not_characters"' |
4 | # Because these pragmas are compile time, and I (khw) am trying to test | |
5 | # without using 'eval' as much as possible, which might cloud the issue, the | |
6 | # crucial parts of the code are duplicated in a block for each pragma. | |
7 | ||
6c2e653d KW |
8 | # To make a TODO test, add the string 'TODO' to its %test_names value |
9 | ||
e3a2734b KW |
10 | binmode STDOUT, ':utf8'; |
11 | binmode STDERR, ':utf8'; | |
12 | ||
8ebc5c01 | 13 | BEGIN { |
14 | chdir 't' if -d 't'; | |
20822f61 | 15 | @INC = '../lib'; |
f9cbebe1 | 16 | unshift @INC, '.'; |
b002077a | 17 | require Config; import Config; |
97a0514d | 18 | if (!$Config{d_setlocale} || $Config{ccflags} =~ /\bD?NO_LOCALE\b/) { |
b002077a CS |
19 | print "1..0\n"; |
20 | exit; | |
21 | } | |
2de3dbcc | 22 | $| = 1; |
8ebc5c01 | 23 | } |
24 | ||
25 | use strict; | |
26c1569f | 26 | use feature 'fc'; |
8ebc5c01 | 27 | |
92891c66 | 28 | # =1 adds debugging output; =2 increases the verbosity somewhat |
108a305e | 29 | my $debug = $ENV{PERL_DEBUG_FULL_TEST} // 0; |
284102e8 | 30 | |
6d5d702a KW |
31 | # Certain tests have been shown to be problematical for a few locales. Don't |
32 | # fail them unless at least this percentage of the tested locales fail. | |
8321c25f KW |
33 | # Some Windows machines are defective in every locale but the C, calling \t |
34 | # printable; superscripts to be digits, etc. See | |
662399e7 KW |
35 | # http://markmail.org/message/5jwam4xsx4amsdnv. Also on AIX machines, many |
36 | # locales call a no-break space a graphic. | |
df33771d | 37 | # (There aren't 1000 locales currently in existence, so 99.9 works) |
662399e7 KW |
38 | my $acceptable_fold_failure_percentage = ($^O =~ / ^ ( MSWin32 | AIX ) $ /ix) |
39 | ? 99.9 | |
40 | : 5; | |
6d5d702a | 41 | |
7493b8f2 KW |
42 | # The list of test numbers of the problematic tests. |
43 | my @problematical_tests; | |
44 | ||
45 | ||
db4b7445 A |
46 | use Dumpvalue; |
47 | ||
48 | my $dumper = Dumpvalue->new( | |
49 | tick => qq{"}, | |
50 | quoteHighBit => 0, | |
51 | unctrl => "quote" | |
52 | ); | |
6be75cd7 | 53 | sub debug { |
db4b7445 A |
54 | return unless $debug; |
55 | my($mess) = join "", @_; | |
56 | chop $mess; | |
57 | print $dumper->stringify($mess,1), "\n"; | |
6be75cd7 JH |
58 | } |
59 | ||
92891c66 KW |
60 | sub debug_more { |
61 | return unless $debug > 1; | |
62 | return debug(@_); | |
63 | } | |
64 | ||
6be75cd7 JH |
65 | sub debugf { |
66 | printf @_ if $debug; | |
67 | } | |
68 | ||
8ebc5c01 | 69 | my $have_setlocale = 0; |
70 | eval { | |
71 | require POSIX; | |
72 | import POSIX ':locale_h'; | |
73 | $have_setlocale++; | |
74 | }; | |
75 | ||
6dead956 | 76 | # Visual C's CRT goes silly on strings of the form "en_US.ISO8859-1" |
f6c6487a | 77 | # and mingw32 uses said silly CRT |
3a2d1764 SH |
78 | # This doesn't seem to be an issue any more, at least on Windows XP, |
79 | # so re-enable the tests for Windows XP onwards. | |
80 | my $winxp = ($^O eq 'MSWin32' && defined &Win32::GetOSVersion && | |
81 | join('.', (Win32::GetOSVersion())[1..2]) >= 5.1); | |
82 | $have_setlocale = 0 if ((($^O eq 'MSWin32' && !$winxp) || $^O eq 'NetWare') && | |
83 | $Config{cc} =~ /^(cl|gcc)/i); | |
6dead956 | 84 | |
36a42ae7 | 85 | # UWIN seems to loop after taint tests, just skip for now |
cd19b65c JH |
86 | $have_setlocale = 0 if ($^O =~ /^uwin/); |
87 | ||
0e053d1e | 88 | $a = 'abc %'; |
8ebc5c01 | 89 | |
c213d471 KW |
90 | my $test_num = 0; |
91 | ||
8ebc5c01 | 92 | sub ok { |
c213d471 | 93 | my ($result, $message) = @_; |
e3a2734b | 94 | $message = "" unless defined $message; |
8ebc5c01 | 95 | |
96 | print 'not ' unless ($result); | |
c213d471 | 97 | print "ok " . ++$test_num; |
e3a2734b KW |
98 | print " $message"; |
99 | print "\n"; | |
8ebc5c01 | 100 | } |
101 | ||
102 | # First we'll do a lot of taint checking for locales. | |
103 | # This is the easiest to test, actually, as any locale, | |
104 | # even the default locale will taint under 'use locale'. | |
105 | ||
106 | sub is_tainted { # hello, camel two. | |
9f1b1f2d | 107 | no warnings 'uninitialized' ; |
8ebc5c01 | 108 | my $dummy; |
ba74571d | 109 | local $@; |
8ebc5c01 | 110 | not eval { $dummy = join("", @_), kill 0; 1 } |
111 | } | |
112 | ||
a9b7c637 KW |
113 | sub check_taint ($;$) { |
114 | my $message_tail = $_[1] // ""; | |
115 | $message_tail = ": $message_tail" if $message_tail; | |
116 | ok is_tainted($_[0]), "verify that is tainted$message_tail"; | |
8ebc5c01 | 117 | } |
118 | ||
a9b7c637 KW |
119 | sub check_taint_not ($;$) { |
120 | my $message_tail = $_[1] // ""; | |
121 | $message_tail = ": $message_tail" if $message_tail; | |
122 | ok((not is_tainted($_[0])), "verify that isn't tainted$message_tail"); | |
8ebc5c01 | 123 | } |
124 | ||
bf3cd0e6 KW |
125 | "\tb\t" =~ /^m?(\s)(.*)\1$/; |
126 | check_taint_not $&, "not tainted outside 'use locale'"; | |
127 | ; | |
128 | ||
8ebc5c01 | 129 | use locale; # engage locale and therefore locale taint. |
130 | ||
36a42ae7 | 131 | check_taint_not $a; |
8ebc5c01 | 132 | |
36a42ae7 KW |
133 | check_taint uc($a); |
134 | check_taint "\U$a"; | |
135 | check_taint ucfirst($a); | |
136 | check_taint "\u$a"; | |
137 | check_taint lc($a); | |
26c1569f | 138 | check_taint fc($a); |
36a42ae7 | 139 | check_taint "\L$a"; |
26c1569f | 140 | check_taint "\F$a"; |
36a42ae7 KW |
141 | check_taint lcfirst($a); |
142 | check_taint "\l$a"; | |
8ebc5c01 | 143 | |
36a42ae7 KW |
144 | check_taint_not sprintf('%e', 123.456); |
145 | check_taint_not sprintf('%f', 123.456); | |
146 | check_taint_not sprintf('%g', 123.456); | |
147 | check_taint_not sprintf('%d', 123.456); | |
148 | check_taint_not sprintf('%x', 123.456); | |
8ebc5c01 | 149 | |
150 | $_ = $a; # untaint $_ | |
151 | ||
152 | $_ = uc($a); # taint $_ | |
153 | ||
36a42ae7 | 154 | check_taint $_; |
8ebc5c01 | 155 | |
156 | /(\w)/; # taint $&, $`, $', $+, $1. | |
36a42ae7 KW |
157 | check_taint $&; |
158 | check_taint $`; | |
159 | check_taint $'; | |
160 | check_taint $+; | |
161 | check_taint $1; | |
162 | check_taint_not $2; | |
8ebc5c01 | 163 | |
164 | /(.)/; # untaint $&, $`, $', $+, $1. | |
36a42ae7 KW |
165 | check_taint_not $&; |
166 | check_taint_not $`; | |
167 | check_taint_not $'; | |
168 | check_taint_not $+; | |
169 | check_taint_not $1; | |
170 | check_taint_not $2; | |
8ebc5c01 | 171 | |
172 | /(\W)/; # taint $&, $`, $', $+, $1. | |
36a42ae7 KW |
173 | check_taint $&; |
174 | check_taint $`; | |
175 | check_taint $'; | |
176 | check_taint $+; | |
177 | check_taint $1; | |
178 | check_taint_not $2; | |
8ebc5c01 | 179 | |
180 | /(\s)/; # taint $&, $`, $', $+, $1. | |
36a42ae7 KW |
181 | check_taint $&; |
182 | check_taint $`; | |
183 | check_taint $'; | |
184 | check_taint $+; | |
185 | check_taint $1; | |
186 | check_taint_not $2; | |
8ebc5c01 | 187 | |
188 | /(\S)/; # taint $&, $`, $', $+, $1. | |
36a42ae7 KW |
189 | check_taint $&; |
190 | check_taint $`; | |
191 | check_taint $'; | |
192 | check_taint $+; | |
193 | check_taint $1; | |
194 | check_taint_not $2; | |
8ebc5c01 | 195 | |
196 | $_ = $a; # untaint $_ | |
197 | ||
36a42ae7 | 198 | check_taint_not $_; |
8ebc5c01 | 199 | |
200 | /(b)/; # this must not taint | |
36a42ae7 KW |
201 | check_taint_not $&; |
202 | check_taint_not $`; | |
203 | check_taint_not $'; | |
204 | check_taint_not $+; | |
205 | check_taint_not $1; | |
206 | check_taint_not $2; | |
8ebc5c01 | 207 | |
208 | $_ = $a; # untaint $_ | |
209 | ||
36a42ae7 | 210 | check_taint_not $_; |
8ebc5c01 | 211 | |
212 | $b = uc($a); # taint $b | |
213 | s/(.+)/$b/; # this must taint only the $_ | |
214 | ||
36a42ae7 KW |
215 | check_taint $_; |
216 | check_taint_not $&; | |
217 | check_taint_not $`; | |
218 | check_taint_not $'; | |
219 | check_taint_not $+; | |
220 | check_taint_not $1; | |
221 | check_taint_not $2; | |
8ebc5c01 | 222 | |
223 | $_ = $a; # untaint $_ | |
224 | ||
225 | s/(.+)/b/; # this must not taint | |
36a42ae7 KW |
226 | check_taint_not $_; |
227 | check_taint_not $&; | |
228 | check_taint_not $`; | |
229 | check_taint_not $'; | |
230 | check_taint_not $+; | |
231 | check_taint_not $1; | |
232 | check_taint_not $2; | |
8ebc5c01 | 233 | |
234 | $b = $a; # untaint $b | |
235 | ||
236 | ($b = $a) =~ s/\w/$&/; | |
36a42ae7 KW |
237 | check_taint $b; # $b should be tainted. |
238 | check_taint_not $a; # $a should be not. | |
8ebc5c01 | 239 | |
240 | $_ = $a; # untaint $_ | |
241 | ||
242 | s/(\w)/\l$1/; # this must taint | |
36a42ae7 KW |
243 | check_taint $_; |
244 | check_taint $&; | |
245 | check_taint $`; | |
246 | check_taint $'; | |
247 | check_taint $+; | |
248 | check_taint $1; | |
249 | check_taint_not $2; | |
8ebc5c01 | 250 | |
251 | $_ = $a; # untaint $_ | |
252 | ||
253 | s/(\w)/\L$1/; # this must taint | |
36a42ae7 KW |
254 | check_taint $_; |
255 | check_taint $&; | |
256 | check_taint $`; | |
257 | check_taint $'; | |
258 | check_taint $+; | |
259 | check_taint $1; | |
260 | check_taint_not $2; | |
8ebc5c01 | 261 | |
262 | $_ = $a; # untaint $_ | |
263 | ||
264 | s/(\w)/\u$1/; # this must taint | |
36a42ae7 KW |
265 | check_taint $_; |
266 | check_taint $&; | |
267 | check_taint $`; | |
268 | check_taint $'; | |
269 | check_taint $+; | |
270 | check_taint $1; | |
271 | check_taint_not $2; | |
8ebc5c01 | 272 | |
273 | $_ = $a; # untaint $_ | |
274 | ||
275 | s/(\w)/\U$1/; # this must taint | |
36a42ae7 KW |
276 | check_taint $_; |
277 | check_taint $&; | |
278 | check_taint $`; | |
279 | check_taint $'; | |
280 | check_taint $+; | |
281 | check_taint $1; | |
282 | check_taint_not $2; | |
8ebc5c01 | 283 | |
faab3281 KW |
284 | # After all this tainting $a should be cool. |
285 | ||
286 | check_taint_not $a; | |
287 | ||
b99851e1 KW |
288 | "a" =~ /([a-z])/; |
289 | check_taint_not $1, '"a" =~ /([a-z])/'; | |
290 | "foo.bar_baz" =~ /^(.*)[._](.*?)$/; # Bug 120675 | |
291 | check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/'; | |
292 | ||
7a9d1c02 | 293 | # BE SURE TO COPY ANYTHING YOU ADD to the block below |
8ebc5c01 | 294 | |
66cbab2c KW |
295 | { # This is just the previous tests copied here with a different |
296 | # compile-time pragma. | |
297 | ||
298 | use locale ':not_characters'; # engage restricted locale with different | |
299 | # tainting rules | |
300 | ||
301 | check_taint_not $a; | |
302 | ||
303 | check_taint_not uc($a); | |
304 | check_taint_not "\U$a"; | |
305 | check_taint_not ucfirst($a); | |
306 | check_taint_not "\u$a"; | |
307 | check_taint_not lc($a); | |
26c1569f | 308 | check_taint_not fc($a); |
66cbab2c | 309 | check_taint_not "\L$a"; |
26c1569f | 310 | check_taint_not "\F$a"; |
66cbab2c KW |
311 | check_taint_not lcfirst($a); |
312 | check_taint_not "\l$a"; | |
313 | ||
314 | check_taint_not sprintf('%e', 123.456); | |
315 | check_taint_not sprintf('%f', 123.456); | |
316 | check_taint_not sprintf('%g', 123.456); | |
317 | check_taint_not sprintf('%d', 123.456); | |
318 | check_taint_not sprintf('%x', 123.456); | |
319 | ||
320 | $_ = $a; # untaint $_ | |
321 | ||
322 | $_ = uc($a); # taint $_ | |
323 | ||
324 | check_taint_not $_; | |
325 | ||
326 | /(\w)/; # taint $&, $`, $', $+, $1. | |
327 | check_taint_not $&; | |
328 | check_taint_not $`; | |
329 | check_taint_not $'; | |
330 | check_taint_not $+; | |
331 | check_taint_not $1; | |
332 | check_taint_not $2; | |
333 | ||
334 | /(.)/; # untaint $&, $`, $', $+, $1. | |
335 | check_taint_not $&; | |
336 | check_taint_not $`; | |
337 | check_taint_not $'; | |
338 | check_taint_not $+; | |
339 | check_taint_not $1; | |
340 | check_taint_not $2; | |
341 | ||
342 | /(\W)/; # taint $&, $`, $', $+, $1. | |
343 | check_taint_not $&; | |
344 | check_taint_not $`; | |
345 | check_taint_not $'; | |
346 | check_taint_not $+; | |
347 | check_taint_not $1; | |
348 | check_taint_not $2; | |
349 | ||
350 | /(\s)/; # taint $&, $`, $', $+, $1. | |
351 | check_taint_not $&; | |
352 | check_taint_not $`; | |
353 | check_taint_not $'; | |
354 | check_taint_not $+; | |
355 | check_taint_not $1; | |
356 | check_taint_not $2; | |
357 | ||
358 | /(\S)/; # taint $&, $`, $', $+, $1. | |
359 | check_taint_not $&; | |
360 | check_taint_not $`; | |
361 | check_taint_not $'; | |
362 | check_taint_not $+; | |
363 | check_taint_not $1; | |
364 | check_taint_not $2; | |
365 | ||
366 | $_ = $a; # untaint $_ | |
367 | ||
368 | check_taint_not $_; | |
369 | ||
370 | /(b)/; # this must not taint | |
371 | check_taint_not $&; | |
372 | check_taint_not $`; | |
373 | check_taint_not $'; | |
374 | check_taint_not $+; | |
375 | check_taint_not $1; | |
376 | check_taint_not $2; | |
377 | ||
378 | $_ = $a; # untaint $_ | |
379 | ||
380 | check_taint_not $_; | |
381 | ||
382 | $b = uc($a); # taint $b | |
383 | s/(.+)/$b/; # this must taint only the $_ | |
384 | ||
385 | check_taint_not $_; | |
386 | check_taint_not $&; | |
387 | check_taint_not $`; | |
388 | check_taint_not $'; | |
389 | check_taint_not $+; | |
390 | check_taint_not $1; | |
391 | check_taint_not $2; | |
392 | ||
393 | $_ = $a; # untaint $_ | |
394 | ||
395 | s/(.+)/b/; # this must not taint | |
396 | check_taint_not $_; | |
397 | check_taint_not $&; | |
398 | check_taint_not $`; | |
399 | check_taint_not $'; | |
400 | check_taint_not $+; | |
401 | check_taint_not $1; | |
402 | check_taint_not $2; | |
403 | ||
404 | $b = $a; # untaint $b | |
405 | ||
406 | ($b = $a) =~ s/\w/$&/; | |
407 | check_taint_not $b; # $b should be tainted. | |
408 | check_taint_not $a; # $a should be not. | |
409 | ||
410 | $_ = $a; # untaint $_ | |
411 | ||
412 | s/(\w)/\l$1/; # this must taint | |
413 | check_taint_not $_; | |
414 | check_taint_not $&; | |
415 | check_taint_not $`; | |
416 | check_taint_not $'; | |
417 | check_taint_not $+; | |
418 | check_taint_not $1; | |
419 | check_taint_not $2; | |
420 | ||
421 | $_ = $a; # untaint $_ | |
422 | ||
423 | s/(\w)/\L$1/; # this must taint | |
424 | check_taint_not $_; | |
425 | check_taint_not $&; | |
426 | check_taint_not $`; | |
427 | check_taint_not $'; | |
428 | check_taint_not $+; | |
429 | check_taint_not $1; | |
430 | check_taint_not $2; | |
431 | ||
432 | $_ = $a; # untaint $_ | |
433 | ||
434 | s/(\w)/\u$1/; # this must taint | |
435 | check_taint_not $_; | |
436 | check_taint_not $&; | |
437 | check_taint_not $`; | |
438 | check_taint_not $'; | |
439 | check_taint_not $+; | |
440 | check_taint_not $1; | |
441 | check_taint_not $2; | |
442 | ||
443 | $_ = $a; # untaint $_ | |
444 | ||
445 | s/(\w)/\U$1/; # this must taint | |
446 | check_taint_not $_; | |
447 | check_taint_not $&; | |
448 | check_taint_not $`; | |
449 | check_taint_not $'; | |
450 | check_taint_not $+; | |
451 | check_taint_not $1; | |
452 | check_taint_not $2; | |
453 | ||
454 | # After all this tainting $a should be cool. | |
455 | ||
456 | check_taint_not $a; | |
7a9d1c02 KW |
457 | |
458 | "a" =~ /([a-z])/; | |
459 | check_taint_not $1, '"a" =~ /([a-z])/'; | |
460 | "foo.bar_baz" =~ /^(.*)[._](.*?)$/; # Bug 120675 | |
461 | check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/'; | |
66cbab2c KW |
462 | } |
463 | ||
464 | # Here are in scope of 'use locale' | |
465 | ||
8ebc5c01 | 466 | # I think we've seen quite enough of taint. |
467 | # Let us do some *real* locale work now, | |
284102e8 | 468 | # unless setlocale() is missing (i.e. minitest). |
8ebc5c01 | 469 | |
fdf053ee KW |
470 | unless ($have_setlocale) { |
471 | print "1..$test_num\n"; | |
472 | exit; | |
473 | } | |
8ebc5c01 | 474 | |
6cf0b567 | 475 | # The test number before our first setlocale() |
66330f13 | 476 | my $final_without_setlocale = $test_num; |
6cf0b567 | 477 | |
284102e8 JH |
478 | # Find locales. |
479 | ||
6be75cd7 JH |
480 | debug "# Scanning for locales...\n"; |
481 | ||
482 | # Note that it's okay that some languages have their native names | |
483 | # capitalized here even though that's not "right". They are lowercased | |
484 | # anyway later during the scanning process (and besides, some clueless | |
98dc9551 | 485 | # vendor might have them capitalized erroneously anyway). |
6be75cd7 | 486 | |
284102e8 | 487 | my $locales = <<EOF; |
6be75cd7 | 488 | Afrikaans:af:za:1 15 |
284102e8 | 489 | Arabic:ar:dz eg sa:6 arabic8 |
6be75cd7 JH |
490 | Brezhoneg Breton:br:fr:1 15 |
491 | Bulgarski Bulgarian:bg:bg:5 | |
dd8482fc | 492 | Chinese:zh:cn tw:cn.EUC eucCN eucTW euc.CN euc.TW Big5 GB2312 tw.EUC |
6be75cd7 JH |
493 | Hrvatski Croatian:hr:hr:2 |
494 | Cymraeg Welsh:cy:cy:1 14 15 | |
284102e8 | 495 | Czech:cs:cz:2 |
df8a53a3 | 496 | Dansk Danish:da:dk:1 15 |
6be75cd7 | 497 | Nederlands Dutch:nl:be nl:1 15 |
dd8482fc | 498 | English American British:en:au ca gb ie nz us uk zw:1 15 cp850 |
6be75cd7 JH |
499 | Esperanto:eo:eo:3 |
500 | Eesti Estonian:et:ee:4 6 13 | |
501 | Suomi Finnish:fi:fi:1 15 | |
502 | Flamish::fl:1 15 | |
6be75cd7 JH |
503 | Deutsch German:de:at be ch de lu:1 15 |
504 | Euskaraz Basque:eu:es fr:1 15 | |
6be75cd7 JH |
505 | Galego Galician:gl:es:1 15 |
506 | Ellada Greek:el:gr:7 g8 | |
6be75cd7 JH |
507 | Frysk:fy:nl:1 15 |
508 | Greenlandic:kl:gl:4 6 | |
284102e8 JH |
509 | Hebrew:iw:il:8 hebrew8 |
510 | Hungarian:hu:hu:2 | |
df8a53a3 | 511 | Indonesian:id:id:1 15 |
6be75cd7 JH |
512 | Gaeilge Irish:ga:IE:1 14 15 |
513 | Italiano Italian:it:ch it:1 15 | |
514 | Nihongo Japanese:ja:jp:euc eucJP jp.EUC sjis | |
284102e8 | 515 | Korean:ko:kr: |
6be75cd7 JH |
516 | Latine Latin:la:va:1 15 |
517 | Latvian:lv:lv:4 6 13 | |
518 | Lithuanian:lt:lt:4 6 13 | |
519 | Macedonian:mk:mk:1 15 | |
520 | Maltese:mt:mt:3 | |
dd8482fc | 521 | Moldovan:mo:mo:2 |
df8a53a3 | 522 | Norsk Norwegian:no no\@nynorsk nb nn:no:1 15 |
6be75cd7 JH |
523 | Occitan:oc:es:1 15 |
524 | Polski Polish:pl:pl:2 | |
284102e8 | 525 | Rumanian:ro:ro:2 |
a528dad0 | 526 | Russki Russian:ru:ru su ua:5 koi8 koi8r KOI8-R koi8u cp1251 cp866 |
6be75cd7 | 527 | Serbski Serbian:sr:yu:5 |
284102e8 | 528 | Slovak:sk:sk:2 |
6be75cd7 | 529 | Slovene Slovenian:sl:si:2 |
d43ce814 JH |
530 | Sqhip Albanian:sq:sq:1 15 |
531 | Svenska Swedish:sv:fi se:1 15 | |
6be75cd7 | 532 | Thai:th:th:11 tis620 |
284102e8 | 533 | Turkish:tr:tr:9 turkish8 |
dd8482fc | 534 | Yiddish:yi::1 15 |
284102e8 JH |
535 | EOF |
536 | ||
ee50adbe | 537 | if ($^O eq 'os390') { |
dd8482fc | 538 | # These cause heartburn. Broken locales? |
ee50adbe PP |
539 | $locales =~ s/Svenska Swedish:sv:fi se:1 15\n//; |
540 | $locales =~ s/Thai:th:th:11 tis620\n//; | |
541 | } | |
542 | ||
ef4a39e5 | 543 | sub in_utf8 () { $^H & 0x08 || (${^OPEN} || "") =~ /:utf8/ } |
f9cbebe1 JH |
544 | |
545 | if (in_utf8) { | |
8a6cb2cb | 546 | require "lib/locale/utf8"; |
f9cbebe1 | 547 | } else { |
8a6cb2cb | 548 | require "lib/locale/latin1"; |
f9cbebe1 JH |
549 | } |
550 | ||
284102e8 JH |
551 | my @Locale; |
552 | my $Locale; | |
fb844330 | 553 | my %posixes; |
284102e8 | 554 | |
284102e8 JH |
555 | sub trylocale { |
556 | my $locale = shift; | |
0b9f254b | 557 | return if grep { $locale eq $_ } @Locale; |
a810e350 | 558 | return unless setlocale(&POSIX::LC_ALL, $locale); |
e439cacb KW |
559 | my $badutf8; |
560 | { | |
561 | local $SIG{__WARN__} = sub { | |
562 | $badutf8 = $_[0] =~ /Malformed UTF-8/; | |
563 | }; | |
564 | $Locale =~ /UTF-?8/i; | |
284102e8 | 565 | } |
e439cacb KW |
566 | |
567 | if ($badutf8) { | |
568 | ok(0, "Locale name contains malformed utf8"); | |
569 | return; | |
570 | } | |
571 | push @Locale, $locale; | |
284102e8 | 572 | } |
8ebc5c01 | 573 | |
284102e8 JH |
574 | sub decode_encodings { |
575 | my @enc; | |
8ebc5c01 | 576 | |
284102e8 JH |
577 | foreach (split(/ /, shift)) { |
578 | if (/^(\d+)$/) { | |
579 | push @enc, "ISO8859-$1"; | |
580 | push @enc, "iso8859$1"; # HP | |
581 | if ($1 eq '1') { | |
582 | push @enc, "roman8"; # HP | |
583 | } | |
584 | } else { | |
585 | push @enc, $_; | |
dd8482fc | 586 | push @enc, "$_.UTF-8"; |
8ebc5c01 | 587 | } |
588 | } | |
ee50adbe PP |
589 | if ($^O eq 'os390') { |
590 | push @enc, qw(IBM-037 IBM-819 IBM-1047); | |
591 | } | |
8ebc5c01 | 592 | |
284102e8 | 593 | return @enc; |
8ebc5c01 | 594 | } |
595 | ||
284102e8 JH |
596 | trylocale("C"); |
597 | trylocale("POSIX"); | |
598 | foreach (0..15) { | |
599 | trylocale("ISO8859-$_"); | |
284102e8 | 600 | trylocale("iso8859$_"); |
097ee67d JH |
601 | trylocale("iso8859-$_"); |
602 | trylocale("iso_8859_$_"); | |
603 | trylocale("isolatin$_"); | |
604 | trylocale("isolatin-$_"); | |
605 | trylocale("iso_latin_$_"); | |
8ebc5c01 | 606 | } |
607 | ||
645e49ed JH |
608 | # Sanitize the environment so that we can run the external 'locale' |
609 | # program without the taint mode getting grumpy. | |
cce5967e JH |
610 | |
611 | # $ENV{PATH} is special in VMS. | |
612 | delete $ENV{PATH} if $^O ne 'VMS' or $Config{d_setenv}; | |
613 | ||
614 | # Other subversive stuff. | |
615 | delete @ENV{qw(IFS CDPATH ENV BASH_ENV)}; | |
dd8482fc | 616 | |
21477fb4 | 617 | if (-x "/usr/bin/locale" && open(LOCALES, "/usr/bin/locale -a 2>/dev/null|")) { |
dd8482fc | 618 | while (<LOCALES>) { |
d281a6ac NC |
619 | # It seems that /usr/bin/locale steadfastly outputs 8 bit data, which |
620 | # ain't great when we're running this testPERL_UNICODE= so that utf8 | |
621 | # locales will cause all IO hadles to default to (assume) utf8 | |
622 | next unless utf8::valid($_); | |
dd8482fc JH |
623 | chomp; |
624 | trylocale($_); | |
284102e8 | 625 | } |
dd8482fc | 626 | close(LOCALES); |
a6259068 | 627 | } elsif ($^O eq 'VMS' && defined($ENV{'SYS$I18N_LOCALE'}) && -d 'SYS$I18N_LOCALE') { |
71e5cbb3 | 628 | # The SYS$I18N_LOCALE logical name search list was not present on |
a6259068 PP |
629 | # VAX VMS V5.5-12, but was on AXP && VAX VMS V6.2 as well as later versions. |
630 | opendir(LOCALES, "SYS\$I18N_LOCALE:"); | |
631 | while ($_ = readdir(LOCALES)) { | |
632 | chomp; | |
633 | trylocale($_); | |
634 | } | |
635 | close(LOCALES); | |
39853c2a | 636 | } elsif (($^O eq 'openbsd' || $^O eq 'bitrig' ) && -e '/usr/share/locale') { |
87e33296 SP |
637 | |
638 | # OpenBSD doesn't have a locale executable, so reading /usr/share/locale | |
639 | # is much easier and faster than the last resort method. | |
640 | ||
641 | opendir(LOCALES, '/usr/share/locale'); | |
642 | while ($_ = readdir(LOCALES)) { | |
643 | chomp; | |
644 | trylocale($_); | |
645 | } | |
646 | close(LOCALES); | |
dd8482fc JH |
647 | } else { |
648 | ||
649 | # This is going to be slow. | |
650 | ||
651 | foreach my $locale (split(/\n/, $locales)) { | |
652 | my ($locale_name, $language_codes, $country_codes, $encodings) = | |
653 | split(/:/, $locale); | |
654 | my @enc = decode_encodings($encodings); | |
655 | foreach my $loc (split(/ /, $locale_name)) { | |
656 | trylocale($loc); | |
284102e8 | 657 | foreach my $enc (@enc) { |
dd8482fc | 658 | trylocale("$loc.$enc"); |
284102e8 | 659 | } |
dd8482fc | 660 | $loc = lc $loc; |
284102e8 | 661 | foreach my $enc (@enc) { |
dd8482fc JH |
662 | trylocale("$loc.$enc"); |
663 | } | |
664 | } | |
665 | foreach my $lang (split(/ /, $language_codes)) { | |
666 | trylocale($lang); | |
667 | foreach my $country (split(/ /, $country_codes)) { | |
668 | my $lc = "${lang}_${country}"; | |
669 | trylocale($lc); | |
670 | foreach my $enc (@enc) { | |
671 | trylocale("$lc.$enc"); | |
672 | } | |
673 | my $lC = "${lang}_\U${country}"; | |
674 | trylocale($lC); | |
675 | foreach my $enc (@enc) { | |
676 | trylocale("$lC.$enc"); | |
677 | } | |
284102e8 JH |
678 | } |
679 | } | |
680 | } | |
681 | } | |
4599a1de | 682 | |
a810e350 | 683 | setlocale(&POSIX::LC_ALL, "C"); |
d43ce814 | 684 | |
86f50d7d | 685 | if ($^O eq 'darwin') { |
4373e181 | 686 | # Darwin 8/Mac OS X 10.4 and 10.5 have bad Basque locales: perl bug #35895, |
86f50d7d | 687 | # Apple bug ID# 4139653. It also has a problem in Byelorussian. |
4373e181 RGS |
688 | (my $v) = $Config{osvers} =~ /^(\d+)/; |
689 | if ($v >= 8 and $v < 10) { | |
86f50d7d | 690 | debug "# Skipping eu_ES, be_BY locales -- buggy in Darwin\n"; |
a5ec937f | 691 | @Locale = grep ! m/^(eu_ES(?:\..*)?|be_BY\.CP1131)$/, @Locale; |
dfa5c78f | 692 | } elsif ($v < 12) { |
a44d0896 NC |
693 | debug "# Skipping be_BY locales -- buggy in Darwin\n"; |
694 | @Locale = grep ! m/^be_BY\.CP1131$/, @Locale; | |
a5ec937f | 695 | } |
86f50d7d DD |
696 | } |
697 | ||
4599a1de JH |
698 | @Locale = sort @Locale; |
699 | ||
887ef7ed PP |
700 | debug "# Locales =\n"; |
701 | for ( @Locale ) { | |
702 | debug "# $_\n"; | |
703 | } | |
8ebc5c01 | 704 | |
284102e8 | 705 | my %Problem; |
2a680da6 JH |
706 | my %Okay; |
707 | my %Testing; | |
30032ef4 | 708 | my @Added_alpha; # Alphas that aren't in the C locale. |
c08acc4c | 709 | my %test_names; |
284102e8 | 710 | |
705af3af | 711 | sub disp_chars { |
019bf7dd KW |
712 | # This returns a display string denoting the input parameter @_, each |
713 | # entry of which is a single character in the range 0-255. The first part | |
714 | # of the output is a string of the characters in @_ that are ASCII | |
715 | # graphics, and hence unambiguously displayable. They are given by code | |
716 | # point order. The second part is the remaining code points, the ordinals | |
717 | # of which are each displayed as 2-digit hex. Blanks are inserted so as | |
718 | # to keep anything from the first part looking like a 2-digit hex number. | |
719 | ||
720 | no locale; | |
721 | my @chars = sort { ord $a <=> ord $b } @_; | |
722 | my $output = ""; | |
019bf7dd KW |
723 | my $range_start; |
724 | my $start_class; | |
725 | push @chars, chr(258); # This sentinel simplifies the loop termination | |
726 | # logic | |
727 | foreach my $i (0 .. @chars - 1) { | |
728 | my $char = $chars[$i]; | |
729 | my $range_end; | |
730 | my $class; | |
731 | ||
732 | # We avoid using [:posix:] classes, as these are being tested in this | |
733 | # file. Each equivalence class below is for things that can appear in | |
734 | # a range; those that can't be in a range have class -1. 0 for those | |
735 | # which should be output in hex; and >0 for the other ranges | |
736 | if ($char =~ /[A-Z]/) { | |
737 | $class = 2; | |
738 | } | |
739 | elsif ($char =~ /[a-z]/) { | |
740 | $class = 3; | |
741 | } | |
742 | elsif ($char =~ /[0-9]/) { | |
743 | $class = 4; | |
744 | } | |
78fbfaf5 KW |
745 | # Uncomment to get literal punctuation displayed instead of hex |
746 | #elsif ($char =~ /[[\]!"#\$\%&\'()*+,.\/:\\;<=>?\@\^_`{|}~-]/) { | |
747 | # $class = -1; # Punct never appears in a range | |
748 | #} | |
019bf7dd KW |
749 | else { |
750 | $class = 0; # Output in hex | |
751 | } | |
752 | ||
753 | if (! defined $range_start) { | |
754 | if ($class < 0) { | |
78fbfaf5 | 755 | $output .= " " . $char; |
019bf7dd KW |
756 | } |
757 | else { | |
758 | $range_start = ord $char; | |
759 | $start_class = $class; | |
760 | } | |
761 | } # A range ends if not consecutive, or the class-type changes | |
762 | elsif (ord $char != ($range_end = ord($chars[$i-1])) + 1 | |
763 | || $class != $start_class) | |
764 | { | |
765 | ||
766 | # Here, the current character is not in the range. This means the | |
767 | # previous character must have been. Output the range up through | |
768 | # that one. | |
769 | my $range_length = $range_end - $range_start + 1; | |
770 | if ($start_class > 0) { | |
771 | $output .= " " . chr($range_start); | |
772 | $output .= "-" . chr($range_end) if $range_length > 1; | |
773 | } | |
774 | else { | |
78fbfaf5 KW |
775 | $output .= sprintf(" %02X", $range_start); |
776 | $output .= sprintf("-%02X", $range_end) if $range_length > 1; | |
019bf7dd KW |
777 | } |
778 | ||
779 | # Handle the new current character, as potentially beginning a new | |
780 | # range | |
781 | undef $range_start; | |
782 | redo; | |
783 | } | |
784 | } | |
785 | ||
786 | $output =~ s/^ //; | |
78fbfaf5 | 787 | return $output; |
019bf7dd KW |
788 | } |
789 | ||
30032ef4 KW |
790 | sub report_result { |
791 | my ($Locale, $i, $pass_fail, $message) = @_; | |
15bbd6a2 KW |
792 | $message //= ""; |
793 | $message = " ($message)" if $message; | |
30032ef4 | 794 | unless ($pass_fail) { |
2a680da6 | 795 | $Problem{$i}{$Locale} = 1; |
baae13cb | 796 | debug "# failed $i ($test_names{$i}) with locale '$Locale'$message\n"; |
2a680da6 JH |
797 | } else { |
798 | push @{$Okay{$i}}, $Locale; | |
799 | } | |
800 | } | |
801 | ||
7c844d17 KW |
802 | sub report_multi_result { |
803 | my ($Locale, $i, $results_ref) = @_; | |
804 | ||
805 | # $results_ref points to an array, each element of which is a character that was | |
806 | # in error for this test numbered '$i'. If empty, the test passed | |
807 | ||
808 | my $message = ""; | |
809 | if (@$results_ref) { | |
705af3af | 810 | $message = join " ", "for", disp_chars(@$results_ref); |
7c844d17 KW |
811 | } |
812 | report_result($Locale, $i, @$results_ref == 0, $message); | |
813 | } | |
814 | ||
c4093d7d KW |
815 | my $first_locales_test_number = $final_without_setlocale + 1; |
816 | my $locales_test_number; | |
817 | my $not_necessarily_a_problem_test_number; | |
6d5d702a | 818 | my $first_casing_test_number; |
c4093d7d KW |
819 | my %setlocale_failed; # List of locales that setlocale() didn't work on |
820 | ||
284102e8 | 821 | foreach $Locale (@Locale) { |
c4093d7d | 822 | $locales_test_number = $first_locales_test_number - 1; |
78fbfaf5 | 823 | debug "#\n"; |
284102e8 | 824 | debug "# Locale = $Locale\n"; |
284102e8 | 825 | |
a810e350 | 826 | unless (setlocale(&POSIX::LC_ALL, $Locale)) { |
c4093d7d | 827 | $setlocale_failed{$Locale} = $Locale; |
284102e8 | 828 | next; |
8ebc5c01 | 829 | } |
8ebc5c01 | 830 | |
66cbab2c KW |
831 | # We test UTF-8 locales only under ':not_characters'; otherwise they have |
832 | # documented deficiencies. Non- UTF-8 locales are tested only under plain | |
833 | # 'use locale', as otherwise we would have to convert everything in them | |
834 | # to Unicode. | |
9ecfa8e4 KW |
835 | # The locale name doesn't necessarily have to have "utf8" in it to be a |
836 | # UTF-8 locale, but it works mostly. | |
66cbab2c KW |
837 | my $is_utf8_locale = $Locale =~ /UTF-?8/i; |
838 | ||
95eaa1bf KW |
839 | my %UPPER = (); # All alpha X for which uc(X) == X and lc(X) != X |
840 | my %lower = (); # All alpha X for which lc(X) == X and uc(X) != X | |
841 | my %BoThCaSe = (); # All alpha X for which uc(X) == lc(X) == X | |
66cbab2c KW |
842 | |
843 | if (! $is_utf8_locale) { | |
844 | use locale; | |
fb844330 KW |
845 | @{$posixes{'word'}} = grep /\w/, map { chr } 0..255; |
846 | @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255; | |
847 | @{$posixes{'space'}} = grep /\s/, map { chr } 0..255; | |
848 | @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255; | |
849 | @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255; | |
850 | @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255; | |
851 | @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255; | |
852 | @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255; | |
853 | @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255; | |
854 | @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255; | |
855 | @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255; | |
98ed56e7 | 856 | @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255; |
fb844330 KW |
857 | @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255; |
858 | @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255; | |
859 | @{$posixes{'cased'}} = grep /[[:upper:]]/i, map {chr } 0..255; | |
e5272a46 | 860 | |
71e5cbb3 KW |
861 | # Sieve the uppercase and the lowercase. |
862 | ||
fb844330 | 863 | for (@{$posixes{'word'}}) { |
71e5cbb3 KW |
864 | if (/[^\d_]/) { # skip digits and the _ |
865 | if (uc($_) eq $_) { | |
866 | $UPPER{$_} = $_; | |
867 | } | |
868 | if (lc($_) eq $_) { | |
869 | $lower{$_} = $_; | |
870 | } | |
871 | } | |
872 | } | |
66cbab2c KW |
873 | } |
874 | else { | |
875 | use locale ':not_characters'; | |
fb844330 KW |
876 | @{$posixes{'word'}} = grep /\w/, map { chr } 0..255; |
877 | @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255; | |
878 | @{$posixes{'space'}} = grep /\s/, map { chr } 0..255; | |
879 | @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255; | |
880 | @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255; | |
881 | @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255; | |
882 | @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255; | |
883 | @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255; | |
884 | @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255; | |
885 | @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255; | |
886 | @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255; | |
98ed56e7 | 887 | @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255; |
fb844330 KW |
888 | @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255; |
889 | @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255; | |
890 | @{$posixes{'cased'}} = grep /[[:upper:]]/i, map {chr } 0..255; | |
891 | for (@{$posixes{'word'}}) { | |
66cbab2c KW |
892 | if (/[^\d_]/) { # skip digits and the _ |
893 | if (uc($_) eq $_) { | |
894 | $UPPER{$_} = $_; | |
895 | } | |
896 | if (lc($_) eq $_) { | |
897 | $lower{$_} = $_; | |
898 | } | |
899 | } | |
900 | } | |
901 | } | |
a160ac48 | 902 | |
fb844330 KW |
903 | # Ordered, where possible, in groups of "this is a subset of the next |
904 | # one" | |
705af3af KW |
905 | debug "# :upper: = ", disp_chars(@{$posixes{'upper'}}), "\n"; |
906 | debug "# :lower: = ", disp_chars(@{$posixes{'lower'}}), "\n"; | |
907 | debug "# :cased: = ", disp_chars(@{$posixes{'cased'}}), "\n"; | |
908 | debug "# :alpha: = ", disp_chars(@{$posixes{'alpha'}}), "\n"; | |
909 | debug "# :alnum: = ", disp_chars(@{$posixes{'alnum'}}), "\n"; | |
910 | debug "# w = ", disp_chars(@{$posixes{'word'}}), "\n"; | |
911 | debug "# :graph: = ", disp_chars(@{$posixes{'graph'}}), "\n"; | |
912 | debug "# :print: = ", disp_chars(@{$posixes{'print'}}), "\n"; | |
913 | debug "# d = ", disp_chars(@{$posixes{'digit'}}), "\n"; | |
914 | debug "# :xdigit: = ", disp_chars(@{$posixes{'xdigit'}}), "\n"; | |
915 | debug "# :blank: = ", disp_chars(@{$posixes{'blank'}}), "\n"; | |
916 | debug "# s = ", disp_chars(@{$posixes{'space'}}), "\n"; | |
917 | debug "# :punct: = ", disp_chars(@{$posixes{'punct'}}), "\n"; | |
918 | debug "# :cntrl: = ", disp_chars(@{$posixes{'cntrl'}}), "\n"; | |
919 | debug "# :ascii: = ", disp_chars(@{$posixes{'ascii'}}), "\n"; | |
a160ac48 | 920 | |
284102e8 | 921 | foreach (keys %UPPER) { |
a160ac48 | 922 | |
097ee67d | 923 | $BoThCaSe{$_}++ if exists $lower{$_}; |
284102e8 JH |
924 | } |
925 | foreach (keys %lower) { | |
097ee67d | 926 | $BoThCaSe{$_}++ if exists $UPPER{$_}; |
284102e8 | 927 | } |
097ee67d | 928 | foreach (keys %BoThCaSe) { |
284102e8 JH |
929 | delete $UPPER{$_}; |
930 | delete $lower{$_}; | |
931 | } | |
932 | ||
e5cc0528 KW |
933 | my %Unassigned; |
934 | foreach my $ord ( 0 .. 255 ) { | |
935 | $Unassigned{chr $ord} = 1; | |
936 | } | |
937 | foreach my $class (keys %posixes) { | |
938 | foreach my $char (@{$posixes{$class}}) { | |
939 | delete $Unassigned{$char}; | |
940 | } | |
941 | } | |
942 | ||
705af3af KW |
943 | debug "# UPPER = ", disp_chars(keys %UPPER), "\n"; |
944 | debug "# lower = ", disp_chars(keys %lower), "\n"; | |
945 | debug "# BoThCaSe = ", disp_chars(keys %BoThCaSe), "\n"; | |
946 | debug "# Unassigned = ", disp_chars(sort { ord $a <=> ord $b } keys %Unassigned), "\n"; | |
284102e8 | 947 | |
baa71cfd | 948 | my @failures; |
3da38613 | 949 | my @fold_failures; |
baa71cfd KW |
950 | foreach my $x (sort keys %UPPER) { |
951 | my $ok; | |
3da38613 | 952 | my $fold_ok; |
baa71cfd KW |
953 | if ($is_utf8_locale) { |
954 | use locale ':not_characters'; | |
955 | $ok = $x =~ /[[:upper:]]/; | |
3da38613 | 956 | $fold_ok = $x =~ /[[:lower:]]/i; |
baa71cfd KW |
957 | } |
958 | else { | |
959 | use locale; | |
960 | $ok = $x =~ /[[:upper:]]/; | |
3da38613 | 961 | $fold_ok = $x =~ /[[:lower:]]/i; |
baa71cfd KW |
962 | } |
963 | push @failures, $x unless $ok; | |
3da38613 | 964 | push @fold_failures, $x unless $fold_ok; |
baa71cfd | 965 | } |
baa71cfd | 966 | $locales_test_number++; |
6d5d702a | 967 | $first_casing_test_number = $locales_test_number; |
95eaa1bf | 968 | $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/ matches all alpha X for which uc(X) == X and lc(X) != X'; |
7c844d17 | 969 | report_multi_result($Locale, $locales_test_number, \@failures); |
6d5d702a | 970 | |
3da38613 | 971 | $locales_test_number++; |
6d5d702a | 972 | |
95eaa1bf | 973 | $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i matches all alpha X for which uc(X) == X and lc(X) != X'; |
7c844d17 | 974 | report_multi_result($Locale, $locales_test_number, \@fold_failures); |
baa71cfd | 975 | |
baa71cfd | 976 | undef @failures; |
3da38613 | 977 | undef @fold_failures; |
baa71cfd KW |
978 | |
979 | foreach my $x (sort keys %lower) { | |
980 | my $ok; | |
3da38613 | 981 | my $fold_ok; |
baa71cfd KW |
982 | if ($is_utf8_locale) { |
983 | use locale ':not_characters'; | |
984 | $ok = $x =~ /[[:lower:]]/; | |
3da38613 | 985 | $fold_ok = $x =~ /[[:upper:]]/i; |
baa71cfd KW |
986 | } |
987 | else { | |
988 | use locale; | |
989 | $ok = $x =~ /[[:lower:]]/; | |
3da38613 | 990 | $fold_ok = $x =~ /[[:upper:]]/i; |
baa71cfd KW |
991 | } |
992 | push @failures, $x unless $ok; | |
3da38613 | 993 | push @fold_failures, $x unless $fold_ok; |
baa71cfd KW |
994 | } |
995 | ||
996 | $locales_test_number++; | |
95eaa1bf | 997 | $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/ matches all alpha X for which lc(X) == X and uc(X) != X'; |
7c844d17 KW |
998 | report_multi_result($Locale, $locales_test_number, \@failures); |
999 | ||
3da38613 | 1000 | $locales_test_number++; |
95eaa1bf | 1001 | $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/i matches all alpha X for which lc(X) == X and uc(X) != X'; |
7c844d17 | 1002 | report_multi_result($Locale, $locales_test_number, \@fold_failures); |
baa71cfd | 1003 | |
9445c837 KW |
1004 | { # Find the alphabetic characters that are not considered alphabetics |
1005 | # in the default (C) locale. | |
8ebc5c01 | 1006 | |
284102e8 | 1007 | no locale; |
71e5cbb3 | 1008 | |
30032ef4 | 1009 | @Added_alpha = (); |
5e7a1028 | 1010 | for (keys %UPPER, keys %lower, keys %BoThCaSe) { |
30032ef4 | 1011 | push(@Added_alpha, $_) if (/\W/); |
284102e8 | 1012 | } |
8ebc5c01 | 1013 | } |
8ebc5c01 | 1014 | |
30032ef4 | 1015 | @Added_alpha = sort @Added_alpha; |
8ebc5c01 | 1016 | |
705af3af | 1017 | debug "# Added_alpha = ", disp_chars(@Added_alpha), "\n"; |
8ebc5c01 | 1018 | |
db31898d | 1019 | # Cross-check the whole 8-bit character set. |
8ebc5c01 | 1020 | |
db31898d KW |
1021 | ++$locales_test_number; |
1022 | my @f; | |
1023 | $test_names{$locales_test_number} = 'Verify that \w and [:word:] are identical'; | |
1024 | for (map { chr } 0..255) { | |
1025 | if ($is_utf8_locale) { | |
1026 | use locale ':not_characters'; | |
1027 | push @f, $_ unless /[[:word:]]/ == /\w/; | |
1028 | } | |
1029 | else { | |
1030 | push @f, $_ unless /[[:word:]]/ == /\w/; | |
1031 | } | |
1032 | } | |
1033 | report_multi_result($Locale, $locales_test_number, \@f); | |
8ebc5c01 | 1034 | |
db31898d KW |
1035 | ++$locales_test_number; |
1036 | undef @f; | |
1037 | $test_names{$locales_test_number} = 'Verify that \d and [:digit:] are identical'; | |
1038 | for (map { chr } 0..255) { | |
1039 | if ($is_utf8_locale) { | |
1040 | use locale ':not_characters'; | |
1041 | push @f, $_ unless /[[:digit:]]/ == /\d/; | |
1042 | } | |
1043 | else { | |
1044 | push @f, $_ unless /[[:digit:]]/ == /\d/; | |
1045 | } | |
1046 | } | |
1047 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1048 | |
db31898d KW |
1049 | ++$locales_test_number; |
1050 | undef @f; | |
1051 | $test_names{$locales_test_number} = 'Verify that \s and [:space:] are identical'; | |
1052 | for (map { chr } 0..255) { | |
1053 | if ($is_utf8_locale) { | |
1054 | use locale ':not_characters'; | |
1055 | push @f, $_ unless /[[:space:]]/ == /\s/; | |
1056 | } | |
1057 | else { | |
1058 | push @f, $_ unless /[[:space:]]/ == /\s/; | |
1059 | } | |
1060 | } | |
1061 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1062 | |
db31898d KW |
1063 | ++$locales_test_number; |
1064 | undef @f; | |
1065 | $test_names{$locales_test_number} = 'Verify that [:posix:] and [:^posix:] are mutually exclusive'; | |
1066 | for (map { chr } 0..255) { | |
1067 | if ($is_utf8_locale) { | |
1068 | use locale ':not_characters'; | |
1069 | push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) || | |
1070 | (/[[:alnum:]]/ xor /[[:^alnum:]]/) || | |
1071 | (/[[:ascii:]]/ xor /[[:^ascii:]]/) || | |
1072 | (/[[:blank:]]/ xor /[[:^blank:]]/) || | |
1073 | (/[[:cntrl:]]/ xor /[[:^cntrl:]]/) || | |
1074 | (/[[:digit:]]/ xor /[[:^digit:]]/) || | |
1075 | (/[[:graph:]]/ xor /[[:^graph:]]/) || | |
1076 | (/[[:lower:]]/ xor /[[:^lower:]]/) || | |
1077 | (/[[:print:]]/ xor /[[:^print:]]/) || | |
1078 | (/[[:space:]]/ xor /[[:^space:]]/) || | |
1079 | (/[[:upper:]]/ xor /[[:^upper:]]/) || | |
1080 | (/[[:word:]]/ xor /[[:^word:]]/) || | |
1081 | (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) || | |
1082 | ||
1083 | # effectively is what [:cased:] would be if it existed. | |
1084 | (/[[:upper:]]/i xor /[[:^upper:]]/i); | |
1085 | } | |
1086 | else { | |
1087 | push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) || | |
1088 | (/[[:alnum:]]/ xor /[[:^alnum:]]/) || | |
1089 | (/[[:ascii:]]/ xor /[[:^ascii:]]/) || | |
1090 | (/[[:blank:]]/ xor /[[:^blank:]]/) || | |
1091 | (/[[:cntrl:]]/ xor /[[:^cntrl:]]/) || | |
1092 | (/[[:digit:]]/ xor /[[:^digit:]]/) || | |
1093 | (/[[:graph:]]/ xor /[[:^graph:]]/) || | |
1094 | (/[[:lower:]]/ xor /[[:^lower:]]/) || | |
1095 | (/[[:print:]]/ xor /[[:^print:]]/) || | |
1096 | (/[[:space:]]/ xor /[[:^space:]]/) || | |
1097 | (/[[:upper:]]/ xor /[[:^upper:]]/) || | |
1098 | (/[[:word:]]/ xor /[[:^word:]]/) || | |
1099 | (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) || | |
1100 | (/[[:upper:]]/i xor /[[:^upper:]]/i); | |
1101 | } | |
1102 | } | |
1103 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1104 | |
db31898d KW |
1105 | # The rules for the relationships are given in: |
1106 | # http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html | |
32e8aa3f | 1107 | |
660c7bcc KW |
1108 | |
1109 | ++$locales_test_number; | |
1110 | undef @f; | |
1111 | $test_names{$locales_test_number} = 'Verify that [:lower:] contains at least a-z'; | |
1112 | for ('a' .. 'z') { | |
1113 | if ($is_utf8_locale) { | |
1114 | use locale ':not_characters'; | |
1115 | push @f, $_ unless /[[:lower:]]/; | |
1116 | } | |
1117 | else { | |
1118 | push @f, $_ unless /[[:lower:]]/; | |
1119 | } | |
1120 | } | |
1121 | report_multi_result($Locale, $locales_test_number, \@f); | |
1122 | ||
db31898d KW |
1123 | ++$locales_test_number; |
1124 | undef @f; | |
1125 | $test_names{$locales_test_number} = 'Verify that [:lower:] is a subset of [:alpha:]'; | |
1126 | for (map { chr } 0..255) { | |
1127 | if ($is_utf8_locale) { | |
1128 | use locale ':not_characters'; | |
1129 | push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/; | |
1130 | } | |
1131 | else { | |
1132 | push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/; | |
1133 | } | |
1134 | } | |
1135 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1136 | |
db31898d KW |
1137 | ++$locales_test_number; |
1138 | undef @f; | |
660c7bcc KW |
1139 | $test_names{$locales_test_number} = 'Verify that [:upper:] contains at least A-Z'; |
1140 | for ('A' .. 'Z') { | |
1141 | if ($is_utf8_locale) { | |
1142 | use locale ':not_characters'; | |
1143 | push @f, $_ unless /[[:upper:]]/; | |
1144 | } | |
1145 | else { | |
1146 | push @f, $_ unless /[[:upper:]]/; | |
1147 | } | |
1148 | } | |
1149 | report_multi_result($Locale, $locales_test_number, \@f); | |
1150 | ||
1151 | ++$locales_test_number; | |
1152 | undef @f; | |
db31898d KW |
1153 | $test_names{$locales_test_number} = 'Verify that [:upper:] is a subset of [:alpha:]'; |
1154 | for (map { chr } 0..255) { | |
1155 | if ($is_utf8_locale) { | |
1156 | use locale ':not_characters'; | |
1157 | push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/; | |
1158 | } | |
1159 | else { | |
1160 | push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/; | |
1161 | } | |
1162 | } | |
1163 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1164 | |
db31898d KW |
1165 | ++$locales_test_number; |
1166 | undef @f; | |
1167 | $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i is a subset of [:alpha:]'; | |
1168 | for (map { chr } 0..255) { | |
1169 | if ($is_utf8_locale) { | |
1170 | use locale ':not_characters'; | |
1171 | push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/; | |
1172 | } | |
1173 | else { | |
1174 | push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/; | |
1175 | } | |
1176 | } | |
1177 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1178 | |
db31898d KW |
1179 | ++$locales_test_number; |
1180 | undef @f; | |
1181 | $test_names{$locales_test_number} = 'Verify that [:alpha:] is a subset of [:alnum:]'; | |
1182 | for (map { chr } 0..255) { | |
1183 | if ($is_utf8_locale) { | |
1184 | use locale ':not_characters'; | |
1185 | push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/; | |
1186 | } | |
1187 | else { | |
1188 | push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/; | |
1189 | } | |
1190 | } | |
1191 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1192 | |
db31898d KW |
1193 | ++$locales_test_number; |
1194 | undef @f; | |
660c7bcc KW |
1195 | $test_names{$locales_test_number} = 'Verify that [:digit:] contains at least 0-9'; |
1196 | for ('0' .. '9') { | |
1197 | if ($is_utf8_locale) { | |
1198 | use locale ':not_characters'; | |
1199 | push @f, $_ unless /[[:digit:]]/; | |
1200 | } | |
1201 | else { | |
1202 | push @f, $_ unless /[[:digit:]]/; | |
1203 | } | |
1204 | } | |
1205 | report_multi_result($Locale, $locales_test_number, \@f); | |
1206 | ||
1207 | ++$locales_test_number; | |
1208 | undef @f; | |
db31898d KW |
1209 | $test_names{$locales_test_number} = 'Verify that [:digit:] is a subset of [:alnum:]'; |
1210 | for (map { chr } 0..255) { | |
1211 | if ($is_utf8_locale) { | |
1212 | use locale ':not_characters'; | |
1213 | push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/; | |
1214 | } | |
1215 | else { | |
1216 | push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/; | |
1217 | } | |
1218 | } | |
1219 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1220 | |
db31898d KW |
1221 | ++$locales_test_number; |
1222 | undef @f; | |
aac995d2 | 1223 | $test_names{$locales_test_number} = 'Verify that [:digit:] matches either 10 or 20 code points'; |
fb844330 | 1224 | report_result($Locale, $locales_test_number, @{$posixes{'digit'}} == 10 || @{$posixes{'digit'}} == 20); |
aac995d2 KW |
1225 | |
1226 | ++$locales_test_number; | |
1227 | undef @f; | |
660c7bcc KW |
1228 | $test_names{$locales_test_number} = 'Verify that if there is a second set of digits in [:digit:], they are consecutive'; |
1229 | if (@{$posixes{'digit'}} == 20) { | |
1230 | my $previous_ord; | |
fc81f5f2 | 1231 | for (map { chr } 0..255) { |
660c7bcc KW |
1232 | next unless /[[:digit:]]/; |
1233 | next if /[0-9]/; | |
1234 | if (defined $previous_ord) { | |
1235 | if ($is_utf8_locale) { | |
1236 | use locale ':not_characters'; | |
1237 | push @f, $_ if ord $_ != $previous_ord + 1; | |
1238 | } | |
1239 | else { | |
1240 | push @f, $_ if ord $_ != $previous_ord + 1; | |
1241 | } | |
fc81f5f2 | 1242 | } |
660c7bcc | 1243 | $previous_ord = ord $_; |
db31898d KW |
1244 | } |
1245 | } | |
1246 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1247 | |
db31898d KW |
1248 | ++$locales_test_number; |
1249 | undef @f; | |
660c7bcc | 1250 | $test_names{$locales_test_number} = 'Verify that [:digit:] is a subset of [:xdigit:]'; |
db31898d KW |
1251 | for (map { chr } 0..255) { |
1252 | if ($is_utf8_locale) { | |
1253 | use locale ':not_characters'; | |
660c7bcc | 1254 | push @f, $_ if /[[:digit:]]/ and ! /[[:xdigit:]]/; |
db31898d KW |
1255 | } |
1256 | else { | |
660c7bcc | 1257 | push @f, $_ if /[[:digit:]]/ and ! /[[:xdigit:]]/; |
db31898d KW |
1258 | } |
1259 | } | |
1260 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1261 | |
660c7bcc KW |
1262 | ++$locales_test_number; |
1263 | undef @f; | |
1264 | $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains at least A-F, a-f'; | |
1265 | for ('A' .. 'F', 'a' .. 'f') { | |
1266 | if ($is_utf8_locale) { | |
1267 | use locale ':not_characters'; | |
1268 | push @f, $_ unless /[[:xdigit:]]/; | |
1269 | } | |
1270 | else { | |
1271 | push @f, $_ unless /[[:xdigit:]]/; | |
1272 | } | |
1273 | } | |
1274 | report_multi_result($Locale, $locales_test_number, \@f); | |
1275 | ||
1276 | ++$locales_test_number; | |
1277 | undef @f; | |
1278 | $test_names{$locales_test_number} = 'Verify that any additional members of [:xdigit:], are in groups of 6 consecutive code points'; | |
1279 | my $previous_ord; | |
1280 | my $count = 0; | |
1281 | for (map { chr } 0..255) { | |
1282 | next unless /[[:xdigit:]]/; | |
1283 | next if /[[:digit:]]/; | |
1284 | next if /[A-Fa-f]/; | |
1285 | if (defined $previous_ord) { | |
1286 | if ($is_utf8_locale) { | |
1287 | use locale ':not_characters'; | |
1288 | push @f, $_ if ord $_ != $previous_ord + 1; | |
1289 | } | |
1290 | else { | |
1291 | push @f, $_ if ord $_ != $previous_ord + 1; | |
1292 | } | |
1293 | } | |
1294 | $count++; | |
1295 | if ($count == 6) { | |
1296 | undef $previous_ord; | |
1297 | } | |
1298 | else { | |
1299 | $previous_ord = ord $_; | |
1300 | } | |
1301 | } | |
1302 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1303 | |
db31898d KW |
1304 | ++$locales_test_number; |
1305 | undef @f; | |
1306 | $test_names{$locales_test_number} = 'Verify that [:xdigit:] is a subset of [:graph:]'; | |
1307 | for (map { chr } 0..255) { | |
1308 | if ($is_utf8_locale) { | |
1309 | use locale ':not_characters'; | |
1310 | push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/; | |
1311 | } | |
1312 | else { | |
1313 | push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/; | |
1314 | } | |
1315 | } | |
1316 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1317 | |
660c7bcc KW |
1318 | # Note that xdigit doesn't have to be a subset of alnum |
1319 | ||
db31898d KW |
1320 | ++$locales_test_number; |
1321 | undef @f; | |
1322 | $test_names{$locales_test_number} = 'Verify that [:punct:] is a subset of [:graph:]'; | |
1323 | for (map { chr } 0..255) { | |
1324 | if ($is_utf8_locale) { | |
1325 | use locale ':not_characters'; | |
1326 | push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/; | |
1327 | } | |
1328 | else { | |
1329 | push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/; | |
1330 | } | |
1331 | } | |
1332 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1333 | |
db31898d KW |
1334 | ++$locales_test_number; |
1335 | undef @f; | |
660c7bcc KW |
1336 | $test_names{$locales_test_number} = 'Verify that the space character is not in [:graph:]'; |
1337 | if ($is_utf8_locale) { | |
1338 | use locale ':not_characters'; | |
1339 | push @f, " " if " " =~ /[[:graph:]]/; | |
1340 | } | |
1341 | else { | |
1342 | push @f, " " if " " =~ /[[:graph:]]/; | |
1343 | } | |
1344 | report_multi_result($Locale, $locales_test_number, \@f); | |
1345 | ||
1346 | ++$locales_test_number; | |
1347 | undef @f; | |
1348 | $test_names{$locales_test_number} = 'Verify that [:space:] contains at least [\f\n\r\t\cK ]'; | |
1349 | for (' ', "\f", "\n", "\r", "\t", "\cK") { | |
1350 | if ($is_utf8_locale) { | |
1351 | use locale ':not_characters'; | |
1352 | push @f, $_ unless /[[:space:]]/; | |
1353 | } | |
1354 | else { | |
1355 | push @f, $_ unless /[[:space:]]/; | |
1356 | } | |
1357 | } | |
1358 | report_multi_result($Locale, $locales_test_number, \@f); | |
1359 | ||
1360 | ++$locales_test_number; | |
1361 | undef @f; | |
1362 | $test_names{$locales_test_number} = 'Verify that [:blank:] contains at least [\t ]'; | |
1363 | for (' ', "\t") { | |
1364 | if ($is_utf8_locale) { | |
1365 | use locale ':not_characters'; | |
1366 | push @f, $_ unless /[[:blank:]]/; | |
1367 | } | |
1368 | else { | |
1369 | push @f, $_ unless /[[:blank:]]/; | |
1370 | } | |
1371 | } | |
1372 | report_multi_result($Locale, $locales_test_number, \@f); | |
1373 | ||
1374 | ++$locales_test_number; | |
1375 | undef @f; | |
db31898d KW |
1376 | $test_names{$locales_test_number} = 'Verify that [:blank:] is a subset of [:space:]'; |
1377 | for (map { chr } 0..255) { | |
1378 | if ($is_utf8_locale) { | |
1379 | use locale ':not_characters'; | |
1380 | push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/; | |
1381 | } | |
1382 | else { | |
1383 | push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/; | |
1384 | } | |
1385 | } | |
1386 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1387 | |
db31898d KW |
1388 | ++$locales_test_number; |
1389 | undef @f; | |
1390 | $test_names{$locales_test_number} = 'Verify that [:graph:] is a subset of [:print:]'; | |
1391 | for (map { chr } 0..255) { | |
1392 | if ($is_utf8_locale) { | |
1393 | use locale ':not_characters'; | |
1394 | push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/; | |
1395 | } | |
1396 | else { | |
1397 | push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/; | |
1398 | } | |
1399 | } | |
1400 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1401 | |
db31898d KW |
1402 | ++$locales_test_number; |
1403 | undef @f; | |
660c7bcc KW |
1404 | $test_names{$locales_test_number} = 'Verify that the space character is in [:print:]'; |
1405 | if ($is_utf8_locale) { | |
1406 | use locale ':not_characters'; | |
1407 | push @f, " " if " " !~ /[[:print:]]/; | |
1408 | } | |
1409 | else { | |
1410 | push @f, " " if " " !~ /[[:print:]]/; | |
1411 | } | |
1412 | report_multi_result($Locale, $locales_test_number, \@f); | |
1413 | ||
1414 | ++$locales_test_number; | |
1415 | undef @f; | |
db31898d KW |
1416 | $test_names{$locales_test_number} = 'Verify that isn\'t both [:cntrl:] and [:print:]'; |
1417 | for (map { chr } 0..255) { | |
1418 | if ($is_utf8_locale) { | |
1419 | use locale ':not_characters'; | |
1420 | push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/); | |
1421 | } | |
1422 | else { | |
1423 | push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/); | |
1424 | } | |
1425 | } | |
1426 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1427 | |
db31898d KW |
1428 | ++$locales_test_number; |
1429 | undef @f; | |
660c7bcc KW |
1430 | $test_names{$locales_test_number} = 'Verify that isn\'t both [:alpha:] and [:digit:]'; |
1431 | for (map { chr } 0..255) { | |
1432 | if ($is_utf8_locale) { | |
1433 | use locale ':not_characters'; | |
1434 | push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/; | |
1435 | } | |
1436 | else { | |
1437 | push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/; | |
1438 | } | |
1439 | } | |
1440 | report_multi_result($Locale, $locales_test_number, \@f); | |
1441 | ||
1442 | ++$locales_test_number; | |
1443 | undef @f; | |
db31898d KW |
1444 | $test_names{$locales_test_number} = 'Verify that isn\'t both [:alnum:] and [:punct:]'; |
1445 | for (map { chr } 0..255) { | |
1446 | if ($is_utf8_locale) { | |
1447 | use locale ':not_characters'; | |
1448 | push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/; | |
1449 | } | |
1450 | else { | |
1451 | push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/; | |
1452 | } | |
1453 | } | |
1454 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1455 | |
db31898d KW |
1456 | ++$locales_test_number; |
1457 | undef @f; | |
1458 | $test_names{$locales_test_number} = 'Verify that isn\'t both [:xdigit:] and [:punct:]'; | |
1459 | for (map { chr } 0..255) { | |
1460 | if ($is_utf8_locale) { | |
1461 | use locale ':not_characters'; | |
1462 | push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/); | |
1463 | } | |
1464 | else { | |
1465 | push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/); | |
1466 | } | |
1467 | } | |
1468 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1469 | |
db31898d KW |
1470 | ++$locales_test_number; |
1471 | undef @f; | |
1472 | $test_names{$locales_test_number} = 'Verify that isn\'t both [:graph:] and [:space:]'; | |
1473 | for (map { chr } 0..255) { | |
1474 | if ($is_utf8_locale) { | |
1475 | use locale ':not_characters'; | |
1476 | push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/); | |
1477 | } | |
1478 | else { | |
1479 | push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/); | |
1480 | } | |
1481 | } | |
1482 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1483 | |
7493b8f2 KW |
1484 | foreach ($first_casing_test_number..$locales_test_number) { |
1485 | push @problematical_tests, $_; | |
1486 | } | |
1487 | ||
32e8aa3f | 1488 | |
db31898d KW |
1489 | # Test for read-only scalars' locale vs non-locale comparisons. |
1490 | ||
1491 | { | |
1492 | no locale; | |
1493 | my $ok; | |
1494 | $a = "qwerty"; | |
1495 | if ($is_utf8_locale) { | |
1496 | use locale ':not_characters'; | |
1497 | $ok = ($a cmp "qwerty") == 0; | |
1498 | } | |
1499 | else { | |
1500 | use locale; | |
1501 | $ok = ($a cmp "qwerty") == 0; | |
1502 | } | |
1503 | report_result($Locale, ++$locales_test_number, $ok); | |
1504 | $test_names{$locales_test_number} = 'Verify that cmp works with a read-only scalar; no- vs locale'; | |
1505 | } | |
8ebc5c01 | 1506 | |
db31898d KW |
1507 | { |
1508 | my ($from, $to, $lesser, $greater, | |
1509 | @test, %test, $test, $yes, $no, $sign); | |
284102e8 | 1510 | |
db31898d KW |
1511 | ++$locales_test_number; |
1512 | $test_names{$locales_test_number} = 'Verify that "le", "ne", etc work'; | |
1513 | $not_necessarily_a_problem_test_number = $locales_test_number; | |
1514 | for (0..9) { | |
1515 | # Select a slice. | |
fb844330 KW |
1516 | $from = int(($_*@{$posixes{'word'}})/10); |
1517 | $to = $from + int(@{$posixes{'word'}}/10); | |
1518 | $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}}); | |
1519 | $lesser = join('', @{$posixes{'word'}}[$from..$to]); | |
db31898d KW |
1520 | # Select a slice one character on. |
1521 | $from++; $to++; | |
fb844330 KW |
1522 | $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}}); |
1523 | $greater = join('', @{$posixes{'word'}}[$from..$to]); | |
66cbab2c KW |
1524 | if ($is_utf8_locale) { |
1525 | use locale ':not_characters'; | |
db31898d KW |
1526 | ($yes, $no, $sign) = ($lesser lt $greater |
1527 | ? (" ", "not ", 1) | |
1528 | : ("not ", " ", -1)); | |
66cbab2c KW |
1529 | } |
1530 | else { | |
1531 | use locale; | |
db31898d KW |
1532 | ($yes, $no, $sign) = ($lesser lt $greater |
1533 | ? (" ", "not ", 1) | |
1534 | : ("not ", " ", -1)); | |
66cbab2c | 1535 | } |
db31898d KW |
1536 | # all these tests should FAIL (return 0). Exact lt or gt cannot |
1537 | # be tested because in some locales, say, eacute and E may test | |
1538 | # equal. | |
1539 | @test = | |
1540 | ( | |
1541 | $no.' ($lesser le $greater)', # 1 | |
1542 | 'not ($lesser ne $greater)', # 2 | |
1543 | ' ($lesser eq $greater)', # 3 | |
1544 | $yes.' ($lesser ge $greater)', # 4 | |
1545 | $yes.' ($lesser ge $greater)', # 5 | |
1546 | $yes.' ($greater le $lesser )', # 7 | |
1547 | 'not ($greater ne $lesser )', # 8 | |
1548 | ' ($greater eq $lesser )', # 9 | |
1549 | $no.' ($greater ge $lesser )', # 10 | |
1550 | 'not (($lesser cmp $greater) == -($sign))' # 11 | |
1551 | ); | |
1552 | @test{@test} = 0 x @test; | |
1553 | $test = 0; | |
1554 | for my $ti (@test) { | |
66cbab2c KW |
1555 | if ($is_utf8_locale) { |
1556 | use locale ':not_characters'; | |
db31898d | 1557 | $test{$ti} = eval $ti; |
66cbab2c KW |
1558 | } |
1559 | else { | |
db31898d KW |
1560 | # Already in 'use locale'; |
1561 | $test{$ti} = eval $ti; | |
66cbab2c | 1562 | } |
db31898d KW |
1563 | $test ||= $test{$ti} |
1564 | } | |
1565 | report_result($Locale, $locales_test_number, $test == 0); | |
1566 | if ($test) { | |
1567 | debug "# lesser = '$lesser'\n"; | |
1568 | debug "# greater = '$greater'\n"; | |
1569 | debug "# lesser cmp greater = ", | |
1570 | $lesser cmp $greater, "\n"; | |
1571 | debug "# greater cmp lesser = ", | |
1572 | $greater cmp $lesser, "\n"; | |
1573 | debug "# (greater) from = $from, to = $to\n"; | |
1574 | for my $ti (@test) { | |
1575 | debugf("# %-40s %-4s", $ti, | |
1576 | $test{$ti} ? 'FAIL' : 'ok'); | |
1577 | if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) { | |
1578 | debugf("(%s == %4d)", $1, eval $1); | |
66cbab2c | 1579 | } |
db31898d KW |
1580 | debug "\n#"; |
1581 | } | |
1582 | ||
1583 | last; | |
1584 | } | |
1585 | } | |
1586 | } | |
c4093d7d | 1587 | |
66cbab2c KW |
1588 | my $ok1; |
1589 | my $ok2; | |
1590 | my $ok3; | |
1591 | my $ok4; | |
1592 | my $ok5; | |
1593 | my $ok6; | |
1594 | my $ok7; | |
1595 | my $ok8; | |
1596 | my $ok9; | |
1597 | my $ok10; | |
1598 | my $ok11; | |
1599 | my $ok12; | |
1600 | my $ok13; | |
1500bd91 | 1601 | my $ok14; |
28acfe03 KW |
1602 | my $ok15; |
1603 | my $ok16; | |
e46375fa KW |
1604 | my $ok17; |
1605 | my $ok18; | |
66cbab2c KW |
1606 | |
1607 | my $c; | |
1608 | my $d; | |
1609 | my $e; | |
1610 | my $f; | |
1611 | my $g; | |
e46375fa KW |
1612 | my $h; |
1613 | my $i; | |
1614 | my $j; | |
66cbab2c KW |
1615 | |
1616 | if (! $is_utf8_locale) { | |
71e5cbb3 | 1617 | use locale; |
6be75cd7 | 1618 | |
71e5cbb3 | 1619 | my ($x, $y) = (1.23, 1.23); |
6be75cd7 | 1620 | |
71e5cbb3 KW |
1621 | $a = "$x"; |
1622 | printf ''; # printf used to reset locale to "C" | |
1623 | $b = "$y"; | |
1624 | $ok1 = $a eq $b; | |
6be75cd7 | 1625 | |
71e5cbb3 KW |
1626 | $c = "$x"; |
1627 | my $z = sprintf ''; # sprintf used to reset locale to "C" | |
1628 | $d = "$y"; | |
1629 | $ok2 = $c eq $d; | |
1630 | { | |
66cbab2c | 1631 | |
71e5cbb3 KW |
1632 | use warnings; |
1633 | my $w = 0; | |
1634 | local $SIG{__WARN__} = | |
1635 | sub { | |
1636 | print "# @_\n"; | |
1637 | $w++; | |
1638 | }; | |
6be75cd7 | 1639 | |
71e5cbb3 KW |
1640 | # The == (among other ops) used to warn for locales |
1641 | # that had something else than "." as the radix character. | |
6be75cd7 | 1642 | |
71e5cbb3 KW |
1643 | $ok3 = $c == 1.23; |
1644 | $ok4 = $c == $x; | |
1645 | $ok5 = $c == $d; | |
1646 | { | |
1647 | no locale; | |
66cbab2c | 1648 | |
b79536ea | 1649 | $e = "$x"; |
71e5cbb3 KW |
1650 | |
1651 | $ok6 = $e == 1.23; | |
1652 | $ok7 = $e == $x; | |
1653 | $ok8 = $e == $c; | |
1654 | } | |
66cbab2c | 1655 | |
71e5cbb3 KW |
1656 | $f = "1.23"; |
1657 | $g = 2.34; | |
e46375fa KW |
1658 | $h = 1.5; |
1659 | $i = 1.25; | |
1660 | $j = "$h:$i"; | |
66cbab2c | 1661 | |
71e5cbb3 KW |
1662 | $ok9 = $f == 1.23; |
1663 | $ok10 = $f == $x; | |
1664 | $ok11 = $f == $c; | |
1665 | $ok12 = abs(($f + $g) - 3.57) < 0.01; | |
1666 | $ok13 = $w == 0; | |
28acfe03 | 1667 | $ok14 = $ok15 = $ok16 = 1; # Skip for non-utf8 locales |
71e5cbb3 | 1668 | } |
e46375fa KW |
1669 | { |
1670 | no locale; | |
1671 | $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i); | |
1672 | } | |
1673 | $ok18 = $j eq sprintf("%g:%g", $h, $i); | |
66cbab2c KW |
1674 | } |
1675 | else { | |
1676 | use locale ':not_characters'; | |
1677 | ||
1678 | my ($x, $y) = (1.23, 1.23); | |
1679 | $a = "$x"; | |
1680 | printf ''; # printf used to reset locale to "C" | |
1681 | $b = "$y"; | |
1682 | $ok1 = $a eq $b; | |
1683 | ||
1684 | $c = "$x"; | |
1685 | my $z = sprintf ''; # sprintf used to reset locale to "C" | |
1686 | $d = "$y"; | |
1687 | $ok2 = $c eq $d; | |
1688 | { | |
1689 | use warnings; | |
1690 | my $w = 0; | |
1691 | local $SIG{__WARN__} = | |
1692 | sub { | |
1693 | print "# @_\n"; | |
1694 | $w++; | |
1695 | }; | |
1696 | $ok3 = $c == 1.23; | |
1697 | $ok4 = $c == $x; | |
1698 | $ok5 = $c == $d; | |
1699 | { | |
1700 | no locale; | |
b79536ea | 1701 | $e = "$x"; |
66cbab2c KW |
1702 | |
1703 | $ok6 = $e == 1.23; | |
1704 | $ok7 = $e == $x; | |
1705 | $ok8 = $e == $c; | |
1706 | } | |
1707 | ||
1708 | $f = "1.23"; | |
1709 | $g = 2.34; | |
e46375fa KW |
1710 | $h = 1.5; |
1711 | $i = 1.25; | |
1712 | $j = "$h:$i"; | |
66cbab2c KW |
1713 | |
1714 | $ok9 = $f == 1.23; | |
1715 | $ok10 = $f == $x; | |
1716 | $ok11 = $f == $c; | |
1717 | $ok12 = abs(($f + $g) - 3.57) < 0.01; | |
1718 | $ok13 = $w == 0; | |
1500bd91 KW |
1719 | |
1720 | # Look for non-ASCII error messages, and verify that the first | |
1721 | # such is in UTF-8 (the others almost certainly will be like the | |
1722 | # first). | |
1723 | $ok14 = 1; | |
1724 | foreach my $err (keys %!) { | |
1725 | use Errno; | |
1726 | $! = eval "&Errno::$err"; # Convert to strerror() output | |
1727 | my $strerror = "$!"; | |
1728 | if ("$strerror" =~ /\P{ASCII}/) { | |
2ef12107 | 1729 | $ok14 = utf8::is_utf8($strerror); |
1500bd91 KW |
1730 | last; |
1731 | } | |
1732 | } | |
28acfe03 KW |
1733 | |
1734 | # Similarly, we verify that a non-ASCII radix is in UTF-8. This | |
1735 | # also catches if there is a disparity between sprintf and | |
1736 | # stringification. | |
1737 | ||
1738 | my $string_g = "$g"; | |
2ef12107 | 1739 | my $sprintf_g = sprintf("%g", $g); |
28acfe03 | 1740 | |
2ef12107 KW |
1741 | $ok15 = $string_g =~ / ^ \p{ASCII}+ $ /x || utf8::is_utf8($string_g); |
1742 | $ok16 = $sprintf_g eq $string_g; | |
66cbab2c | 1743 | } |
e46375fa KW |
1744 | { |
1745 | no locale; | |
1746 | $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i); | |
1747 | } | |
1748 | $ok18 = $j eq sprintf("%g:%g", $h, $i); | |
66cbab2c KW |
1749 | } |
1750 | ||
30032ef4 | 1751 | report_result($Locale, ++$locales_test_number, $ok1); |
66cbab2c KW |
1752 | $test_names{$locales_test_number} = 'Verify that an intervening printf doesn\'t change assignment results'; |
1753 | my $first_a_test = $locales_test_number; | |
1754 | ||
1755 | debug "# $first_a_test..$locales_test_number: \$a = $a, \$b = $b, Locale = $Locale\n"; | |
1756 | ||
30032ef4 | 1757 | report_result($Locale, ++$locales_test_number, $ok2); |
66cbab2c KW |
1758 | $test_names{$locales_test_number} = 'Verify that an intervening sprintf doesn\'t change assignment results'; |
1759 | ||
1760 | my $first_c_test = $locales_test_number; | |
1761 | ||
30032ef4 | 1762 | report_result($Locale, ++$locales_test_number, $ok3); |
71e5cbb3 | 1763 | $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a constant'; |
6be75cd7 | 1764 | |
30032ef4 | 1765 | report_result($Locale, ++$locales_test_number, $ok4); |
71e5cbb3 | 1766 | $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar'; |
66cbab2c | 1767 | |
30032ef4 | 1768 | report_result($Locale, ++$locales_test_number, $ok5); |
71e5cbb3 | 1769 | $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar and an intervening sprintf'; |
66cbab2c | 1770 | |
71e5cbb3 | 1771 | debug "# $first_c_test..$locales_test_number: \$c = $c, \$d = $d, Locale = $Locale\n"; |
66cbab2c | 1772 | |
30032ef4 | 1773 | report_result($Locale, ++$locales_test_number, $ok6); |
b79536ea | 1774 | $test_names{$locales_test_number} = 'Verify that can assign stringified under inner no-locale block'; |
71e5cbb3 | 1775 | my $first_e_test = $locales_test_number; |
6be75cd7 | 1776 | |
30032ef4 | 1777 | report_result($Locale, ++$locales_test_number, $ok7); |
71e5cbb3 | 1778 | $test_names{$locales_test_number} = 'Verify that "==" with a scalar still works in inner no locale'; |
66cbab2c | 1779 | |
30032ef4 | 1780 | report_result($Locale, ++$locales_test_number, $ok8); |
71e5cbb3 | 1781 | $test_names{$locales_test_number} = 'Verify that "==" with a scalar and an intervening sprintf still works in inner no locale'; |
c4093d7d | 1782 | |
71e5cbb3 | 1783 | debug "# $first_e_test..$locales_test_number: \$e = $e, no locale\n"; |
2a680da6 | 1784 | |
30032ef4 | 1785 | report_result($Locale, ++$locales_test_number, $ok9); |
71e5cbb3 KW |
1786 | $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a constant'; |
1787 | my $first_f_test = $locales_test_number; | |
6be75cd7 | 1788 | |
30032ef4 | 1789 | report_result($Locale, ++$locales_test_number, $ok10); |
71e5cbb3 | 1790 | $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar'; |
66cbab2c | 1791 | |
30032ef4 | 1792 | report_result($Locale, ++$locales_test_number, $ok11); |
71e5cbb3 | 1793 | $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar and an intervening sprintf'; |
906f284f | 1794 | |
30032ef4 | 1795 | report_result($Locale, ++$locales_test_number, $ok12); |
71e5cbb3 | 1796 | $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix can participate in an addition and function call as numeric'; |
c4093d7d | 1797 | |
30032ef4 | 1798 | report_result($Locale, ++$locales_test_number, $ok13); |
71e5cbb3 | 1799 | $test_names{$locales_test_number} = 'Verify that don\'t get warning under "==" even if radix is not a dot'; |
c4093d7d | 1800 | |
30032ef4 | 1801 | report_result($Locale, ++$locales_test_number, $ok14); |
1500bd91 KW |
1802 | $test_names{$locales_test_number} = 'Verify that non-ASCII UTF-8 error messages are in UTF-8'; |
1803 | ||
30032ef4 | 1804 | report_result($Locale, ++$locales_test_number, $ok15); |
28acfe03 KW |
1805 | $test_names{$locales_test_number} = 'Verify that a number with a UTF-8 radix has a UTF-8 stringification'; |
1806 | ||
30032ef4 | 1807 | report_result($Locale, ++$locales_test_number, $ok16); |
28acfe03 KW |
1808 | $test_names{$locales_test_number} = 'Verify that a sprintf of a number with a UTF-8 radix yields UTF-8'; |
1809 | ||
e46375fa KW |
1810 | report_result($Locale, ++$locales_test_number, $ok17); |
1811 | $test_names{$locales_test_number} = 'Verify that a sprintf of a number outside locale scope uses a dot radix'; | |
1812 | ||
1813 | report_result($Locale, ++$locales_test_number, $ok18); | |
1814 | $test_names{$locales_test_number} = 'Verify that a sprintf of a number back within locale scope uses locale radix'; | |
1815 | ||
71e5cbb3 | 1816 | debug "# $first_f_test..$locales_test_number: \$f = $f, \$g = $g, back to locale = $Locale\n"; |
906f284f | 1817 | |
26d80d95 LC |
1818 | # Does taking lc separately differ from taking |
1819 | # the lc "in-line"? (This was the bug 19990704.002, change #3568.) | |
1820 | # The bug was in the caching of the 'o'-magic. | |
66cbab2c | 1821 | if (! $is_utf8_locale) { |
2a680da6 | 1822 | use locale; |
6be75cd7 | 1823 | |
2a680da6 JH |
1824 | sub lcA { |
1825 | my $lc0 = lc $_[0]; | |
1826 | my $lc1 = lc $_[1]; | |
1827 | return $lc0 cmp $lc1; | |
1828 | } | |
6be75cd7 | 1829 | |
2a680da6 JH |
1830 | sub lcB { |
1831 | return lc($_[0]) cmp lc($_[1]); | |
1832 | } | |
6be75cd7 | 1833 | |
2a680da6 JH |
1834 | my $x = "ab"; |
1835 | my $y = "aa"; | |
1836 | my $z = "AB"; | |
6be75cd7 | 1837 | |
30032ef4 | 1838 | report_result($Locale, ++$locales_test_number, |
2a680da6 JH |
1839 | lcA($x, $y) == 1 && lcB($x, $y) == 1 || |
1840 | lcA($x, $z) == 0 && lcB($x, $z) == 0); | |
6be75cd7 | 1841 | } |
66cbab2c KW |
1842 | else { |
1843 | use locale ':not_characters'; | |
1844 | ||
1845 | sub lcC { | |
1846 | my $lc0 = lc $_[0]; | |
1847 | my $lc1 = lc $_[1]; | |
1848 | return $lc0 cmp $lc1; | |
1849 | } | |
1850 | ||
1851 | sub lcD { | |
1852 | return lc($_[0]) cmp lc($_[1]); | |
1853 | } | |
1854 | ||
1855 | my $x = "ab"; | |
1856 | my $y = "aa"; | |
1857 | my $z = "AB"; | |
1858 | ||
30032ef4 | 1859 | report_result($Locale, ++$locales_test_number, |
66cbab2c KW |
1860 | lcC($x, $y) == 1 && lcD($x, $y) == 1 || |
1861 | lcC($x, $z) == 0 && lcD($x, $z) == 0); | |
1862 | } | |
1863 | $test_names{$locales_test_number} = 'Verify "lc(foo) cmp lc(bar)" is the same as using intermediaries for the cmp'; | |
d8093b23 | 1864 | |
26d80d95 LC |
1865 | # Does lc of an UPPER (if different from the UPPER) match |
1866 | # case-insensitively the UPPER, and does the UPPER match | |
1867 | # case-insensitively the lc of the UPPER. And vice versa. | |
3ba0e062 | 1868 | { |
ef4a39e5 JH |
1869 | use locale; |
1870 | no utf8; | |
1871 | my $re = qr/[\[\(\{\*\+\?\|\^\$\\]/; | |
1872 | ||
1873 | my @f = (); | |
c4093d7d | 1874 | ++$locales_test_number; |
c08acc4c | 1875 | $test_names{$locales_test_number} = 'Verify case insensitive matching works'; |
f78d9f29 | 1876 | foreach my $x (sort keys %UPPER) { |
66cbab2c | 1877 | if (! $is_utf8_locale) { |
71e5cbb3 KW |
1878 | my $y = lc $x; |
1879 | next unless uc $y eq $x; | |
92891c66 KW |
1880 | debug_more( "# UPPER=", disp_chars(($x)), |
1881 | "; lc=", disp_chars(($y)), "; ", | |
4eac893c | 1882 | "; fc=", disp_chars((fc $x)), "; ", |
92891c66 KW |
1883 | disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", |
1884 | $x =~ /$y/i ? 1 : 0, | |
1885 | "; ", | |
1886 | disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", | |
1887 | $y =~ /$x/i ? 1 : 0, | |
1888 | "\n"); | |
71e5cbb3 KW |
1889 | # |
1890 | # If $x and $y contain regular expression characters | |
1891 | # AND THEY lowercase (/i) to regular expression characters, | |
1892 | # regcomp() will be mightily confused. No, the \Q doesn't | |
1893 | # help here (maybe regex engine internal lowercasing | |
1894 | # is done after the \Q?) An example of this happening is | |
1895 | # the bg_BG (Bulgarian) locale under EBCDIC (OS/390 USS): | |
1896 | # the chr(173) (the "[") is the lowercase of the chr(235). | |
1897 | # | |
1898 | # Similarly losing EBCDIC locales include cs_cz, cs_CZ, | |
1899 | # el_gr, el_GR, en_us.IBM-037 (!), en_US.IBM-037 (!), | |
1900 | # et_ee, et_EE, hr_hr, hr_HR, hu_hu, hu_HU, lt_LT, | |
1901 | # mk_mk, mk_MK, nl_nl.IBM-037, nl_NL.IBM-037, | |
1902 | # pl_pl, pl_PL, ro_ro, ro_RO, ru_ru, ru_RU, | |
1903 | # sk_sk, sk_SK, sl_si, sl_SI, tr_tr, tr_TR. | |
1904 | # | |
1905 | # Similar things can happen even under (bastardised) | |
1906 | # non-EBCDIC locales: in many European countries before the | |
1907 | # advent of ISO 8859-x nationally customised versions of | |
1908 | # ISO 646 were devised, reusing certain punctuation | |
1909 | # characters for modified characters needed by the | |
1910 | # country/language. For example, the "|" might have | |
1911 | # stood for U+00F6 or LATIN SMALL LETTER O WITH DIAERESIS. | |
1912 | # | |
1913 | if ($x =~ $re || $y =~ $re) { | |
1914 | print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n"; | |
1915 | next; | |
1916 | } | |
69d77f94 | 1917 | push @f, $x unless $x =~ /$y/i && $y =~ /$x/i; |
26c1569f KW |
1918 | |
1919 | # fc is not a locale concept, so Perl uses lc for it. | |
1920 | push @f, $x unless lc $x eq fc $x; | |
66cbab2c KW |
1921 | } |
1922 | else { | |
1923 | use locale ':not_characters'; | |
1924 | my $y = lc $x; | |
1925 | next unless uc $y eq $x; | |
92891c66 KW |
1926 | debug_more( "# UPPER=", disp_chars(($x)), |
1927 | "; lc=", disp_chars(($y)), "; ", | |
4eac893c | 1928 | "; fc=", disp_chars((fc $x)), "; ", |
92891c66 KW |
1929 | disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", |
1930 | $x =~ /$y/i ? 1 : 0, | |
1931 | "; ", | |
1932 | disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", | |
1933 | $y =~ /$x/i ? 1 : 0, | |
1934 | "\n"); | |
66cbab2c | 1935 | |
66cbab2c | 1936 | push @f, $x unless $x =~ /$y/i && $y =~ /$x/i; |
26c1569f KW |
1937 | |
1938 | # The places where Unicode's lc is different from fc are | |
1939 | # skipped here by virtue of the 'next unless uc...' line above | |
1940 | push @f, $x unless lc $x eq fc $x; | |
66cbab2c | 1941 | } |
c00ff1c7 | 1942 | } |
ef4a39e5 | 1943 | |
f78d9f29 | 1944 | foreach my $x (sort keys %lower) { |
66cbab2c | 1945 | if (! $is_utf8_locale) { |
71e5cbb3 KW |
1946 | my $y = uc $x; |
1947 | next unless lc $y eq $x; | |
92891c66 KW |
1948 | debug_more( "# lower=", disp_chars(($x)), |
1949 | "; uc=", disp_chars(($y)), "; ", | |
4eac893c | 1950 | "; fc=", disp_chars((fc $x)), "; ", |
92891c66 KW |
1951 | disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", |
1952 | $x =~ /$y/i ? 1 : 0, | |
1953 | "; ", | |
1954 | disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", | |
1955 | $y =~ /$x/i ? 1 : 0, | |
1956 | "\n"); | |
71e5cbb3 KW |
1957 | if ($x =~ $re || $y =~ $re) { # See above. |
1958 | print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n"; | |
1959 | next; | |
1960 | } | |
69d77f94 | 1961 | push @f, $x unless $x =~ /$y/i && $y =~ /$x/i; |
26c1569f KW |
1962 | |
1963 | push @f, $x unless lc $x eq fc $x; | |
66cbab2c KW |
1964 | } |
1965 | else { | |
1966 | use locale ':not_characters'; | |
1967 | my $y = uc $x; | |
1968 | next unless lc $y eq $x; | |
92891c66 KW |
1969 | debug_more( "# lower=", disp_chars(($x)), |
1970 | "; uc=", disp_chars(($y)), "; ", | |
4eac893c | 1971 | "; fc=", disp_chars((fc $x)), "; ", |
92891c66 KW |
1972 | disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", |
1973 | $x =~ /$y/i ? 1 : 0, | |
1974 | "; ", | |
1975 | disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", | |
1976 | $y =~ /$x/i ? 1 : 0, | |
1977 | "\n"); | |
66cbab2c | 1978 | push @f, $x unless $x =~ /$y/i && $y =~ /$x/i; |
26c1569f KW |
1979 | |
1980 | push @f, $x unless lc $x eq fc $x; | |
66cbab2c | 1981 | } |
c00ff1c7 | 1982 | } |
7c844d17 | 1983 | report_multi_result($Locale, $locales_test_number, \@f); |
7493b8f2 | 1984 | push @problematical_tests, $locales_test_number; |
d8093b23 | 1985 | } |
78787052 JL |
1986 | |
1987 | # [perl #109318] | |
1988 | { | |
1989 | my @f = (); | |
1990 | ++$locales_test_number; | |
1991 | $test_names{$locales_test_number} = 'Verify atof with locale radix and negative exponent'; | |
1992 | ||
1993 | my $radix = POSIX::localeconv()->{decimal_point}; | |
1994 | my @nums = ( | |
1995 | "3.14e+9", "3${radix}14e+9", "3.14e-9", "3${radix}14e-9", | |
1996 | "-3.14e+9", "-3${radix}14e+9", "-3.14e-9", "-3${radix}14e-9", | |
1997 | ); | |
1998 | ||
1999 | if (! $is_utf8_locale) { | |
2000 | use locale; | |
2001 | for my $num (@nums) { | |
2002 | push @f, $num | |
2003 | unless sprintf("%g", $num) =~ /3.+14/; | |
2004 | } | |
2005 | } | |
2006 | else { | |
2007 | use locale ':not_characters'; | |
2008 | for my $num (@nums) { | |
2009 | push @f, $num | |
2010 | unless sprintf("%g", $num) =~ /3.+14/; | |
2011 | } | |
2012 | } | |
2013 | ||
30032ef4 | 2014 | report_result($Locale, $locales_test_number, @f == 0); |
78787052 JL |
2015 | if (@f) { |
2016 | print "# failed $locales_test_number locale '$Locale' numbers @f\n" | |
2017 | } | |
2018 | } | |
8ebc5c01 | 2019 | } |
284102e8 | 2020 | |
c4093d7d | 2021 | my $final_locales_test_number = $locales_test_number; |
6cf0b567 | 2022 | |
2a680da6 JH |
2023 | # Recount the errors. |
2024 | ||
0d3899b1 | 2025 | foreach $test_num ($first_locales_test_number..$final_locales_test_number) { |
c4093d7d KW |
2026 | if (%setlocale_failed) { |
2027 | print "not "; | |
2028 | } | |
0d3899b1 | 2029 | elsif ($Problem{$test_num} || !defined $Okay{$test_num} || !@{$Okay{$test_num}}) { |
c4093d7d | 2030 | if (defined $not_necessarily_a_problem_test_number |
0d3899b1 | 2031 | && $test_num == $not_necessarily_a_problem_test_number) |
c4093d7d KW |
2032 | { |
2033 | print "# The failure of test $not_necessarily_a_problem_test_number is not necessarily fatal.\n"; | |
b4e009be | 2034 | print "# It usually indicates a problem in the environment,\n"; |
284102e8 JH |
2035 | print "# not in Perl itself.\n"; |
2036 | } | |
7493b8f2 | 2037 | if ($Okay{$test_num} && grep { $_ == $test_num } @problematical_tests) { |
a69823cd | 2038 | no warnings 'experimental::aggref'; |
0a974e2d | 2039 | # Round to nearest .1% |
0d3899b1 | 2040 | my $percent_fail = (int(.5 + (1000 * scalar(keys $Problem{$test_num}) |
0a974e2d KW |
2041 | / scalar(@Locale)))) |
2042 | / 10; | |
f5627fc1 KW |
2043 | if (! $debug && $percent_fail < $acceptable_fold_failure_percentage) |
2044 | { | |
0d3899b1 | 2045 | $test_names{$test_num} .= 'TODO'; |
6d5d702a KW |
2046 | print "# ", 100 - $percent_fail, "% of locales pass the following test, so it is likely that the failures\n"; |
2047 | print "# are errors in the locale definitions. The test is marked TODO, as the\n"; | |
2048 | print "# problem is not likely to be Perl's\n"; | |
2049 | } | |
2050 | } | |
f5627fc1 KW |
2051 | print "#\n"; |
2052 | if ($debug) { | |
2053 | print "# The code points that had this failure are given above. Look for lines\n"; | |
0d3899b1 | 2054 | print "# that match 'failed $test_num'\n"; |
f5627fc1 KW |
2055 | } |
2056 | else { | |
2057 | print "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n"; | |
0d3899b1 | 2058 | print "# Then look at that output for lines that match 'failed $test_num'\n"; |
108a305e | 2059 | } |
284102e8 | 2060 | print "not "; |
8ebc5c01 | 2061 | } |
0d3899b1 KW |
2062 | print "ok $test_num"; |
2063 | if (defined $test_names{$test_num}) { | |
6c2e653d | 2064 | # If TODO is in the test name, make it thus |
0d3899b1 KW |
2065 | my $todo = $test_names{$test_num} =~ s/TODO\s*//; |
2066 | print " $test_names{$test_num}"; | |
6c2e653d KW |
2067 | print " # TODO" if $todo; |
2068 | } | |
c4093d7d | 2069 | print "\n"; |
8ebc5c01 | 2070 | } |
fb73857a | 2071 | |
c4093d7d | 2072 | $test_num = $final_locales_test_number; |
c213d471 | 2073 | |
083ace8f CBW |
2074 | unless ( $^O eq 'dragonfly' ) { |
2075 | # perl #115808 | |
fbd840df KW |
2076 | use warnings; |
2077 | my $warned = 0; | |
2078 | local $SIG{__WARN__} = sub { | |
2079 | $warned = $_[0] =~ /uninitialized/; | |
2080 | }; | |
2081 | my $z = "y" . setlocale(&POSIX::LC_ALL, "xyzzy"); | |
2082 | ok($warned, "variable set to setlocale(BAD LOCALE) is considered uninitialized"); | |
2083 | } | |
2084 | ||
094a2f8c | 2085 | # Test that tainting and case changing works on utf8 strings. These tests are |
1f5852c9 KW |
2086 | # placed last to avoid disturbing the hard-coded test numbers that existed at |
2087 | # the time these were added above this in this file. | |
0099bb8d KW |
2088 | # This also tests that locale overrides unicode_strings in the same scope for |
2089 | # non-utf8 strings. | |
a810e350 | 2090 | setlocale(&POSIX::LC_ALL, "C"); |
094a2f8c KW |
2091 | { |
2092 | use locale; | |
0099bb8d | 2093 | use feature 'unicode_strings'; |
094a2f8c | 2094 | |
26c1569f | 2095 | foreach my $function ("uc", "ucfirst", "lc", "lcfirst", "fc") { |
094a2f8c KW |
2096 | my @list; # List of code points to test for $function |
2097 | ||
2098 | # Used to calculate the changed case for ASCII characters by using the | |
2099 | # ord, instead of using one of the functions under test. | |
2100 | my $ascii_case_change_delta; | |
2101 | my $above_latin1_case_change_delta; # Same for the specific ords > 255 | |
2102 | # that we use | |
2103 | ||
2104 | # We test an ASCII character, which should change case and be tainted; | |
2105 | # a Latin1 character, which shouldn't change case under this C locale, | |
2106 | # and is tainted. | |
2107 | # an above-Latin1 character that when the case is changed would cross | |
2108 | # the 255/256 boundary, so doesn't change case and isn't tainted | |
2109 | # (the \x{149} is one of these, but changes into 2 characters, the | |
2110 | # first one of which doesn't cross the boundary. | |
2111 | # the final one in each list is an above-Latin1 character whose case | |
2112 | # does change, and shouldn't be tainted. The code below uses its | |
2113 | # position in its list as a marker to indicate that it, unlike the | |
2114 | # other code points above ASCII, has a successful case change | |
2115 | if ($function =~ /^u/) { | |
094a2f8c KW |
2116 | @list = ("", "a", "\xe0", "\xff", "\x{fb00}", "\x{149}", "\x{101}"); |
2117 | $ascii_case_change_delta = -32; | |
2118 | $above_latin1_case_change_delta = -1; | |
2119 | } | |
2120 | else { | |
1ca267a5 | 2121 | @list = ("", "A", "\xC0", "\x{17F}", "\x{100}"); |
094a2f8c KW |
2122 | $ascii_case_change_delta = +32; |
2123 | $above_latin1_case_change_delta = +1; | |
2124 | } | |
66cbab2c | 2125 | foreach my $is_utf8_locale (0 .. 1) { |
71e5cbb3 KW |
2126 | foreach my $j (0 .. $#list) { |
2127 | my $char = $list[$j]; | |
0099bb8d KW |
2128 | |
2129 | for my $encoded_in_utf8 (0 .. 1) { | |
faf0c248 KW |
2130 | my $should_be; |
2131 | my $changed; | |
2132 | if (! $is_utf8_locale) { | |
2133 | $should_be = ($j == $#list) | |
2134 | ? chr(ord($char) + $above_latin1_case_change_delta) | |
2135 | : (length $char == 0 || ord($char) > 127) | |
2136 | ? $char | |
2137 | : chr(ord($char) + $ascii_case_change_delta); | |
2138 | ||
2139 | # This monstrosity is in order to avoid using an eval, | |
2140 | # which might perturb the results | |
2141 | $changed = ($function eq "uc") | |
2142 | ? uc($char) | |
2143 | : ($function eq "ucfirst") | |
2144 | ? ucfirst($char) | |
2145 | : ($function eq "lc") | |
2146 | ? lc($char) | |
2147 | : ($function eq "lcfirst") | |
2148 | ? lcfirst($char) | |
26c1569f KW |
2149 | : ($function eq "fc") |
2150 | ? fc($char) | |
faf0c248 KW |
2151 | : die("Unexpected function \"$function\""); |
2152 | } | |
2153 | else { | |
2154 | { | |
2155 | no locale; | |
71e5cbb3 | 2156 | |
faf0c248 KW |
2157 | # For utf8-locales the case changing functions |
2158 | # should work just like they do outside of locale. | |
2159 | # Can use eval here because not testing it when | |
2160 | # not in locale. | |
2161 | $should_be = eval "$function('$char')"; | |
2162 | die "Unexpected eval error $@ from 'eval \"$function('$char')\"'" if $@; | |
71e5cbb3 | 2163 | |
faf0c248 KW |
2164 | } |
2165 | use locale ':not_characters'; | |
2166 | $changed = ($function eq "uc") | |
2167 | ? uc($char) | |
2168 | : ($function eq "ucfirst") | |
2169 | ? ucfirst($char) | |
2170 | : ($function eq "lc") | |
2171 | ? lc($char) | |
2172 | : ($function eq "lcfirst") | |
2173 | ? lcfirst($char) | |
26c1569f KW |
2174 | : ($function eq "fc") |
2175 | ? fc($char) | |
faf0c248 | 2176 | : die("Unexpected function \"$function\""); |
71e5cbb3 | 2177 | } |
faf0c248 KW |
2178 | ok($changed eq $should_be, |
2179 | "$function(\"$char\") in C locale " | |
2180 | . (($is_utf8_locale) | |
2181 | ? "(use locale ':not_characters'" | |
2182 | : "(use locale") | |
2183 | . (($encoded_in_utf8) | |
2184 | ? "; encoded in utf8)" | |
2185 | : "; not encoded in utf8)") | |
2186 | . " should be \"$should_be\", got \"$changed\""); | |
2187 | ||
2188 | # Tainting shouldn't happen for utf8 locales, empty | |
2189 | # strings, or those characters above 255. | |
2190 | (! $is_utf8_locale && length($char) > 0 && ord($char) < 256) | |
2191 | ? check_taint($changed) | |
2192 | : check_taint_not($changed); | |
2193 | ||
2194 | # Use UTF-8 next time through the loop | |
2195 | utf8::upgrade($char); | |
0099bb8d | 2196 | } |
66cbab2c | 2197 | } |
094a2f8c KW |
2198 | } |
2199 | } | |
2200 | } | |
2201 | ||
1bfe8fea KW |
2202 | # Give final advice. |
2203 | ||
2204 | my $didwarn = 0; | |
2205 | ||
2206 | foreach ($first_locales_test_number..$final_locales_test_number) { | |
2207 | if ($Problem{$_}) { | |
2208 | my @f = sort keys %{ $Problem{$_} }; | |
2209 | my $f = join(" ", @f); | |
2210 | $f =~ s/(.{50,60}) /$1\n#\t/g; | |
2211 | ||
2212 | "#\n", | |
2213 | "# The locale ", (@f == 1 ? "definition" : "definitions"), "\n#\n", | |
2214 | "#\t", $f, "\n#\n", | |
2215 | "# on your system may have errors because the locale test $_\n", | |
2216 | "# \"$test_names{$_}\"\n", | |
2217 | "# failed in ", (@f == 1 ? "that locale" : "those locales"), | |
2218 | ".\n"; | |
2219 | print <<EOW; | |
2220 | # | |
2221 | # If your users are not using these locales you are safe for the moment, | |
2222 | # but please report this failure first to perlbug\@perl.com using the | |
2223 | # perlbug script (as described in the INSTALL file) so that the exact | |
2224 | # details of the failures can be sorted out first and then your operating | |
2225 | # system supplier can be alerted about these anomalies. | |
2226 | # | |
2227 | EOW | |
2228 | $didwarn = 1; | |
2229 | } | |
2230 | } | |
2231 | ||
2232 | # Tell which locales were okay and which were not. | |
2233 | ||
2234 | if ($didwarn) { | |
2235 | my (@s, @F); | |
2236 | ||
2237 | foreach my $l (@Locale) { | |
2238 | my $p = 0; | |
2239 | if ($setlocale_failed{$l}) { | |
2240 | $p++; | |
2241 | } | |
2242 | else { | |
2243 | foreach my $t | |
2244 | ($first_locales_test_number..$final_locales_test_number) | |
2245 | { | |
2246 | $p++ if $Problem{$t}{$l}; | |
2247 | } | |
2248 | } | |
2249 | push @s, $l if $p == 0; | |
2250 | push @F, $l unless $p == 0; | |
2251 | } | |
2252 | ||
2253 | if (@s) { | |
2254 | my $s = join(" ", @s); | |
2255 | $s =~ s/(.{50,60}) /$1\n#\t/g; | |
2256 | ||
2257 | warn | |
2258 | "# The following locales\n#\n", | |
2259 | "#\t", $s, "\n#\n", | |
2260 | "# tested okay.\n#\n", | |
2261 | } else { | |
2262 | warn "# None of your locales were fully okay.\n"; | |
2263 | } | |
2264 | ||
2265 | if (@F) { | |
2266 | my $F = join(" ", @F); | |
2267 | $F =~ s/(.{50,60}) /$1\n#\t/g; | |
2268 | ||
c7e7f6b7 KW |
2269 | my $details = ""; |
2270 | unless ($debug) { | |
2271 | $details = "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n"; | |
2272 | } | |
2273 | elsif ($debug == 1) { | |
2274 | $details = "# For even more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=2.\n"; | |
2275 | } | |
2276 | ||
1bfe8fea KW |
2277 | warn |
2278 | "# The following locales\n#\n", | |
2279 | "#\t", $F, "\n#\n", | |
2280 | "# had problems.\n#\n", | |
c7e7f6b7 | 2281 | $details; |
1bfe8fea KW |
2282 | } else { |
2283 | warn "# None of your locales were broken.\n"; | |
2284 | } | |
2285 | } | |
2286 | ||
fdf053ee | 2287 | print "1..$test_num\n"; |
906f284f | 2288 | |
90248788 | 2289 | # eof |