Commit | Line | Data |
---|---|---|
8ebc5c01 | 1 | #!./perl -wT |
2 | ||
66cbab2c KW |
3 | # This tests plain 'use locale' and adorned 'use locale ":not_characters"' |
4 | # Because these pragmas are compile time, and I (khw) am trying to test | |
5 | # without using 'eval' as much as possible, which might cloud the issue, the | |
6 | # crucial parts of the code are duplicated in a block for each pragma. | |
7 | ||
6c2e653d KW |
8 | # To make a TODO test, add the string 'TODO' to its %test_names value |
9 | ||
e3a2734b KW |
10 | binmode STDOUT, ':utf8'; |
11 | binmode STDERR, ':utf8'; | |
12 | ||
# Compile-time setup: move into the t/ directory when it exists, point @INC
# at the uninstalled library (plus the current directory), and skip the whole
# file ("1..0") when this perl was built without setlocale() or with a
# NO_LOCALE flag in its ccflags.  Output is unbuffered from here on.
BEGIN {
    chdir 't' if -d 't';
    @INC = ('.', '../lib');
    require Config;
    Config->import;
    unless ($Config{d_setlocale} && $Config{ccflags} !~ /\bD?NO_LOCALE\b/) {
        print "1..0\n";
        exit;
    }
    $| = 1;
}
24 | ||
25 | use strict; | |
26c1569f | 26 | use feature 'fc'; |
8ebc5c01 | 27 | |
108a305e | 28 | my $debug = $ENV{PERL_DEBUG_FULL_TEST} // 0; |
284102e8 | 29 | |
6d5d702a KW |
30 | # Certain tests have been shown to be problematical for a few locales. Don't |
31 | # fail them unless at least this percentage of the tested locales fail. | |
32 | my $acceptable_fold_failure_percentage = 5; | |
33 | ||
db4b7445 A |
34 | use Dumpvalue; |
35 | ||
36 | my $dumper = Dumpvalue->new( | |
37 | tick => qq{"}, | |
38 | quoteHighBit => 0, | |
39 | unctrl => "quote" | |
40 | ); | |
# Print a diagnostic, but only when $debug is true.  The arguments are
# concatenated, the final character is chopped off (callers pass a trailing
# newline), and the text is rendered through the $dumper object before being
# printed with a newline of its own.
sub debug {
    $debug or return;
    my $text = join("", @_);
    chop($text);
    print $dumper->stringify($text, 1), "\n";
}
47 | ||
# printf-style companion to debug(): the arguments are handed to printf
# unchanged, and only when $debug is true.
sub debugf {
    if ($debug) {
        printf @_;
    }
}
51 | ||
8ebc5c01 | 52 | my $have_setlocale = 0; |
53 | eval { | |
54 | require POSIX; | |
55 | import POSIX ':locale_h'; | |
56 | $have_setlocale++; | |
57 | }; | |
58 | ||
6dead956 | 59 | # Visual C's CRT goes silly on strings of the form "en_US.ISO8859-1" |
f6c6487a | 60 | # and mingw32 uses said silly CRT |
3a2d1764 SH |
61 | # This doesn't seem to be an issue any more, at least on Windows XP, |
62 | # so re-enable the tests for Windows XP onwards. | |
63 | my $winxp = ($^O eq 'MSWin32' && defined &Win32::GetOSVersion && | |
64 | join('.', (Win32::GetOSVersion())[1..2]) >= 5.1); | |
65 | $have_setlocale = 0 if ((($^O eq 'MSWin32' && !$winxp) || $^O eq 'NetWare') && | |
66 | $Config{cc} =~ /^(cl|gcc)/i); | |
6dead956 | 67 | |
36a42ae7 | 68 | # UWIN seems to loop after taint tests, just skip for now |
cd19b65c JH |
69 | $have_setlocale = 0 if ($^O =~ /^uwin/); |
70 | ||
9a66ea41 | 71 | sub LC_ALL (); |
8ebc5c01 | 72 | |
0e053d1e | 73 | $a = 'abc %'; |
8ebc5c01 | 74 | |
c213d471 KW |
75 | my $test_num = 0; |
76 | ||
# Emit one TAP result line and advance the running test counter.
#   $_[0]  truth value of the test
#   $_[1]  optional description (an undefined one is printed as empty)
# The line has the form "[not ]ok N MESSAGE\n".
sub ok {
    my $passed = shift;
    my $label  = shift // "";

    my $prefix = $passed ? '' : 'not ';
    print $prefix . "ok " . ++$test_num . " $label\n";
}
86 | ||
87 | # First we'll do a lot of taint checking for locales. | |
88 | # This is the easiest to test, actually, as any locale, | |
89 | # even the default locale will taint under 'use locale'. | |
90 | ||
# Return true if any of the arguments is tainted, false otherwise.
# The classic "camel two" trick: the arguments are joined in the very same
# statement that calls kill(), so under taint mode that statement dies when
# tainted data is involved; the eval catches the death.  $@ is localized so
# the caller's value is left alone.
sub is_tainted { # hello, camel two.
    no warnings 'uninitialized';
    local $@;
    my $sink;
    my $survived = eval { $sink = join("", @_), kill 0; 1 };
    return !$survived;
}
97 | ||
a9b7c637 KW |
# Record a test that passes when the first argument IS tainted.
# The optional second argument is appended to the test description.
sub check_taint ($;$) {
    my $suffix = defined $_[1] ? $_[1] : "";
    $suffix = ": $suffix" if $suffix;
    ok(is_tainted($_[0]), "verify that is tainted$suffix");
}
103 | ||
a9b7c637 KW |
# Record a test that passes when the first argument is NOT tainted.
# The optional second argument is appended to the test description.
sub check_taint_not ($;$) {
    my $suffix = defined $_[1] ? $_[1] : "";
    $suffix = ": $suffix" if $suffix;
    ok(!is_tainted($_[0]), "verify that isn't tainted$suffix");
}
109 | ||
bf3cd0e6 KW |
110 | "\tb\t" =~ /^m?(\s)(.*)\1$/; |
111 | check_taint_not $&, "not tainted outside 'use locale'"; | |
112 | ; | |
113 | ||
8ebc5c01 | 114 | use locale; # engage locale and therefore locale taint. |
115 | ||
36a42ae7 | 116 | check_taint_not $a; |
8ebc5c01 | 117 | |
36a42ae7 KW |
118 | check_taint uc($a); |
119 | check_taint "\U$a"; | |
120 | check_taint ucfirst($a); | |
121 | check_taint "\u$a"; | |
122 | check_taint lc($a); | |
26c1569f | 123 | check_taint fc($a); |
36a42ae7 | 124 | check_taint "\L$a"; |
26c1569f | 125 | check_taint "\F$a"; |
36a42ae7 KW |
126 | check_taint lcfirst($a); |
127 | check_taint "\l$a"; | |
8ebc5c01 | 128 | |
36a42ae7 KW |
129 | check_taint_not sprintf('%e', 123.456); |
130 | check_taint_not sprintf('%f', 123.456); | |
131 | check_taint_not sprintf('%g', 123.456); | |
132 | check_taint_not sprintf('%d', 123.456); | |
133 | check_taint_not sprintf('%x', 123.456); | |
8ebc5c01 | 134 | |
135 | $_ = $a; # untaint $_ | |
136 | ||
137 | $_ = uc($a); # taint $_ | |
138 | ||
36a42ae7 | 139 | check_taint $_; |
8ebc5c01 | 140 | |
141 | /(\w)/; # taint $&, $`, $', $+, $1. | |
36a42ae7 KW |
142 | check_taint $&; |
143 | check_taint $`; | |
144 | check_taint $'; | |
145 | check_taint $+; | |
146 | check_taint $1; | |
147 | check_taint_not $2; | |
8ebc5c01 | 148 | |
149 | /(.)/; # untaint $&, $`, $', $+, $1. | |
36a42ae7 KW |
150 | check_taint_not $&; |
151 | check_taint_not $`; | |
152 | check_taint_not $'; | |
153 | check_taint_not $+; | |
154 | check_taint_not $1; | |
155 | check_taint_not $2; | |
8ebc5c01 | 156 | |
157 | /(\W)/; # taint $&, $`, $', $+, $1. | |
36a42ae7 KW |
158 | check_taint $&; |
159 | check_taint $`; | |
160 | check_taint $'; | |
161 | check_taint $+; | |
162 | check_taint $1; | |
163 | check_taint_not $2; | |
8ebc5c01 | 164 | |
165 | /(\s)/; # taint $&, $`, $', $+, $1. | |
36a42ae7 KW |
166 | check_taint $&; |
167 | check_taint $`; | |
168 | check_taint $'; | |
169 | check_taint $+; | |
170 | check_taint $1; | |
171 | check_taint_not $2; | |
8ebc5c01 | 172 | |
173 | /(\S)/; # taint $&, $`, $', $+, $1. | |
36a42ae7 KW |
174 | check_taint $&; |
175 | check_taint $`; | |
176 | check_taint $'; | |
177 | check_taint $+; | |
178 | check_taint $1; | |
179 | check_taint_not $2; | |
8ebc5c01 | 180 | |
181 | $_ = $a; # untaint $_ | |
182 | ||
36a42ae7 | 183 | check_taint_not $_; |
8ebc5c01 | 184 | |
185 | /(b)/; # this must not taint | |
36a42ae7 KW |
186 | check_taint_not $&; |
187 | check_taint_not $`; | |
188 | check_taint_not $'; | |
189 | check_taint_not $+; | |
190 | check_taint_not $1; | |
191 | check_taint_not $2; | |
8ebc5c01 | 192 | |
193 | $_ = $a; # untaint $_ | |
194 | ||
36a42ae7 | 195 | check_taint_not $_; |
8ebc5c01 | 196 | |
197 | $b = uc($a); # taint $b | |
198 | s/(.+)/$b/; # this must taint only the $_ | |
199 | ||
36a42ae7 KW |
200 | check_taint $_; |
201 | check_taint_not $&; | |
202 | check_taint_not $`; | |
203 | check_taint_not $'; | |
204 | check_taint_not $+; | |
205 | check_taint_not $1; | |
206 | check_taint_not $2; | |
8ebc5c01 | 207 | |
208 | $_ = $a; # untaint $_ | |
209 | ||
210 | s/(.+)/b/; # this must not taint | |
36a42ae7 KW |
211 | check_taint_not $_; |
212 | check_taint_not $&; | |
213 | check_taint_not $`; | |
214 | check_taint_not $'; | |
215 | check_taint_not $+; | |
216 | check_taint_not $1; | |
217 | check_taint_not $2; | |
8ebc5c01 | 218 | |
219 | $b = $a; # untaint $b | |
220 | ||
221 | ($b = $a) =~ s/\w/$&/; | |
36a42ae7 KW |
222 | check_taint $b; # $b should be tainted. |
223 | check_taint_not $a; # $a should be not. | |
8ebc5c01 | 224 | |
225 | $_ = $a; # untaint $_ | |
226 | ||
227 | s/(\w)/\l$1/; # this must taint | |
36a42ae7 KW |
228 | check_taint $_; |
229 | check_taint $&; | |
230 | check_taint $`; | |
231 | check_taint $'; | |
232 | check_taint $+; | |
233 | check_taint $1; | |
234 | check_taint_not $2; | |
8ebc5c01 | 235 | |
236 | $_ = $a; # untaint $_ | |
237 | ||
238 | s/(\w)/\L$1/; # this must taint | |
36a42ae7 KW |
239 | check_taint $_; |
240 | check_taint $&; | |
241 | check_taint $`; | |
242 | check_taint $'; | |
243 | check_taint $+; | |
244 | check_taint $1; | |
245 | check_taint_not $2; | |
8ebc5c01 | 246 | |
247 | $_ = $a; # untaint $_ | |
248 | ||
249 | s/(\w)/\u$1/; # this must taint | |
36a42ae7 KW |
250 | check_taint $_; |
251 | check_taint $&; | |
252 | check_taint $`; | |
253 | check_taint $'; | |
254 | check_taint $+; | |
255 | check_taint $1; | |
256 | check_taint_not $2; | |
8ebc5c01 | 257 | |
258 | $_ = $a; # untaint $_ | |
259 | ||
260 | s/(\w)/\U$1/; # this must taint | |
36a42ae7 KW |
261 | check_taint $_; |
262 | check_taint $&; | |
263 | check_taint $`; | |
264 | check_taint $'; | |
265 | check_taint $+; | |
266 | check_taint $1; | |
267 | check_taint_not $2; | |
8ebc5c01 | 268 | |
269 | # After all this tainting $a should be cool. | |
270 | ||
36a42ae7 | 271 | check_taint_not $a; |
8ebc5c01 | 272 | |
66cbab2c KW |
273 | { # This is just the previous tests copied here with a different |
274 | # compile-time pragma. | |
275 | ||
276 | use locale ':not_characters'; # engage restricted locale with different | |
277 | # tainting rules | |
278 | ||
279 | check_taint_not $a; | |
280 | ||
281 | check_taint_not uc($a); | |
282 | check_taint_not "\U$a"; | |
283 | check_taint_not ucfirst($a); | |
284 | check_taint_not "\u$a"; | |
285 | check_taint_not lc($a); | |
26c1569f | 286 | check_taint_not fc($a); |
66cbab2c | 287 | check_taint_not "\L$a"; |
26c1569f | 288 | check_taint_not "\F$a"; |
66cbab2c KW |
289 | check_taint_not lcfirst($a); |
290 | check_taint_not "\l$a"; | |
291 | ||
292 | check_taint_not sprintf('%e', 123.456); | |
293 | check_taint_not sprintf('%f', 123.456); | |
294 | check_taint_not sprintf('%g', 123.456); | |
295 | check_taint_not sprintf('%d', 123.456); | |
296 | check_taint_not sprintf('%x', 123.456); | |
297 | ||
298 | $_ = $a; # untaint $_ | |
299 | ||
300 | $_ = uc($a); # taint $_ | |
301 | ||
302 | check_taint_not $_; | |
303 | ||
304 | /(\w)/; # taint $&, $`, $', $+, $1. | |
305 | check_taint_not $&; | |
306 | check_taint_not $`; | |
307 | check_taint_not $'; | |
308 | check_taint_not $+; | |
309 | check_taint_not $1; | |
310 | check_taint_not $2; | |
311 | ||
312 | /(.)/; # untaint $&, $`, $', $+, $1. | |
313 | check_taint_not $&; | |
314 | check_taint_not $`; | |
315 | check_taint_not $'; | |
316 | check_taint_not $+; | |
317 | check_taint_not $1; | |
318 | check_taint_not $2; | |
319 | ||
320 | /(\W)/; # taint $&, $`, $', $+, $1. | |
321 | check_taint_not $&; | |
322 | check_taint_not $`; | |
323 | check_taint_not $'; | |
324 | check_taint_not $+; | |
325 | check_taint_not $1; | |
326 | check_taint_not $2; | |
327 | ||
328 | /(\s)/; # taint $&, $`, $', $+, $1. | |
329 | check_taint_not $&; | |
330 | check_taint_not $`; | |
331 | check_taint_not $'; | |
332 | check_taint_not $+; | |
333 | check_taint_not $1; | |
334 | check_taint_not $2; | |
335 | ||
336 | /(\S)/; # taint $&, $`, $', $+, $1. | |
337 | check_taint_not $&; | |
338 | check_taint_not $`; | |
339 | check_taint_not $'; | |
340 | check_taint_not $+; | |
341 | check_taint_not $1; | |
342 | check_taint_not $2; | |
343 | ||
344 | $_ = $a; # untaint $_ | |
345 | ||
346 | check_taint_not $_; | |
347 | ||
348 | /(b)/; # this must not taint | |
349 | check_taint_not $&; | |
350 | check_taint_not $`; | |
351 | check_taint_not $'; | |
352 | check_taint_not $+; | |
353 | check_taint_not $1; | |
354 | check_taint_not $2; | |
355 | ||
356 | $_ = $a; # untaint $_ | |
357 | ||
358 | check_taint_not $_; | |
359 | ||
360 | $b = uc($a); # taint $b | |
361 | s/(.+)/$b/; # this must taint only the $_ | |
362 | ||
363 | check_taint_not $_; | |
364 | check_taint_not $&; | |
365 | check_taint_not $`; | |
366 | check_taint_not $'; | |
367 | check_taint_not $+; | |
368 | check_taint_not $1; | |
369 | check_taint_not $2; | |
370 | ||
371 | $_ = $a; # untaint $_ | |
372 | ||
373 | s/(.+)/b/; # this must not taint | |
374 | check_taint_not $_; | |
375 | check_taint_not $&; | |
376 | check_taint_not $`; | |
377 | check_taint_not $'; | |
378 | check_taint_not $+; | |
379 | check_taint_not $1; | |
380 | check_taint_not $2; | |
381 | ||
382 | $b = $a; # untaint $b | |
383 | ||
384 | ($b = $a) =~ s/\w/$&/; | |
385 | check_taint_not $b; # Under ':not_characters', $b should not be tainted. | |
386 | check_taint_not $a; # Nor should $a. | |
387 | ||
388 | $_ = $a; # untaint $_ | |
389 | ||
390 | s/(\w)/\l$1/; # this must taint | |
391 | check_taint_not $_; | |
392 | check_taint_not $&; | |
393 | check_taint_not $`; | |
394 | check_taint_not $'; | |
395 | check_taint_not $+; | |
396 | check_taint_not $1; | |
397 | check_taint_not $2; | |
398 | ||
399 | $_ = $a; # untaint $_ | |
400 | ||
401 | s/(\w)/\L$1/; # this must taint | |
402 | check_taint_not $_; | |
403 | check_taint_not $&; | |
404 | check_taint_not $`; | |
405 | check_taint_not $'; | |
406 | check_taint_not $+; | |
407 | check_taint_not $1; | |
408 | check_taint_not $2; | |
409 | ||
410 | $_ = $a; # untaint $_ | |
411 | ||
412 | s/(\w)/\u$1/; # this must taint | |
413 | check_taint_not $_; | |
414 | check_taint_not $&; | |
415 | check_taint_not $`; | |
416 | check_taint_not $'; | |
417 | check_taint_not $+; | |
418 | check_taint_not $1; | |
419 | check_taint_not $2; | |
420 | ||
421 | $_ = $a; # untaint $_ | |
422 | ||
423 | s/(\w)/\U$1/; # this must taint | |
424 | check_taint_not $_; | |
425 | check_taint_not $&; | |
426 | check_taint_not $`; | |
427 | check_taint_not $'; | |
428 | check_taint_not $+; | |
429 | check_taint_not $1; | |
430 | check_taint_not $2; | |
431 | ||
432 | # After all this tainting $a should be cool. | |
433 | ||
434 | check_taint_not $a; | |
435 | } | |
436 | ||
437 | # Here we are back in the scope of plain 'use locale' | |
438 | ||
8ebc5c01 | 439 | # I think we've seen quite enough of taint. |
440 | # Let us do some *real* locale work now, | |
284102e8 | 441 | # unless setlocale() is missing (i.e. minitest). |
8ebc5c01 | 442 | |
fdf053ee KW |
443 | unless ($have_setlocale) { |
444 | print "1..$test_num\n"; | |
445 | exit; | |
446 | } | |
8ebc5c01 | 447 | |
6cf0b567 | 448 | # The test number before our first setlocale() |
66330f13 | 449 | my $final_without_setlocale = $test_num; |
6cf0b567 | 450 | |
284102e8 JH |
451 | # Find locales. |
452 | ||
6be75cd7 JH |
453 | debug "# Scanning for locales...\n"; |
454 | ||
455 | # Note that it's okay that some languages have their native names | |
456 | # capitalized here even though that's not "right". They are lowercased | |
457 | # anyway later during the scanning process (and besides, some clueless | |
98dc9551 | 458 | # vendor might have them capitalized erroneously anyway). |
6be75cd7 | 459 | |
284102e8 | 460 | my $locales = <<EOF; |
6be75cd7 | 461 | Afrikaans:af:za:1 15 |
284102e8 | 462 | Arabic:ar:dz eg sa:6 arabic8 |
6be75cd7 JH |
463 | Brezhoneg Breton:br:fr:1 15 |
464 | Bulgarski Bulgarian:bg:bg:5 | |
dd8482fc | 465 | Chinese:zh:cn tw:cn.EUC eucCN eucTW euc.CN euc.TW Big5 GB2312 tw.EUC |
6be75cd7 JH |
466 | Hrvatski Croatian:hr:hr:2 |
467 | Cymraeg Welsh:cy:cy:1 14 15 | |
284102e8 | 468 | Czech:cs:cz:2 |
df8a53a3 | 469 | Dansk Danish:da:dk:1 15 |
6be75cd7 | 470 | Nederlands Dutch:nl:be nl:1 15 |
dd8482fc | 471 | English American British:en:au ca gb ie nz us uk zw:1 15 cp850 |
6be75cd7 JH |
472 | Esperanto:eo:eo:3 |
473 | Eesti Estonian:et:ee:4 6 13 | |
474 | Suomi Finnish:fi:fi:1 15 | |
475 | Flamish::fl:1 15 | |
6be75cd7 JH |
476 | Deutsch German:de:at be ch de lu:1 15 |
477 | Euskaraz Basque:eu:es fr:1 15 | |
6be75cd7 JH |
478 | Galego Galician:gl:es:1 15 |
479 | Ellada Greek:el:gr:7 g8 | |
6be75cd7 JH |
480 | Frysk:fy:nl:1 15 |
481 | Greenlandic:kl:gl:4 6 | |
284102e8 JH |
482 | Hebrew:iw:il:8 hebrew8 |
483 | Hungarian:hu:hu:2 | |
df8a53a3 | 484 | Indonesian:id:id:1 15 |
6be75cd7 JH |
485 | Gaeilge Irish:ga:IE:1 14 15 |
486 | Italiano Italian:it:ch it:1 15 | |
487 | Nihongo Japanese:ja:jp:euc eucJP jp.EUC sjis | |
284102e8 | 488 | Korean:ko:kr: |
6be75cd7 JH |
489 | Latine Latin:la:va:1 15 |
490 | Latvian:lv:lv:4 6 13 | |
491 | Lithuanian:lt:lt:4 6 13 | |
492 | Macedonian:mk:mk:1 15 | |
493 | Maltese:mt:mt:3 | |
dd8482fc | 494 | Moldovan:mo:mo:2 |
df8a53a3 | 495 | Norsk Norwegian:no no\@nynorsk nb nn:no:1 15 |
6be75cd7 JH |
496 | Occitan:oc:es:1 15 |
497 | Polski Polish:pl:pl:2 | |
284102e8 | 498 | Rumanian:ro:ro:2 |
a528dad0 | 499 | Russki Russian:ru:ru su ua:5 koi8 koi8r KOI8-R koi8u cp1251 cp866 |
6be75cd7 | 500 | Serbski Serbian:sr:yu:5 |
284102e8 | 501 | Slovak:sk:sk:2 |
6be75cd7 | 502 | Slovene Slovenian:sl:si:2 |
d43ce814 JH |
503 | Sqhip Albanian:sq:sq:1 15 |
504 | Svenska Swedish:sv:fi se:1 15 | |
6be75cd7 | 505 | Thai:th:th:11 tis620 |
284102e8 | 506 | Turkish:tr:tr:9 turkish8 |
dd8482fc | 507 | Yiddish:yi::1 15 |
284102e8 JH |
508 | EOF |
509 | ||
ee50adbe | 510 | if ($^O eq 'os390') { |
dd8482fc | 511 | # These cause heartburn. Broken locales? |
ee50adbe PP |
512 | $locales =~ s/Svenska Swedish:sv:fi se:1 15\n//; |
513 | $locales =~ s/Thai:th:th:11 tis620\n//; | |
514 | } | |
515 | ||
# True when the 0x08 bit of $^H is set, or when ${^OPEN} mentions a :utf8
# layer.  Used below to choose which locale data file to load.
sub in_utf8 () {
    my $hint_bit = $^H & 0x08;
    return $hint_bit if $hint_bit;
    my $open_layers = ${^OPEN} || "";
    return $open_layers =~ /:utf8/;
}
f9cbebe1 JH |
517 | |
518 | if (in_utf8) { | |
8a6cb2cb | 519 | require "lib/locale/utf8"; |
f9cbebe1 | 520 | } else { |
8a6cb2cb | 521 | require "lib/locale/latin1"; |
f9cbebe1 JH |
522 | } |
523 | ||
284102e8 JH |
524 | my @Locale; |
525 | my $Locale; | |
526 | my @Alnum_; | |
527 | ||
284102e8 JH |
# Try to switch to the named locale and, if that works, remember the name in
# @Locale for the tests further down.
#
#   $_[0]  the locale name to try
#
# A name already present in @Locale, or one that setlocale() rejects, is
# silently ignored.  A name that perl reports as malformed UTF-8 is recorded
# as a failed test and is not added to @Locale.
sub trylocale {
    my $locale = shift;
    return if grep { $locale eq $_ } @Locale;
    return unless setlocale(LC_ALL, $locale);

    my $badutf8;
    {
        local $SIG{__WARN__} = sub {
            $badutf8 = 1 if $_[0] =~ /Malformed UTF-8/;
        };

        # The match result is irrelevant; matching against the candidate
        # name is only done to provoke a "Malformed UTF-8" warning if the
        # name is broken.  This must inspect the argument $locale: the
        # file-level $Locale is not set while locales are being scanned.
        $locale =~ /UTF-?8/i;
    }

    if ($badutf8) {
        ok(0, "Locale name contains malformed utf8");
        return;
    }
    push @Locale, $locale;
}
8ebc5c01 | 546 | |
284102e8 JH |
# Expand the space-separated encodings field of a $locales entry into a list
# of encoding-name suffixes to try.
#
#   $_[0]  the encodings field; may be undefined or empty when an entry
#          lists no encodings (e.g. "Korean:ko:kr:")
#
# A purely numeric token N yields "ISO8859-N" and "iso8859N" (plus "roman8"
# when N is 1); any other token yields itself and itself with ".UTF-8"
# appended.  On os390 three IBM code pages are always appended.
# Returns the resulting list.
sub decode_encodings {
    # Default to '' so that an absent field does not trigger an
    # "uninitialized value" warning from split under -w.
    my $spec = shift // '';
    my @enc;

    foreach my $token (split(/ /, $spec)) {
        if ($token =~ /^(\d+)$/) {
            push @enc, "ISO8859-$1";
            push @enc, "iso8859$1";     # HP
            if ($1 eq '1') {
                push @enc, "roman8";    # HP
            }
        } else {
            push @enc, $token;
            push @enc, "$token.UTF-8";
        }
    }
    if ($^O eq 'os390') {
        push @enc, qw(IBM-037 IBM-819 IBM-1047);
    }

    return @enc;
}
568 | ||
284102e8 JH |
569 | trylocale("C"); |
570 | trylocale("POSIX"); | |
571 | foreach (0..15) { | |
572 | trylocale("ISO8859-$_"); | |
284102e8 | 573 | trylocale("iso8859$_"); |
097ee67d JH |
574 | trylocale("iso8859-$_"); |
575 | trylocale("iso_8859_$_"); | |
576 | trylocale("isolatin$_"); | |
577 | trylocale("isolatin-$_"); | |
578 | trylocale("iso_latin_$_"); | |
8ebc5c01 | 579 | } |
580 | ||
645e49ed JH |
581 | # Sanitize the environment so that we can run the external 'locale' |
582 | # program without the taint mode getting grumpy. | |
cce5967e JH |
583 | |
584 | # $ENV{PATH} is special in VMS. | |
585 | delete $ENV{PATH} if $^O ne 'VMS' or $Config{d_setenv}; | |
586 | ||
587 | # Other subversive stuff. | |
588 | delete @ENV{qw(IFS CDPATH ENV BASH_ENV)}; | |
dd8482fc | 589 | |
# Gather candidate locale names by whichever means this platform offers and
# hand each one to trylocale().
if (-x "/usr/bin/locale" && open(LOCALES, "/usr/bin/locale -a 2>/dev/null|")) {
    while (<LOCALES>) {
        # It seems that /usr/bin/locale steadfastly outputs 8-bit data,
        # which is a problem when this test is run with PERL_UNICODE set
        # (presumably so that UTF-8 locales make all I/O handles default to
        # UTF-8).  Skip any line that is not valid in that case.
        next unless utf8::valid($_);
        chomp;
        trylocale($_);
    }
    close(LOCALES);
} elsif ($^O eq 'VMS' && defined($ENV{'SYS$I18N_LOCALE'}) && -d 'SYS$I18N_LOCALE') {
    # The SYS$I18N_LOCALE logical name search list was not present on
    # VAX VMS V5.5-12, but was on AXP && VAX VMS V6.2 as well as later versions.
    if (opendir(LOCALES, "SYS\$I18N_LOCALE:")) {
        while (defined(my $entry = readdir(LOCALES))) {
            chomp $entry;
            trylocale($entry);
        }
        # A handle opened with opendir must be released with closedir;
        # close() does not close a directory handle.
        closedir(LOCALES);
    }
} elsif ($^O eq 'openbsd' && -e '/usr/share/locale') {

    # OpenBSD doesn't have a locale executable, so reading /usr/share/locale
    # is much easier and faster than the last resort method.

    if (opendir(LOCALES, '/usr/share/locale')) {
        while (defined(my $entry = readdir(LOCALES))) {
            chomp $entry;
            trylocale($entry);
        }
        closedir(LOCALES);
    }
} else {

    # This is going to be slow: try every combination of name, language
    # code, country code and encoding listed in $locales.

    foreach my $locale (split(/\n/, $locales)) {
        my ($locale_name, $language_codes, $country_codes, $encodings) =
            split(/:/, $locale);
        my @enc = decode_encodings($encodings);
        foreach my $loc (split(/ /, $locale_name)) {
            trylocale($loc);
            foreach my $enc (@enc) {
                trylocale("$loc.$enc");
            }
            $loc = lc $loc;
            foreach my $enc (@enc) {
                trylocale("$loc.$enc");
            }
        }
        foreach my $lang (split(/ /, $language_codes)) {
            trylocale($lang);
            foreach my $country (split(/ /, $country_codes)) {
                my $lc = "${lang}_${country}";
                trylocale($lc);
                foreach my $enc (@enc) {
                    trylocale("$lc.$enc");
                }
                my $lC = "${lang}_\U${country}";
                trylocale($lC);
                foreach my $enc (@enc) {
                    trylocale("$lC.$enc");
                }
            }
        }
    }
}
4599a1de | 655 | |
d43ce814 JH |
656 | setlocale(LC_ALL, "C"); |
657 | ||
if ($^O eq 'darwin') {
    # Darwin 8/Mac OS X 10.4 and 10.5 have bad Basque locales: perl bug #35895,
    # Apple bug ID# 4139653.  It also has a problem in Byelorussian.
    # Only the leading digits of the OS version decide which locales to drop.
    my ($v) = $Config{osvers} =~ /^(\d+)/;
    if ($v >= 8 and $v < 10) {
        debug("# Skipping eu_ES, be_BY locales -- buggy in Darwin\n");
        @Locale = grep { !/^(eu_ES(?:\..*)?|be_BY\.CP1131)$/ } @Locale;
    }
    elsif ($v < 12) {
        debug("# Skipping be_BY locales -- buggy in Darwin\n");
        @Locale = grep { !/^be_BY\.CP1131$/ } @Locale;
    }
}
670 | ||
4599a1de JH |
671 | @Locale = sort @Locale; |
672 | ||
887ef7ed PP |
673 | debug "# Locales =\n"; |
674 | for ( @Locale ) { | |
675 | debug "# $_\n"; | |
676 | } | |
8ebc5c01 | 677 | |
284102e8 | 678 | my %Problem; |
2a680da6 JH |
679 | my %Okay; |
680 | my %Testing; | |
30032ef4 | 681 | my @Added_alpha; # Alphas that aren't in the C locale. |
c08acc4c | 682 | my %test_names; |
284102e8 | 683 | |
30032ef4 KW |
# Record the outcome of one test for one locale.
#   $_[0]  locale name
#   $_[1]  test number
#   $_[2]  true if the test passed
#   $_[3]  optional extra text, shown in parentheses in the debug output
# A pass appends the locale to @{$Okay{test}}; a failure sets
# $Problem{test}{locale} and emits a debug line.
sub report_result {
    my ($locale_name, $test_no, $passed, $detail) = @_;

    my $suffix = !defined $detail ? ""
               : $detail          ? " ($detail)"
               :                    $detail;

    if ($passed) {
        push @{ $Okay{$test_no} }, $locale_name;
    }
    else {
        $Problem{$test_no}{$locale_name} = 1;
        debug("# failed $test_no ($test_names{$test_no}) with locale '${locale_name}'$suffix\n");
    }
}
695 | ||
c4093d7d KW |
696 | my $first_locales_test_number = $final_without_setlocale + 1; |
697 | my $locales_test_number; | |
698 | my $not_necessarily_a_problem_test_number; | |
6d5d702a KW |
699 | my $first_casing_test_number; |
700 | my $final_casing_test_number; | |
c4093d7d KW |
701 | my %setlocale_failed; # List of locales that setlocale() didn't work on |
702 | ||
284102e8 | 703 | foreach $Locale (@Locale) { |
c4093d7d | 704 | $locales_test_number = $first_locales_test_number - 1; |
284102e8 | 705 | debug "# Locale = $Locale\n"; |
284102e8 JH |
706 | |
707 | unless (setlocale(LC_ALL, $Locale)) { | |
c4093d7d | 708 | $setlocale_failed{$Locale} = $Locale; |
284102e8 | 709 | next; |
8ebc5c01 | 710 | } |
8ebc5c01 | 711 | |
66cbab2c KW |
712 | # We test UTF-8 locales only under ':not_characters'; otherwise they have |
713 | # documented deficiencies. Non- UTF-8 locales are tested only under plain | |
714 | # 'use locale', as otherwise we would have to convert everything in them | |
715 | # to Unicode. | |
716 | my $is_utf8_locale = $Locale =~ /UTF-?8/i; | |
717 | ||
95eaa1bf KW |
718 | my %UPPER = (); # All alpha X for which uc(X) == X and lc(X) != X |
719 | my %lower = (); # All alpha X for which lc(X) == X and uc(X) != X | |
720 | my %BoThCaSe = (); # All alpha X for which uc(X) == lc(X) == X | |
66cbab2c KW |
721 | |
722 | if (! $is_utf8_locale) { | |
723 | use locale; | |
71e5cbb3 | 724 | @Alnum_ = sort grep /\w/, map { chr } 0..255; |
f07538a5 | 725 | |
71e5cbb3 | 726 | debug "# w = ", join("",@Alnum_), "\n"; |
e5272a46 | 727 | |
71e5cbb3 KW |
728 | # Sieve the uppercase and the lowercase. |
729 | ||
730 | for (@Alnum_) { | |
731 | if (/[^\d_]/) { # skip digits and the _ | |
732 | if (uc($_) eq $_) { | |
733 | $UPPER{$_} = $_; | |
734 | } | |
735 | if (lc($_) eq $_) { | |
736 | $lower{$_} = $_; | |
737 | } | |
738 | } | |
739 | } | |
66cbab2c KW |
740 | } |
741 | else { | |
742 | use locale ':not_characters'; | |
743 | @Alnum_ = sort grep /\w/, map { chr } 0..255; | |
744 | debug "# w = ", join("",@Alnum_), "\n"; | |
745 | for (@Alnum_) { | |
746 | if (/[^\d_]/) { # skip digits and the _ | |
747 | if (uc($_) eq $_) { | |
748 | $UPPER{$_} = $_; | |
749 | } | |
750 | if (lc($_) eq $_) { | |
751 | $lower{$_} = $_; | |
752 | } | |
753 | } | |
754 | } | |
755 | } | |
284102e8 | 756 | foreach (keys %UPPER) { |
097ee67d | 757 | $BoThCaSe{$_}++ if exists $lower{$_}; |
284102e8 JH |
758 | } |
759 | foreach (keys %lower) { | |
097ee67d | 760 | $BoThCaSe{$_}++ if exists $UPPER{$_}; |
284102e8 | 761 | } |
097ee67d | 762 | foreach (keys %BoThCaSe) { |
284102e8 JH |
763 | delete $UPPER{$_}; |
764 | delete $lower{$_}; | |
765 | } | |
766 | ||
db4b7445 A |
767 | debug "# UPPER = ", join("", sort keys %UPPER ), "\n"; |
768 | debug "# lower = ", join("", sort keys %lower ), "\n"; | |
769 | debug "# BoThCaSe = ", join("", sort keys %BoThCaSe), "\n"; | |
284102e8 | 770 | |
baa71cfd | 771 | my @failures; |
3da38613 | 772 | my @fold_failures; |
baa71cfd KW |
773 | foreach my $x (sort keys %UPPER) { |
774 | my $ok; | |
3da38613 | 775 | my $fold_ok; |
baa71cfd KW |
776 | if ($is_utf8_locale) { |
777 | use locale ':not_characters'; | |
778 | $ok = $x =~ /[[:upper:]]/; | |
3da38613 | 779 | $fold_ok = $x =~ /[[:lower:]]/i; |
baa71cfd KW |
780 | } |
781 | else { | |
782 | use locale; | |
783 | $ok = $x =~ /[[:upper:]]/; | |
3da38613 | 784 | $fold_ok = $x =~ /[[:lower:]]/i; |
baa71cfd KW |
785 | } |
786 | push @failures, $x unless $ok; | |
3da38613 | 787 | push @fold_failures, $x unless $fold_ok; |
baa71cfd KW |
788 | } |
789 | my $message = ""; | |
790 | $locales_test_number++; | |
6d5d702a | 791 | $first_casing_test_number = $locales_test_number; |
95eaa1bf | 792 | $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/ matches all alpha X for which uc(X) == X and lc(X) != X'; |
baa71cfd | 793 | $message = 'Failed for ' . join ", ", @failures if @failures; |
30032ef4 | 794 | report_result($Locale, $locales_test_number, scalar @failures == 0, $message); |
6d5d702a | 795 | |
3da38613 KW |
796 | $message = ""; |
797 | $locales_test_number++; | |
6d5d702a | 798 | |
95eaa1bf | 799 | $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i matches all alpha X for which uc(X) == X and lc(X) != X'; |
3da38613 | 800 | $message = 'Failed for ' . join ", ", @fold_failures if @fold_failures; |
30032ef4 | 801 | report_result($Locale, $locales_test_number, scalar @fold_failures == 0, $message); |
baa71cfd KW |
802 | |
803 | $message = ""; | |
804 | undef @failures; | |
3da38613 | 805 | undef @fold_failures; |
baa71cfd KW |
806 | |
807 | foreach my $x (sort keys %lower) { | |
808 | my $ok; | |
3da38613 | 809 | my $fold_ok; |
baa71cfd KW |
810 | if ($is_utf8_locale) { |
811 | use locale ':not_characters'; | |
812 | $ok = $x =~ /[[:lower:]]/; | |
3da38613 | 813 | $fold_ok = $x =~ /[[:upper:]]/i; |
baa71cfd KW |
814 | } |
815 | else { | |
816 | use locale; | |
817 | $ok = $x =~ /[[:lower:]]/; | |
3da38613 | 818 | $fold_ok = $x =~ /[[:upper:]]/i; |
baa71cfd KW |
819 | } |
820 | push @failures, $x unless $ok; | |
3da38613 | 821 | push @fold_failures, $x unless $fold_ok; |
baa71cfd KW |
822 | } |
823 | ||
824 | $locales_test_number++; | |
95eaa1bf | 825 | $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/ matches all alpha X for which lc(X) == X and uc(X) != X'; |
baa71cfd | 826 | $message = 'Failed for ' . join ", ", @failures if @failures; |
30032ef4 | 827 | report_result($Locale, $locales_test_number, scalar @failures == 0, $message); |
baa71cfd | 828 | $message = ""; |
3da38613 | 829 | $locales_test_number++; |
6d5d702a | 830 | $final_casing_test_number = $locales_test_number; |
95eaa1bf | 831 | $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/i matches all alpha X for which lc(X) == X and uc(X) != X'; |
3dd9aa8b | 832 | $message = 'Failed for ' . join ", ", @fold_failures if @fold_failures; |
30032ef4 | 833 | report_result($Locale, $locales_test_number, scalar @fold_failures == 0, $message); |
baa71cfd | 834 | |
9445c837 KW |
835 | { # Find the alphabetic characters that are not considered alphabetics |
836 | # in the default (C) locale. | |
8ebc5c01 | 837 | |
284102e8 | 838 | no locale; |
71e5cbb3 | 839 | |
30032ef4 | 840 | @Added_alpha = (); |
5e7a1028 | 841 | for (keys %UPPER, keys %lower, keys %BoThCaSe) { |
30032ef4 | 842 | push(@Added_alpha, $_) if (/\W/); |
284102e8 | 843 | } |
8ebc5c01 | 844 | } |
8ebc5c01 | 845 | |
30032ef4 | 846 | @Added_alpha = sort @Added_alpha; |
8ebc5c01 | 847 | |
30032ef4 | 848 | debug "# Added_alpha = ", join("",@Added_alpha), "\n"; |
8ebc5c01 | 849 | |
30032ef4 KW |
850 | my $first_Added_alpha_test_number = $locales_test_number + 1; |
851 | my $final_Added_alpha_test_number = $first_Added_alpha_test_number + 3; | |
852 | if (@Added_alpha == 0) { | |
853 | # If we have no Added_alpha the remaining tests are no-ops. | |
854 | debug "# no Added_alpha, skipping tests $first_Added_alpha_test_number..$final_Added_alpha_test_number for locale '$Locale'\n"; | |
855 | foreach ($locales_test_number+1..$final_Added_alpha_test_number) { | |
a88c3d7c | 856 | push @{$Okay{$_}}, $Locale; |
c4093d7d | 857 | $locales_test_number++; |
a88c3d7c | 858 | } |
6be75cd7 | 859 | } else { |
8ebc5c01 | 860 | |
6be75cd7 | 861 | # Test \w. |
71e5cbb3 | 862 | |
30032ef4 | 863 | my $word = join('', @Added_alpha); |
8ebc5c01 | 864 | |
30032ef4 | 865 | # This test is likely pointless, as everything in @Added_alpha |
95eaa1bf | 866 | # matched \w in the first place. |
c4093d7d | 867 | ++$locales_test_number; |
95eaa1bf | 868 | $test_names{$locales_test_number} = 'Verify that alphas outside the C locale match \w'; |
66cbab2c KW |
869 | my $ok; |
870 | if ($is_utf8_locale) { | |
871 | use locale ':not_characters'; | |
872 | $ok = $word =~ /^(\w+)$/; | |
873 | } | |
874 | else { | |
875 | # Already in 'use locale'; this tests that exiting scopes works | |
876 | $ok = $word =~ /^(\w+)$/; | |
877 | } | |
30032ef4 | 878 | report_result($Locale, $locales_test_number, $ok); |
ef4a39e5 | 879 | |
# Cross-check the whole 8-bit character set.
#
# Every character must match exactly one of \w / \W, exactly one of
# \d / \D, and exactly one of \s / \S.  The three checks are and-ed
# together: the test name promises all three pairs are mutually exclusive,
# and or-ing them would let a broken pair go unnoticed as long as any one
# of the other pairs behaved.
++$locales_test_number;
$test_names{$locales_test_number} = 'Verify that \w and \W are mutually exclusive, as are \d, \D; \s, \S';
for (map { chr } 0..255) {
    if ($is_utf8_locale) {
        use locale ':not_characters';
        $ok = (/\w/ xor /\W/) &&
              (/\d/ xor /\D/) &&
              (/\s/ xor /\S/);
    }
    else {
        # No pragma of its own here; this arm runs under whatever locale
        # pragma the enclosing code has in force.
        $ok = (/\w/ xor /\W/) &&
              (/\d/ xor /\D/) &&
              (/\s/ xor /\S/);
    }

    # One result is reported per character, all under the same test number.
    report_result($Locale, $locales_test_number, $ok);
}
8ebc5c01 | 898 | |
# Test for read-only scalars' locale vs non-locale comparisons: a string
# assigned under 'no locale' is compared with a constant under one of the
# two locale pragmas, and must still compare equal.
{
    no locale;
    $a = "qwerty";

    my $cmp_result;
    if (! $is_utf8_locale) {
        use locale;
        $cmp_result = $a cmp "qwerty";
    }
    else {
        use locale ':not_characters';
        $cmp_result = $a cmp "qwerty";
    }
    $ok = $cmp_result == 0;

    # The result is reported first and the test named afterwards, as before.
    $locales_test_number++;
    report_result($Locale, $locales_test_number, $ok);
    $test_names{$locales_test_number} = 'Verify that cmp works with a read-only scalar; no- vs locale';
}
8ebc5c01 | 915 | |
# Check that the string comparison operators agree with each other under
# the locale's collation.  Ten overlapping slices of @Alnum_ are taken;
# for each, $lesser is one slice and $greater is the slice one element
# further on.  A list of expressions that should all be FALSE is built and
# evaluated; any true one fails the test for this locale.
{
    my ($from, $to, $lesser, $greater,
        @test, %test, $test, $yes, $no, $sign);

    ++$locales_test_number;
    $test_names{$locales_test_number} = 'Verify that "le", "ne", etc work';
    $not_necessarily_a_problem_test_number = $locales_test_number;
    for (0..9) {
        # Select a slice.
        $from = int(($_*@Alnum_)/10);
        $to = $from + int(@Alnum_/10);
        $to = $#Alnum_ if ($to > $#Alnum_);
        $lesser  = join('', @Alnum_[$from..$to]);
        # Select a slice one character on.
        $from++; $to++;
        $to = $#Alnum_ if ($to > $#Alnum_);
        $greater = join('', @Alnum_[$from..$to]);

        # Work out which way round 'lt' orders the two strings in this
        # locale; $yes/$no are prefixes ("not " or blank) spliced into the
        # expressions below so that each is expected to evaluate false.
        if ($is_utf8_locale) {
            use locale ':not_characters';
            ($yes, $no, $sign) = ($lesser lt $greater
                                  ? (" ", "not ", 1)
                                  : ("not ", " ", -1));
        }
        else {
            use locale;
            ($yes, $no, $sign) = ($lesser lt $greater
                                  ? (" ", "not ", 1)
                                  : ("not ", " ", -1));
        }
        # all these tests should FAIL (return 0).
        # Exact lt or gt cannot be tested because
        # in some locales, say, eacute and E may test equal.
        @test =
            (
             $no.' ($lesser le $greater)', # 1
             'not ($lesser ne $greater)', # 2
             ' ($lesser eq $greater)', # 3
             $yes.' ($lesser ge $greater)', # 4
             $yes.' ($lesser ge $greater)', # 5
             $yes.' ($greater le $lesser )', # 7
             'not ($greater ne $lesser )', # 8
             ' ($greater eq $lesser )', # 9
             $no.' ($greater ge $lesser )', # 10
             'not (($lesser cmp $greater) == -($sign))' # 11
             );

        # Initialise every slot to 0.  The parentheses matter: '(0) x N'
        # repeats a list, whereas '0 x N' would build the single string
        # "000..." and leave all slots but the first undefined.
        @test{@test} = (0) x @test;
        $test = 0;
        for my $ti (@test) {
            if ($is_utf8_locale) {
                use locale ':not_characters';
                $test{$ti} = eval $ti;
            }
            else {
                # Already in 'use locale';
                $test{$ti} = eval $ti;
            }
            $test ||= $test{$ti}
        }
        report_result($Locale, $locales_test_number, $test == 0);
        if ($test) {
            # Something evaluated true: dump the details and stop trying
            # further slices for this locale.
            debug "# lesser = '$lesser'\n";
            debug "# greater = '$greater'\n";
            debug "# lesser cmp greater = ",
                  $lesser cmp $greater, "\n";
            debug "# greater cmp lesser = ",
                  $greater cmp $lesser, "\n";
            debug "# (greater) from = $from, to = $to\n";
            for my $ti (@test) {
                debugf("# %-40s %-4s", $ti,
                       $test{$ti} ? 'FAIL' : 'ok');
                if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) {
                    debugf("(%s == %4d)", $1, eval $1);
                }
                debug "\n#";
            }

            last;
        }
    }
}
996 | } | |
6be75cd7 | 997 | |
# Bookkeeping sanity check: the number of tests actually run since
# $first_Added_alpha_test_number must match what was assumed when
# $final_Added_alpha_test_number was computed.  If a test was added or
# removed without updating that delta, stop and say what it should be.
unless ($locales_test_number == $final_Added_alpha_test_number) {
    my $declared_delta = $final_Added_alpha_test_number
                       - $first_Added_alpha_test_number;
    my $actual_delta   = $locales_test_number
                       - $first_Added_alpha_test_number;
    die "The delta for \$final_Added_alpha needs to be updated from $declared_delta to $actual_delta";
}
1005 | ||
# Outcomes of the sixteen radix / stringification checks computed below.
# They are only computed here, so they have to outlive the pragma-specific
# blocks that fill them in.
my ($ok1,  $ok2,  $ok3,  $ok4,  $ok5,  $ok6,  $ok7,  $ok8,
    $ok9,  $ok10, $ok11, $ok12, $ok13, $ok14, $ok15, $ok16);

# Intermediate values, declared out here so they remain visible after the
# pragma-specific blocks.
my ($c, $d, $e, $f, $g);

# The same sequence is written out once per pragma because the pragmas are
# lexically scoped; see the comments at the top of the file.
if ($is_utf8_locale) {
    use locale ':not_characters';

    my ($first_num, $second_num) = (1.23, 1.23);

    $a = "$first_num";
    printf '';                      # printf used to reset locale to "C"
    $b = "$second_num";
    $ok1 = $a eq $b;

    $c = "$first_num";
    my $discarded = sprintf '';     # sprintf used to reset locale to "C"
    $d = "$second_num";
    $ok2 = $c eq $d;
    {
        use warnings;

        # Count (and echo as a comment) any warning raised in this block.
        my $warning_count = 0;
        local $SIG{__WARN__} =
            sub {
                print "# @_\n";
                $warning_count++;
            };

        $ok3 = $c == 1.23;
        $ok4 = $c == $first_num;
        $ok5 = $c == $d;
        {
            no locale;
            $e = "$first_num";

            $ok6 = $e == 1.23;
            $ok7 = $e == $first_num;
            $ok8 = $e == $c;
        }

        $f = "1.23";
        $g = 2.34;

        $ok9  = $f == 1.23;
        $ok10 = $f == $first_num;
        $ok11 = $f == $c;
        $ok12 = abs(($f + $g) - 3.57) < 0.01;
        $ok13 = $warning_count == 0;

        # Look for non-ASCII error messages, and verify that the first
        # such is in UTF-8 (the others almost certainly will be like the
        # first).
        $ok14 = 1;
        foreach my $err (keys %!) {
            use Errno;
            $! = eval "&Errno::$err";   # Convert to strerror() output
            my $message = "$!";
            if ("$message" =~ /\P{ASCII}/) {
                my $upgraded_message = $message;
                utf8::upgrade($upgraded_message);

                # If $! was already in UTF-8, the upgrade was a no-op;
                # otherwise they will be different byte strings.
                use bytes;
                $ok14 = $upgraded_message eq $message;
                last;
            }
        }

        # Similarly, we verify that a non-ASCII radix is in UTF-8.  This
        # also catches if there is a disparity between sprintf and
        # stringification.

        my $plain_g = "$g";

        my $upgraded_g = "$g";
        utf8::upgrade($upgraded_g);

        my $upgraded_sprintf_g = sprintf("%g", $g);
        utf8::upgrade($upgraded_sprintf_g);

        # Compare byte-wise from here to the end of the block.
        use bytes;
        $ok15 = $upgraded_g eq $plain_g;
        $ok16 = $upgraded_sprintf_g eq $plain_g;
    }
}
else {
    use locale;

    my ($first_num, $second_num) = (1.23, 1.23);

    $a = "$first_num";
    printf '';                      # printf used to reset locale to "C"
    $b = "$second_num";
    $ok1 = $a eq $b;

    $c = "$first_num";
    my $discarded = sprintf '';     # sprintf used to reset locale to "C"
    $d = "$second_num";
    $ok2 = $c eq $d;
    {
        use warnings;

        # Count (and echo as a comment) any warning raised in this block.
        my $warning_count = 0;
        local $SIG{__WARN__} =
            sub {
                print "# @_\n";
                $warning_count++;
            };

        # The == (among other ops) used to warn for locales
        # that had something else than "." as the radix character.

        $ok3 = $c == 1.23;
        $ok4 = $c == $first_num;
        $ok5 = $c == $d;
        {
            no locale;

            $e = "$first_num";

            $ok6 = $e == 1.23;
            $ok7 = $e == $first_num;
            $ok8 = $e == $c;
        }

        $f = "1.23";
        $g = 2.34;

        $ok9  = $f == 1.23;
        $ok10 = $f == $first_num;
        $ok11 = $f == $c;
        $ok12 = abs(($f + $g) - 3.57) < 0.01;
        $ok13 = $warning_count == 0;
        $ok14 = $ok15 = $ok16 = 1;  # Skip for non-utf8 locales
    }
}
1158 | ||
# Report $ok1 .. $ok16, one test number each, in order.
#
# Each table row is [ result, test name, optional hook ].  For every row
# the result is reported under the next test number, then the test is
# named, then the hook (if any) runs.  The hooks remember the first test
# number of each group and emit the debug line that closes a group.
my ($first_a_test, $first_c_test, $first_e_test, $first_f_test);

my @radix_reports = (
    [ $ok1,
      'Verify that an intervening printf doesn\'t change assignment results',
      sub {
          $first_a_test = $locales_test_number;
          debug "# $first_a_test..$locales_test_number: \$a = $a, \$b = $b, Locale = $Locale\n";
      },
    ],
    [ $ok2,
      'Verify that an intervening sprintf doesn\'t change assignment results',
      sub { $first_c_test = $locales_test_number },
    ],
    [ $ok3,
      'Verify that a different locale radix works when doing "==" with a constant',
    ],
    [ $ok4,
      'Verify that a different locale radix works when doing "==" with a scalar',
    ],
    [ $ok5,
      'Verify that a different locale radix works when doing "==" with a scalar and an intervening sprintf',
      sub {
          debug "# $first_c_test..$locales_test_number: \$c = $c, \$d = $d, Locale = $Locale\n";
      },
    ],
    [ $ok6,
      'Verify that can assign stringified under inner no-locale block',
      sub { $first_e_test = $locales_test_number },
    ],
    [ $ok7,
      'Verify that "==" with a scalar still works in inner no locale',
    ],
    [ $ok8,
      'Verify that "==" with a scalar and an intervening sprintf still works in inner no locale',
      sub {
          debug "# $first_e_test..$locales_test_number: \$e = $e, no locale\n";
      },
    ],
    [ $ok9,
      'Verify that after a no-locale block, a different locale radix still works when doing "==" with a constant',
      sub { $first_f_test = $locales_test_number },
    ],
    [ $ok10,
      'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar',
    ],
    [ $ok11,
      'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar and an intervening sprintf',
    ],
    [ $ok12,
      'Verify that after a no-locale block, a different locale radix can participate in an addition and function call as numeric',
    ],
    [ $ok13,
      'Verify that don\'t get warning under "==" even if radix is not a dot',
    ],
    [ $ok14,
      'Verify that non-ASCII UTF-8 error messages are in UTF-8',
    ],
    [ $ok15,
      'Verify that a number with a UTF-8 radix has a UTF-8 stringification',
    ],
    [ $ok16,
      'Verify that a sprintf of a number with a UTF-8 radix yields UTF-8',
      sub {
          debug "# $first_f_test..$locales_test_number: \$f = $f, \$g = $g, back to locale = $Locale\n";
      },
    ],
);

for my $report (@radix_reports) {
    my ($passed, $name, $after) = @$report;

    # Report first, name second: the same order the tests were written in.
    report_result($Locale, ++$locales_test_number, $passed);
    $test_names{$locales_test_number} = $name;

    $after->() if $after;
}
906f284f | 1219 | |
# Does taking lc separately differ from taking
# the lc "in-line"?  (This was the bug 19990704.002, change #3568.)
# The bug was in the caching of the 'o'-magic.
#
# Each pragma gets its own pair of helper subs, compiled under that pragma:
# one lowercases into temporaries and then compares, the other lowercases
# inside the comparison expression.  Both must give the same answer.
if ($is_utf8_locale) {
    use locale ':not_characters';

    # Lowercase into intermediate variables, then compare.
    sub lcC {
        my $left  = lc $_[0];
        my $right = lc $_[1];
        return $left cmp $right;
    }

    # Lowercase directly within the comparison.
    sub lcD {
        return lc($_[0]) cmp lc($_[1]);
    }

    my ($x, $y, $z) = ("ab", "aa", "AB");

    $locales_test_number++;
    my $consistent = (lcC($x, $y) == 1 && lcD($x, $y) == 1)
                  || (lcC($x, $z) == 0 && lcD($x, $z) == 0);
    report_result($Locale, $locales_test_number, $consistent);
}
else {
    use locale;

    # Lowercase into intermediate variables, then compare.
    sub lcA {
        my $left  = lc $_[0];
        my $right = lc $_[1];
        return $left cmp $right;
    }

    # Lowercase directly within the comparison.
    sub lcB {
        return lc($_[0]) cmp lc($_[1]);
    }

    my ($x, $y, $z) = ("ab", "aa", "AB");

    $locales_test_number++;
    my $consistent = (lcA($x, $y) == 1 && lcB($x, $y) == 1)
                  || (lcA($x, $z) == 0 && lcB($x, $z) == 0);
    report_result($Locale, $locales_test_number, $consistent);
}
$test_names{$locales_test_number} = 'Verify "lc(foo) cmp lc(bar)" is the same as using intermediaries for the cmp';
d8093b23 | 1266 | |
# Does lc of an UPPER (if different from the UPPER) match
# case-insensitively the UPPER, and does the UPPER match
# case-insensitively the lc of the UPPER.  And vice versa.
{
    use locale;
    no utf8;

    # Characters that mean something to the regex compiler.
    my $regex_special = qr/[\[\(\{\*\+\?\|\^\$\\]/;

    # Characters for which any of the checks below failed.
    my @failed_chars = ();

    $locales_test_number++;
    $test_names{$locales_test_number} = 'Verify case insensitive matching works';

    foreach my $upper (sort keys %UPPER) {
        if (! $is_utf8_locale) {
            my $lowered = lc $upper;

            # Only characters whose case change round-trips are tested.
            next if uc($lowered) ne $upper;

            # Disabled diagnostic, kept for hand debugging.
            print "# UPPER $upper lc $lowered ",
                  $upper =~ /$lowered/i ? 1 : 0, " ",
                  $lowered =~ /$upper/i ? 1 : 0, "\n" if 0;
            #
            # If the two characters contain regular expression characters
            # AND THEY lowercase (/i) to regular expression characters,
            # regcomp() will be mightily confused.  No, the \Q doesn't
            # help here (maybe regex engine internal lowercasing
            # is done after the \Q?)  An example of this happening is
            # the bg_BG (Bulgarian) locale under EBCDIC (OS/390 USS):
            # the chr(173) (the "[") is the lowercase of the chr(235).
            #
            # Similarly losing EBCDIC locales include cs_cz, cs_CZ,
            # el_gr, el_GR, en_us.IBM-037 (!), en_US.IBM-037 (!),
            # et_ee, et_EE, hr_hr, hr_HR, hu_hu, hu_HU, lt_LT,
            # mk_mk, mk_MK, nl_nl.IBM-037, nl_NL.IBM-037,
            # pl_pl, pl_PL, ro_ro, ro_RO, ru_ru, ru_RU,
            # sk_sk, sk_SK, sl_si, sl_SI, tr_tr, tr_TR.
            #
            # Similar things can happen even under (bastardised)
            # non-EBCDIC locales: in many European countries before the
            # advent of ISO 8859-x nationally customised versions of
            # ISO 646 were devised, reusing certain punctuation
            # characters for modified characters needed by the
            # country/language.  For example, the "|" might have
            # stood for U+00F6 or LATIN SMALL LETTER O WITH DIAERESIS.
            #
            if ($upper =~ $regex_special || $lowered =~ $regex_special) {
                print "# Regex characters in '$upper' or '$lowered', skipping test $locales_test_number for locale '$Locale'\n";
                next;
            }

            # With utf8 both will fail since the locale concept
            # of upper/lower does not work well in Unicode.  So only
            # require that the two directions agree with each other.
            push @failed_chars, $upper
                unless ($upper =~ /$lowered/i) == ($lowered =~ /$upper/i);

            # fc is not a locale concept, so Perl uses lc for it.
            push @failed_chars, $upper unless lc($upper) eq fc($upper);
        }
        else {
            use locale ':not_characters';
            my $lowered = lc $upper;
            next if uc($lowered) ne $upper;

            # Disabled diagnostic, kept for hand debugging.
            print "# UPPER $upper lc $lowered ",
                  $upper =~ /$lowered/i ? 1 : 0, " ",
                  $lowered =~ /$upper/i ? 1 : 0, "\n" if 0;

            # Here, we can fully test things, unlike plain 'use locale',
            # because this form does work well with Unicode
            push @failed_chars, $upper
                unless $upper =~ /$lowered/i && $lowered =~ /$upper/i;

            # The places where Unicode's lc is different from fc are
            # skipped here by virtue of the round-trip check above
            push @failed_chars, $upper unless lc($upper) eq fc($upper);
        }
    }

    # The same checks, starting from the lowercase side.
    foreach my $lower (sort keys %lower) {
        if (! $is_utf8_locale) {
            my $raised = uc $lower;
            next if lc($raised) ne $lower;

            # Disabled diagnostic, kept for hand debugging.
            print "# lower $lower uc $raised ",
                  $lower =~ /$raised/i ? 1 : 0, " ",
                  $raised =~ /$lower/i ? 1 : 0, "\n" if 0;

            # See the long explanation in the UPPER loop.
            if ($lower =~ $regex_special || $raised =~ $regex_special) {
                print "# Regex characters in '$lower' or '$raised', skipping test $locales_test_number for locale '$Locale'\n";
                next;
            }

            # With utf8 both will fail since the locale concept
            # of upper/lower does not work well in Unicode.
            push @failed_chars, $lower
                unless ($lower =~ /$raised/i) == ($raised =~ /$lower/i);

            push @failed_chars, $lower unless lc($lower) eq fc($lower);
        }
        else {
            use locale ':not_characters';
            my $raised = uc $lower;
            next if lc($raised) ne $lower;

            # Disabled diagnostic, kept for hand debugging.
            print "# lower $lower uc $raised ",
                  $lower =~ /$raised/i ? 1 : 0, " ",
                  $raised =~ /$lower/i ? 1 : 0, "\n" if 0;

            push @failed_chars, $lower
                unless $lower =~ /$raised/i && $raised =~ /$lower/i;

            push @failed_chars, $lower unless lc($lower) eq fc($lower);
        }
    }

    report_result($Locale, $locales_test_number, @failed_chars == 0);
    print "# failed $locales_test_number locale '$Locale' characters @failed_chars\n"
        if @failed_chars;
}

# [perl #109318]
# Numeric strings written with either "." or the locale's own radix
# character, with positive and negative exponents and signs, are formatted
# with "%g"; each result must still show a 3, something, then 14.
{
    $locales_test_number++;
    $test_names{$locales_test_number} = 'Verify atof with locale radix and negative exponent';

    my $radix = POSIX::localeconv()->{decimal_point};
    my @nums = (
         "3.14e+9",  "3${radix}14e+9",  "3.14e-9",  "3${radix}14e-9",
        "-3.14e+9", "-3${radix}14e+9", "-3.14e-9", "-3${radix}14e-9",
    );

    # Inputs whose formatted form did not look right, in input order.
    my @bad_nums;
    if ($is_utf8_locale) {
        use locale ':not_characters';
        @bad_nums = grep { sprintf("%g", $_) !~ /3.+14/ } @nums;
    }
    else {
        use locale;
        @bad_nums = grep { sprintf("%g", $_) !~ /3.+14/ } @nums;
    }

    report_result($Locale, $locales_test_number, @bad_nums == 0);
    print "# failed $locales_test_number locale '$Locale' numbers @bad_nums\n"
        if @bad_nums;
}
8ebc5c01 | 1405 | } |
284102e8 | 1406 | |
my $final_locales_test_number = $locales_test_number;

# Recount the errors.
#
# Emit one TAP result line per locale test.  A test is "not ok" if any
# setlocale() call failed at all, or if some locale recorded a problem for
# it, or if no locale recorded a pass for it.  Explanatory text goes out as
# TAP comments (every line starting with '#') ahead of the result line.

foreach my $number ($first_locales_test_number..$final_locales_test_number) {
    if (%setlocale_failed) {
        print "not ";
    }
    elsif ($Problem{$number}
           || !defined $Okay{$number}
           || !@{$Okay{$number}})
    {
        if (defined $not_necessarily_a_problem_test_number
            && $number == $not_necessarily_a_problem_test_number)
        {
            print "# The failure of test $not_necessarily_a_problem_test_number is not necessarily fatal.\n";
            print "# It usually indicates a problem in the environment,\n";
            print "# not in Perl itself.\n";
        }

        # For the casing tests, a small enough share of failing locales
        # turns the test into a TODO.  The extra check that at least one
        # locale passed keeps the division below from dividing by zero;
        # it also means we only get here because $Problem{$number} is set.
        if ($Okay{$number}
            && @{ $Okay{$number} }
            && ($number >= $first_casing_test_number
                && $number <= $final_casing_test_number))
        {
            # Explicit hash dereference: calling keys() directly on a
            # hash reference is not accepted by current perls.
            my $percent_fail
                = int(.5 + (100 * scalar(keys %{ $Problem{$number} })
                                / scalar(@{ $Okay{$number} })));
            if ($percent_fail < $acceptable_fold_failure_percentage) {
                $test_names{$number} .= 'TODO';
                print "# ", 100 - $percent_fail, "% of locales pass the following test, so it is likely that the failures\n";
                print "# are errors in the locale definitions. The test is marked TODO, as the\n";
                print "# problem is not likely to be Perl's\n";
            }
        }
        unless ($debug) {
            # Both lines carry the '#' so the hint stays a TAP comment.
            print "#\n# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1\n";
        }
        print "not ";
    }
    print "ok $number";
    if (defined $test_names{$number}) {
        # If TODO is in the test name, make it thus
        my $todo = $test_names{$number} =~ s/TODO\s*//;
        print " $test_names{$number}";
        print " # TODO" if $todo;
    }
    print "\n";
}
fb73857a | 1449 | |
# Give final advice.
#
# For every locale test that recorded a problem, list the locales it
# failed in and tell the user what to do about it.  $didwarn records
# whether anything was said, so the summary that follows knows to run.

my $didwarn = 0;

foreach ($first_locales_test_number..$final_locales_test_number) {
    if ($Problem{$_}) {
        my @f = sort keys %{ $Problem{$_} };
        my $f = join(" ", @f);

        # Wrap the list of locale names at roughly 50-60 columns, keeping
        # the continuation lines as comments.
        $f =~ s/(.{50,60}) /$1\n#\t/g;

        # The message list needs an explicit output call; without one the
        # strings would merely be evaluated and thrown away.
        print
            "#\n",
            "# The locale ", (@f == 1 ? "definition" : "definitions"), "\n#\n",
            "#\t", $f, "\n#\n",
            "# on your system may have errors because the locale test $_\n",
            "# \"$test_names{$_}\"\n",
            "# failed in ", (@f == 1 ? "that locale" : "those locales"),
            ".\n";
        print <<EOW;
#
# If your users are not using these locales you are safe for the moment,
# but please report this failure first to perlbug\@perl.com using the
# perlbug script (as described in the INSTALL file) so that the exact
# details of the failures can be sorted out first and then your operating
# system supplier can be alerted about these anomalies.
#
EOW
        $didwarn = 1;
    }
}
774d564b | 1479 | |
# Tell which locales were okay and which were not.
#
# Only done when the advice above was given.  A locale counts as troubled
# if setlocale() failed for it, or if any locale test recorded a problem
# for it; otherwise it is clean.  Both lists go to STDERR via warn.

if ($didwarn) {
    my (@clean_locales, @troubled_locales);

    for my $locale_name (@Locale) {
        my $problem_count
            = $setlocale_failed{$locale_name}
            ? 1
            : scalar grep { $Problem{$_}{$locale_name} }
                          $first_locales_test_number
                              .. $final_locales_test_number;

        if ($problem_count == 0) {
            push @clean_locales, $locale_name;
        }
        else {
            push @troubled_locales, $locale_name;
        }
    }

    if (@clean_locales) {
        # Wrap the names at roughly 50-60 columns, as comment lines.
        my $wrapped = join(" ", @clean_locales);
        $wrapped =~ s/(.{50,60}) /$1\n#\t/g;

        warn "# The following locales\n#\n#\t$wrapped\n#\n# tested okay.\n#\n";
    }
    else {
        warn "# None of your locales were fully okay.\n";
    }

    if (@troubled_locales) {
        my $wrapped = join(" ", @troubled_locales);
        $wrapped =~ s/(.{50,60}) /$1\n#\t/g;

        warn "# The following locales\n#\n#\t$wrapped\n#\n# had problems.\n#\n";
    }
    else {
        warn "# None of your locales were broken.\n";
    }
}
90248788 | 1525 | |
# From here on, tests are numbered by $test_num, continuing after the last
# per-locale test.
$test_num = $final_locales_test_number;

{   # perl #115808
    # Concatenating the result of a setlocale() call that names a bogus
    # locale is expected to raise an "uninitialized" warning.
    use warnings;

    my $saw_uninit_warning = 0;
    local $SIG{__WARN__} = sub {
        my ($message) = @_;

        # Reflects only the most recent warning received.
        $saw_uninit_warning = $message =~ /uninitialized/;
    };

    my $concatenated = "y" . setlocale(&POSIX::LC_ALL, "xyzzy");
    ok($saw_uninit_warning, "variable set to setlocale(BAD LOCALE) is considered uninitialized");
}
1537 | ||
# Test that tainting and case changing works on utf8 strings.  These tests are
# placed last to avoid disturbing the hard-coded test numbers that existed at
# the time these were added above this in this file.
# This also tests that locale overrides unicode_strings in the same scope for
# non-utf8 strings.
setlocale(LC_ALL, "C");
{
    use locale;
    use feature 'unicode_strings';

    foreach my $function ("uc", "ucfirst", "lc", "lcfirst", "fc") {
        my @chars;      # Characters to test for $function

        # Used to calculate the changed case for ASCII characters by using
        # the ord, instead of using one of the functions under test.
        my $ascii_delta;
        my $above_latin1_delta;     # Same for the specific ords > 255
                                    # that we use

        # We test an ASCII character, which should change case and be tainted;
        # a Latin1 character, which shouldn't change case under this C locale,
        #   and is tainted.
        # an above-Latin1 character that when the case is changed would cross
        #   the 255/256 boundary, so doesn't change case and isn't tainted
        #   (the \x{149} is one of these, but changes into 2 characters, the
        #   first one of which doesn't cross the boundary.
        # the final one in each list is an above-Latin1 character whose case
        #   does change, and shouldn't be tainted.  The code below uses its
        #   position in its list as a marker to indicate that it, unlike the
        #   other code points above ASCII, has a successful case change
        if ($function =~ /^u/) {
            @chars = ("", "a", "\xe0", "\xff", "\x{fb00}", "\x{149}", "\x{101}");
            $ascii_delta        = -32;
            $above_latin1_delta = -1;
        }
        else {
            @chars = ("", "A", "\xC0", "\x{17F}", "\x{100}");
            $ascii_delta        = +32;
            $above_latin1_delta = +1;
        }

        foreach my $is_utf8_locale (0 .. 1) {
            foreach my $idx (0 .. $#chars) {
                my $char = $chars[$idx];

                # Two passes per character: as stored in the list first,
                # then after the utf8::upgrade at the bottom of the loop.
                for my $encoded_in_utf8 (0 .. 1) {
                    my $should_be;
                    my $changed;

                    if (! $is_utf8_locale) {
                        # Expected value worked out arithmetically.
                        if ($idx == $#chars) {
                            $should_be = chr(ord($char) + $above_latin1_delta);
                        }
                        elsif (length($char) == 0 || ord($char) > 127) {
                            $should_be = $char;
                        }
                        else {
                            $should_be = chr(ord($char) + $ascii_delta);
                        }

                        # This monstrosity is in order to avoid using an
                        # eval, which might perturb the results
                        if    ($function eq "uc")      { $changed = uc($char) }
                        elsif ($function eq "ucfirst") { $changed = ucfirst($char) }
                        elsif ($function eq "lc")      { $changed = lc($char) }
                        elsif ($function eq "lcfirst") { $changed = lcfirst($char) }
                        elsif ($function eq "fc")      { $changed = fc($char) }
                        else {
                            die("Unexpected function \"$function\"");
                        }
                    }
                    else {
                        {
                            no locale;

                            # For utf8-locales the case changing functions
                            # should work just like they do outside of
                            # locale.  Can use eval here because not
                            # testing it when not in locale.
                            $should_be = eval "$function('$char')";
                            die "Unexpected eval error $@ from 'eval \"$function('$char')\"'" if $@;
                        }

                        use locale ':not_characters';
                        if    ($function eq "uc")      { $changed = uc($char) }
                        elsif ($function eq "ucfirst") { $changed = ucfirst($char) }
                        elsif ($function eq "lc")      { $changed = lc($char) }
                        elsif ($function eq "lcfirst") { $changed = lcfirst($char) }
                        elsif ($function eq "fc")      { $changed = fc($char) }
                        else {
                            die("Unexpected function \"$function\"");
                        }
                    }

                    my $pragma_desc   = $is_utf8_locale
                                      ? "(use locale ':not_characters'"
                                      : "(use locale";
                    my $encoding_desc = $encoded_in_utf8
                                      ? "; encoded in utf8)"
                                      : "; not encoded in utf8)";
                    ok($changed eq $should_be,
                       "$function(\"$char\") in C locale "
                       . $pragma_desc
                       . $encoding_desc
                       . " should be \"$should_be\", got \"$changed\"");

                    # Tainting shouldn't happen for utf8 locales, empty
                    # strings, or those characters above 255.
                    if (! $is_utf8_locale
                        && length($char) > 0
                        && ord($char) < 256)
                    {
                        check_taint($changed);
                    }
                    else {
                        check_taint_not($changed);
                    }

                    # Use UTF-8 next time through the loop
                    utf8::upgrade($char);
                }
            }
        }
    }
}

# The plan goes out last, once the total number of tests is known.
print "1..$test_num\n";
906f284f | 1656 | |
90248788 | 1657 | # eof |