Commit | Line | Data |
---|---|---|
8ebc5c01 | 1 | #!./perl -wT |
2 | ||
66cbab2c KW |
3 | # This tests plain 'use locale' and adorned 'use locale ":not_characters"' |
4 | # Because these pragmas are compile time, and I (khw) am trying to test | |
5 | # without using 'eval' as much as possible, which might cloud the issue, the | |
6 | # crucial parts of the code are duplicated in a block for each pragma. | |
7 | ||
6c2e653d KW |
8 | # To make a TODO test, add the string 'TODO' to its %test_names value |
9 | ||
e3a2734b KW |
10 | binmode STDOUT, ':utf8'; |
11 | binmode STDERR, ':utf8'; | |
12 | ||
8ebc5c01 | 13 | BEGIN { |
14 | chdir 't' if -d 't'; | |
20822f61 | 15 | @INC = '../lib'; |
f9cbebe1 | 16 | unshift @INC, '.'; |
b002077a | 17 | require Config; import Config; |
97a0514d | 18 | if (!$Config{d_setlocale} || $Config{ccflags} =~ /\bD?NO_LOCALE\b/) { |
b002077a CS |
19 | print "1..0\n"; |
20 | exit; | |
21 | } | |
2de3dbcc | 22 | $| = 1; |
8ebc5c01 | 23 | } |
24 | ||
25 | use strict; | |
26c1569f | 26 | use feature 'fc'; |
8ebc5c01 | 27 | |
108a305e | 28 | my $debug = $ENV{PERL_DEBUG_FULL_TEST} // 0; |
284102e8 | 29 | |
6d5d702a KW |
30 | # Certain tests have been shown to be problematical for a few locales. Don't |
31 | # fail them unless at least this percentage of the tested locales fail. | |
32 | my $acceptable_fold_failure_percentage = 5; | |
33 | ||
db4b7445 A |
34 | use Dumpvalue; |
35 | ||
36 | my $dumper = Dumpvalue->new( | |
37 | tick => qq{"}, | |
38 | quoteHighBit => 0, | |
39 | unctrl => "quote" | |
40 | ); | |
6be75cd7 | 41 | sub debug { |
db4b7445 A |
42 | return unless $debug; |
43 | my($mess) = join "", @_; | |
44 | chop $mess; | |
45 | print $dumper->stringify($mess,1), "\n"; | |
6be75cd7 JH |
46 | } |
47 | ||
48 | sub debugf { | |
49 | printf @_ if $debug; | |
50 | } | |
51 | ||
8ebc5c01 | 52 | my $have_setlocale = 0; |
53 | eval { | |
54 | require POSIX; | |
55 | import POSIX ':locale_h'; | |
56 | $have_setlocale++; | |
57 | }; | |
58 | ||
6dead956 | 59 | # Visual C's CRT goes silly on strings of the form "en_US.ISO8859-1" |
f6c6487a | 60 | # and mingw32 uses said silly CRT |
3a2d1764 SH |
61 | # This doesn't seem to be an issue any more, at least on Windows XP, |
62 | # so re-enable the tests for Windows XP onwards. | |
63 | my $winxp = ($^O eq 'MSWin32' && defined &Win32::GetOSVersion && | |
64 | join('.', (Win32::GetOSVersion())[1..2]) >= 5.1); | |
65 | $have_setlocale = 0 if ((($^O eq 'MSWin32' && !$winxp) || $^O eq 'NetWare') && | |
66 | $Config{cc} =~ /^(cl|gcc)/i); | |
6dead956 | 67 | |
36a42ae7 | 68 | # UWIN seems to loop after taint tests, just skip for now |
cd19b65c JH |
69 | $have_setlocale = 0 if ($^O =~ /^uwin/); |
70 | ||
0e053d1e | 71 | $a = 'abc %'; |
8ebc5c01 | 72 | |
c213d471 KW |
73 | my $test_num = 0; |
74 | ||
8ebc5c01 | 75 | sub ok { |
c213d471 | 76 | my ($result, $message) = @_; |
e3a2734b | 77 | $message = "" unless defined $message; |
8ebc5c01 | 78 | |
79 | print 'not ' unless ($result); | |
c213d471 | 80 | print "ok " . ++$test_num; |
e3a2734b KW |
81 | print " $message"; |
82 | print "\n"; | |
8ebc5c01 | 83 | } |
84 | ||
85 | # First we'll do a lot of taint checking for locales. | |
86 | # This is the easiest to test, actually, as any locale, | |
87 | # even the default locale will taint under 'use locale'. | |
88 | ||
89 | sub is_tainted { # hello, camel two. | |
9f1b1f2d | 90 | no warnings 'uninitialized' ; |
8ebc5c01 | 91 | my $dummy; |
ba74571d | 92 | local $@; |
8ebc5c01 | 93 | not eval { $dummy = join("", @_), kill 0; 1 } |
94 | } | |
95 | ||
a9b7c637 KW |
96 | sub check_taint ($;$) { |
97 | my $message_tail = $_[1] // ""; | |
98 | $message_tail = ": $message_tail" if $message_tail; | |
99 | ok is_tainted($_[0]), "verify that is tainted$message_tail"; | |
8ebc5c01 | 100 | } |
101 | ||
a9b7c637 KW |
102 | sub check_taint_not ($;$) { |
103 | my $message_tail = $_[1] // ""; | |
104 | $message_tail = ": $message_tail" if $message_tail; | |
105 | ok((not is_tainted($_[0])), "verify that isn't tainted$message_tail"); | |
8ebc5c01 | 106 | } |
107 | ||
bf3cd0e6 KW |
108 | "\tb\t" =~ /^m?(\s)(.*)\1$/; |
109 | check_taint_not $&, "not tainted outside 'use locale'"; | |
110 | ; | |
111 | ||
8ebc5c01 | 112 | use locale; # engage locale and therefore locale taint. |
113 | ||
36a42ae7 | 114 | check_taint_not $a; |
8ebc5c01 | 115 | |
36a42ae7 KW |
116 | check_taint uc($a); |
117 | check_taint "\U$a"; | |
118 | check_taint ucfirst($a); | |
119 | check_taint "\u$a"; | |
120 | check_taint lc($a); | |
26c1569f | 121 | check_taint fc($a); |
36a42ae7 | 122 | check_taint "\L$a"; |
26c1569f | 123 | check_taint "\F$a"; |
36a42ae7 KW |
124 | check_taint lcfirst($a); |
125 | check_taint "\l$a"; | |
8ebc5c01 | 126 | |
36a42ae7 KW |
127 | check_taint_not sprintf('%e', 123.456); |
128 | check_taint_not sprintf('%f', 123.456); | |
129 | check_taint_not sprintf('%g', 123.456); | |
130 | check_taint_not sprintf('%d', 123.456); | |
131 | check_taint_not sprintf('%x', 123.456); | |
8ebc5c01 | 132 | |
133 | $_ = $a; # untaint $_ | |
134 | ||
135 | $_ = uc($a); # taint $_ | |
136 | ||
36a42ae7 | 137 | check_taint $_; |
8ebc5c01 | 138 | |
139 | /(\w)/; # taint $&, $`, $', $+, $1. | |
36a42ae7 KW |
140 | check_taint $&; |
141 | check_taint $`; | |
142 | check_taint $'; | |
143 | check_taint $+; | |
144 | check_taint $1; | |
145 | check_taint_not $2; | |
8ebc5c01 | 146 | |
147 | /(.)/; # untaint $&, $`, $', $+, $1. | |
36a42ae7 KW |
148 | check_taint_not $&; |
149 | check_taint_not $`; | |
150 | check_taint_not $'; | |
151 | check_taint_not $+; | |
152 | check_taint_not $1; | |
153 | check_taint_not $2; | |
8ebc5c01 | 154 | |
155 | /(\W)/; # taint $&, $`, $', $+, $1. | |
36a42ae7 KW |
156 | check_taint $&; |
157 | check_taint $`; | |
158 | check_taint $'; | |
159 | check_taint $+; | |
160 | check_taint $1; | |
161 | check_taint_not $2; | |
8ebc5c01 | 162 | |
163 | /(\s)/; # taint $&, $`, $', $+, $1. | |
36a42ae7 KW |
164 | check_taint $&; |
165 | check_taint $`; | |
166 | check_taint $'; | |
167 | check_taint $+; | |
168 | check_taint $1; | |
169 | check_taint_not $2; | |
8ebc5c01 | 170 | |
171 | /(\S)/; # taint $&, $`, $', $+, $1. | |
36a42ae7 KW |
172 | check_taint $&; |
173 | check_taint $`; | |
174 | check_taint $'; | |
175 | check_taint $+; | |
176 | check_taint $1; | |
177 | check_taint_not $2; | |
8ebc5c01 | 178 | |
179 | $_ = $a; # untaint $_ | |
180 | ||
36a42ae7 | 181 | check_taint_not $_; |
8ebc5c01 | 182 | |
183 | /(b)/; # this must not taint | |
36a42ae7 KW |
184 | check_taint_not $&; |
185 | check_taint_not $`; | |
186 | check_taint_not $'; | |
187 | check_taint_not $+; | |
188 | check_taint_not $1; | |
189 | check_taint_not $2; | |
8ebc5c01 | 190 | |
191 | $_ = $a; # untaint $_ | |
192 | ||
36a42ae7 | 193 | check_taint_not $_; |
8ebc5c01 | 194 | |
195 | $b = uc($a); # taint $b | |
196 | s/(.+)/$b/; # this must taint only the $_ | |
197 | ||
36a42ae7 KW |
198 | check_taint $_; |
199 | check_taint_not $&; | |
200 | check_taint_not $`; | |
201 | check_taint_not $'; | |
202 | check_taint_not $+; | |
203 | check_taint_not $1; | |
204 | check_taint_not $2; | |
8ebc5c01 | 205 | |
206 | $_ = $a; # untaint $_ | |
207 | ||
208 | s/(.+)/b/; # this must not taint | |
36a42ae7 KW |
209 | check_taint_not $_; |
210 | check_taint_not $&; | |
211 | check_taint_not $`; | |
212 | check_taint_not $'; | |
213 | check_taint_not $+; | |
214 | check_taint_not $1; | |
215 | check_taint_not $2; | |
8ebc5c01 | 216 | |
217 | $b = $a; # untaint $b | |
218 | ||
219 | ($b = $a) =~ s/\w/$&/; | |
36a42ae7 KW |
220 | check_taint $b; # $b should be tainted. |
221 | check_taint_not $a; # $a should be not. | |
8ebc5c01 | 222 | |
223 | $_ = $a; # untaint $_ | |
224 | ||
225 | s/(\w)/\l$1/; # this must taint | |
36a42ae7 KW |
226 | check_taint $_; |
227 | check_taint $&; | |
228 | check_taint $`; | |
229 | check_taint $'; | |
230 | check_taint $+; | |
231 | check_taint $1; | |
232 | check_taint_not $2; | |
8ebc5c01 | 233 | |
234 | $_ = $a; # untaint $_ | |
235 | ||
236 | s/(\w)/\L$1/; # this must taint | |
36a42ae7 KW |
237 | check_taint $_; |
238 | check_taint $&; | |
239 | check_taint $`; | |
240 | check_taint $'; | |
241 | check_taint $+; | |
242 | check_taint $1; | |
243 | check_taint_not $2; | |
8ebc5c01 | 244 | |
245 | $_ = $a; # untaint $_ | |
246 | ||
247 | s/(\w)/\u$1/; # this must taint | |
36a42ae7 KW |
248 | check_taint $_; |
249 | check_taint $&; | |
250 | check_taint $`; | |
251 | check_taint $'; | |
252 | check_taint $+; | |
253 | check_taint $1; | |
254 | check_taint_not $2; | |
8ebc5c01 | 255 | |
256 | $_ = $a; # untaint $_ | |
257 | ||
258 | s/(\w)/\U$1/; # this must taint | |
36a42ae7 KW |
259 | check_taint $_; |
260 | check_taint $&; | |
261 | check_taint $`; | |
262 | check_taint $'; | |
263 | check_taint $+; | |
264 | check_taint $1; | |
265 | check_taint_not $2; | |
8ebc5c01 | 266 | |
267 | # After all this tainting $a should be cool. | |
268 | ||
36a42ae7 | 269 | check_taint_not $a; |
8ebc5c01 | 270 | |
66cbab2c KW |
271 | { # This is just the previous tests copied here with a different |
272 | # compile-time pragma. | |
273 | ||
274 | use locale ':not_characters'; # engage restricted locale with different | |
275 | # tainting rules | |
276 | ||
277 | check_taint_not $a; | |
278 | ||
279 | check_taint_not uc($a); | |
280 | check_taint_not "\U$a"; | |
281 | check_taint_not ucfirst($a); | |
282 | check_taint_not "\u$a"; | |
283 | check_taint_not lc($a); | |
26c1569f | 284 | check_taint_not fc($a); |
66cbab2c | 285 | check_taint_not "\L$a"; |
26c1569f | 286 | check_taint_not "\F$a"; |
66cbab2c KW |
287 | check_taint_not lcfirst($a); |
288 | check_taint_not "\l$a"; | |
289 | ||
290 | check_taint_not sprintf('%e', 123.456); | |
291 | check_taint_not sprintf('%f', 123.456); | |
292 | check_taint_not sprintf('%g', 123.456); | |
293 | check_taint_not sprintf('%d', 123.456); | |
294 | check_taint_not sprintf('%x', 123.456); | |
295 | ||
296 | $_ = $a; # untaint $_ | |
297 | ||
298 | $_ = uc($a); # taint $_ | |
299 | ||
300 | check_taint_not $_; | |
301 | ||
302 | /(\w)/; # taint $&, $`, $', $+, $1. | |
303 | check_taint_not $&; | |
304 | check_taint_not $`; | |
305 | check_taint_not $'; | |
306 | check_taint_not $+; | |
307 | check_taint_not $1; | |
308 | check_taint_not $2; | |
309 | ||
310 | /(.)/; # untaint $&, $`, $', $+, $1. | |
311 | check_taint_not $&; | |
312 | check_taint_not $`; | |
313 | check_taint_not $'; | |
314 | check_taint_not $+; | |
315 | check_taint_not $1; | |
316 | check_taint_not $2; | |
317 | ||
318 | /(\W)/; # taint $&, $`, $', $+, $1. | |
319 | check_taint_not $&; | |
320 | check_taint_not $`; | |
321 | check_taint_not $'; | |
322 | check_taint_not $+; | |
323 | check_taint_not $1; | |
324 | check_taint_not $2; | |
325 | ||
326 | /(\s)/; # taint $&, $`, $', $+, $1. | |
327 | check_taint_not $&; | |
328 | check_taint_not $`; | |
329 | check_taint_not $'; | |
330 | check_taint_not $+; | |
331 | check_taint_not $1; | |
332 | check_taint_not $2; | |
333 | ||
334 | /(\S)/; # taint $&, $`, $', $+, $1. | |
335 | check_taint_not $&; | |
336 | check_taint_not $`; | |
337 | check_taint_not $'; | |
338 | check_taint_not $+; | |
339 | check_taint_not $1; | |
340 | check_taint_not $2; | |
341 | ||
342 | $_ = $a; # untaint $_ | |
343 | ||
344 | check_taint_not $_; | |
345 | ||
346 | /(b)/; # this must not taint | |
347 | check_taint_not $&; | |
348 | check_taint_not $`; | |
349 | check_taint_not $'; | |
350 | check_taint_not $+; | |
351 | check_taint_not $1; | |
352 | check_taint_not $2; | |
353 | ||
354 | $_ = $a; # untaint $_ | |
355 | ||
356 | check_taint_not $_; | |
357 | ||
358 | $b = uc($a); # taint $b | |
359 | s/(.+)/$b/; # this must taint only the $_ | |
360 | ||
361 | check_taint_not $_; | |
362 | check_taint_not $&; | |
363 | check_taint_not $`; | |
364 | check_taint_not $'; | |
365 | check_taint_not $+; | |
366 | check_taint_not $1; | |
367 | check_taint_not $2; | |
368 | ||
369 | $_ = $a; # untaint $_ | |
370 | ||
371 | s/(.+)/b/; # this must not taint | |
372 | check_taint_not $_; | |
373 | check_taint_not $&; | |
374 | check_taint_not $`; | |
375 | check_taint_not $'; | |
376 | check_taint_not $+; | |
377 | check_taint_not $1; | |
378 | check_taint_not $2; | |
379 | ||
380 | $b = $a; # untaint $b | |
381 | ||
382 | ($b = $a) =~ s/\w/$&/; | |
383 | check_taint_not $b; # $b should be tainted. | |
384 | check_taint_not $a; # $a should be not. | |
385 | ||
386 | $_ = $a; # untaint $_ | |
387 | ||
388 | s/(\w)/\l$1/; # this must taint | |
389 | check_taint_not $_; | |
390 | check_taint_not $&; | |
391 | check_taint_not $`; | |
392 | check_taint_not $'; | |
393 | check_taint_not $+; | |
394 | check_taint_not $1; | |
395 | check_taint_not $2; | |
396 | ||
397 | $_ = $a; # untaint $_ | |
398 | ||
399 | s/(\w)/\L$1/; # this must taint | |
400 | check_taint_not $_; | |
401 | check_taint_not $&; | |
402 | check_taint_not $`; | |
403 | check_taint_not $'; | |
404 | check_taint_not $+; | |
405 | check_taint_not $1; | |
406 | check_taint_not $2; | |
407 | ||
408 | $_ = $a; # untaint $_ | |
409 | ||
410 | s/(\w)/\u$1/; # this must taint | |
411 | check_taint_not $_; | |
412 | check_taint_not $&; | |
413 | check_taint_not $`; | |
414 | check_taint_not $'; | |
415 | check_taint_not $+; | |
416 | check_taint_not $1; | |
417 | check_taint_not $2; | |
418 | ||
419 | $_ = $a; # untaint $_ | |
420 | ||
421 | s/(\w)/\U$1/; # this must taint | |
422 | check_taint_not $_; | |
423 | check_taint_not $&; | |
424 | check_taint_not $`; | |
425 | check_taint_not $'; | |
426 | check_taint_not $+; | |
427 | check_taint_not $1; | |
428 | check_taint_not $2; | |
429 | ||
430 | # After all this tainting $a should be cool. | |
431 | ||
432 | check_taint_not $a; | |
433 | } | |
434 | ||
435 | # Here are in scope of 'use locale' | |
436 | ||
8ebc5c01 | 437 | # I think we've seen quite enough of taint. |
438 | # Let us do some *real* locale work now, | |
284102e8 | 439 | # unless setlocale() is missing (i.e. minitest). |
8ebc5c01 | 440 | |
fdf053ee KW |
441 | unless ($have_setlocale) { |
442 | print "1..$test_num\n"; | |
443 | exit; | |
444 | } | |
8ebc5c01 | 445 | |
6cf0b567 | 446 | # The test number before our first setlocale() |
66330f13 | 447 | my $final_without_setlocale = $test_num; |
6cf0b567 | 448 | |
284102e8 JH |
449 | # Find locales. |
450 | ||
6be75cd7 JH |
451 | debug "# Scanning for locales...\n"; |
452 | ||
453 | # Note that it's okay that some languages have their native names | |
454 | # capitalized here even though that's not "right". They are lowercased | |
455 | # anyway later during the scanning process (and besides, some clueless | |
98dc9551 | 456 | # vendor might have them capitalized erroneously anyway). |
6be75cd7 | 457 | |
284102e8 | 458 | my $locales = <<EOF; |
6be75cd7 | 459 | Afrikaans:af:za:1 15 |
284102e8 | 460 | Arabic:ar:dz eg sa:6 arabic8 |
6be75cd7 JH |
461 | Brezhoneg Breton:br:fr:1 15 |
462 | Bulgarski Bulgarian:bg:bg:5 | |
dd8482fc | 463 | Chinese:zh:cn tw:cn.EUC eucCN eucTW euc.CN euc.TW Big5 GB2312 tw.EUC |
6be75cd7 JH |
464 | Hrvatski Croatian:hr:hr:2 |
465 | Cymraeg Welsh:cy:cy:1 14 15 | |
284102e8 | 466 | Czech:cs:cz:2 |
df8a53a3 | 467 | Dansk Danish:da:dk:1 15 |
6be75cd7 | 468 | Nederlands Dutch:nl:be nl:1 15 |
dd8482fc | 469 | English American British:en:au ca gb ie nz us uk zw:1 15 cp850 |
6be75cd7 JH |
470 | Esperanto:eo:eo:3 |
471 | Eesti Estonian:et:ee:4 6 13 | |
472 | Suomi Finnish:fi:fi:1 15 | |
473 | Flamish::fl:1 15 | |
6be75cd7 JH |
474 | Deutsch German:de:at be ch de lu:1 15 |
475 | Euskaraz Basque:eu:es fr:1 15 | |
6be75cd7 JH |
476 | Galego Galician:gl:es:1 15 |
477 | Ellada Greek:el:gr:7 g8 | |
6be75cd7 JH |
478 | Frysk:fy:nl:1 15 |
479 | Greenlandic:kl:gl:4 6 | |
284102e8 JH |
480 | Hebrew:iw:il:8 hebrew8 |
481 | Hungarian:hu:hu:2 | |
df8a53a3 | 482 | Indonesian:id:id:1 15 |
6be75cd7 JH |
483 | Gaeilge Irish:ga:IE:1 14 15 |
484 | Italiano Italian:it:ch it:1 15 | |
485 | Nihongo Japanese:ja:jp:euc eucJP jp.EUC sjis | |
284102e8 | 486 | Korean:ko:kr: |
6be75cd7 JH |
487 | Latine Latin:la:va:1 15 |
488 | Latvian:lv:lv:4 6 13 | |
489 | Lithuanian:lt:lt:4 6 13 | |
490 | Macedonian:mk:mk:1 15 | |
491 | Maltese:mt:mt:3 | |
dd8482fc | 492 | Moldovan:mo:mo:2 |
df8a53a3 | 493 | Norsk Norwegian:no no\@nynorsk nb nn:no:1 15 |
6be75cd7 JH |
494 | Occitan:oc:es:1 15 |
495 | Polski Polish:pl:pl:2 | |
284102e8 | 496 | Rumanian:ro:ro:2 |
a528dad0 | 497 | Russki Russian:ru:ru su ua:5 koi8 koi8r KOI8-R koi8u cp1251 cp866 |
6be75cd7 | 498 | Serbski Serbian:sr:yu:5 |
284102e8 | 499 | Slovak:sk:sk:2 |
6be75cd7 | 500 | Slovene Slovenian:sl:si:2 |
d43ce814 JH |
501 | Sqhip Albanian:sq:sq:1 15 |
502 | Svenska Swedish:sv:fi se:1 15 | |
6be75cd7 | 503 | Thai:th:th:11 tis620 |
284102e8 | 504 | Turkish:tr:tr:9 turkish8 |
dd8482fc | 505 | Yiddish:yi::1 15 |
284102e8 JH |
506 | EOF |
507 | ||
ee50adbe | 508 | if ($^O eq 'os390') { |
dd8482fc | 509 | # These cause heartburn. Broken locales? |
ee50adbe PP |
510 | $locales =~ s/Svenska Swedish:sv:fi se:1 15\n//; |
511 | $locales =~ s/Thai:th:th:11 tis620\n//; | |
512 | } | |
513 | ||
ef4a39e5 | 514 | sub in_utf8 () { $^H & 0x08 || (${^OPEN} || "") =~ /:utf8/ } |
f9cbebe1 JH |
515 | |
516 | if (in_utf8) { | |
8a6cb2cb | 517 | require "lib/locale/utf8"; |
f9cbebe1 | 518 | } else { |
8a6cb2cb | 519 | require "lib/locale/latin1"; |
f9cbebe1 JH |
520 | } |
521 | ||
284102e8 JH |
522 | my @Locale; |
523 | my $Locale; | |
519c0534 | 524 | my @Word_; |
a160ac48 KW |
525 | my @Digit_; |
526 | my @Space_; | |
527 | my @Alpha_; | |
528 | my @Alnum_; | |
529 | my @Ascii_; | |
530 | my @Blank_; | |
531 | my @Cntrl_; | |
532 | my @Graph_; | |
533 | my @Lower_; | |
534 | my @Print_; | |
535 | my @Upper_; | |
536 | my @Xdigit_; | |
537 | my @Cased_; | |
284102e8 | 538 | |
284102e8 JH |
539 | sub trylocale { |
540 | my $locale = shift; | |
0b9f254b | 541 | return if grep { $locale eq $_ } @Locale; |
a810e350 | 542 | return unless setlocale(&POSIX::LC_ALL, $locale); |
e439cacb KW |
543 | my $badutf8; |
544 | { | |
545 | local $SIG{__WARN__} = sub { | |
546 | $badutf8 = $_[0] =~ /Malformed UTF-8/; | |
547 | }; | |
548 | $Locale =~ /UTF-?8/i; | |
284102e8 | 549 | } |
e439cacb KW |
550 | |
551 | if ($badutf8) { | |
552 | ok(0, "Locale name contains malformed utf8"); | |
553 | return; | |
554 | } | |
555 | push @Locale, $locale; | |
284102e8 | 556 | } |
8ebc5c01 | 557 | |
284102e8 JH |
558 | sub decode_encodings { |
559 | my @enc; | |
8ebc5c01 | 560 | |
284102e8 JH |
561 | foreach (split(/ /, shift)) { |
562 | if (/^(\d+)$/) { | |
563 | push @enc, "ISO8859-$1"; | |
564 | push @enc, "iso8859$1"; # HP | |
565 | if ($1 eq '1') { | |
566 | push @enc, "roman8"; # HP | |
567 | } | |
568 | } else { | |
569 | push @enc, $_; | |
dd8482fc | 570 | push @enc, "$_.UTF-8"; |
8ebc5c01 | 571 | } |
572 | } | |
ee50adbe PP |
573 | if ($^O eq 'os390') { |
574 | push @enc, qw(IBM-037 IBM-819 IBM-1047); | |
575 | } | |
8ebc5c01 | 576 | |
284102e8 | 577 | return @enc; |
8ebc5c01 | 578 | } |
579 | ||
284102e8 JH |
580 | trylocale("C"); |
581 | trylocale("POSIX"); | |
582 | foreach (0..15) { | |
583 | trylocale("ISO8859-$_"); | |
284102e8 | 584 | trylocale("iso8859$_"); |
097ee67d JH |
585 | trylocale("iso8859-$_"); |
586 | trylocale("iso_8859_$_"); | |
587 | trylocale("isolatin$_"); | |
588 | trylocale("isolatin-$_"); | |
589 | trylocale("iso_latin_$_"); | |
8ebc5c01 | 590 | } |
591 | ||
645e49ed JH |
592 | # Sanitize the environment so that we can run the external 'locale' |
593 | # program without the taint mode getting grumpy. | |
cce5967e JH |
594 | |
595 | # $ENV{PATH} is special in VMS. | |
596 | delete $ENV{PATH} if $^O ne 'VMS' or $Config{d_setenv}; | |
597 | ||
598 | # Other subversive stuff. | |
599 | delete @ENV{qw(IFS CDPATH ENV BASH_ENV)}; | |
dd8482fc | 600 | |
21477fb4 | 601 | if (-x "/usr/bin/locale" && open(LOCALES, "/usr/bin/locale -a 2>/dev/null|")) { |
dd8482fc | 602 | while (<LOCALES>) { |
d281a6ac NC |
603 | # It seems that /usr/bin/locale steadfastly outputs 8 bit data, which |
604 | # ain't great when we're running this testPERL_UNICODE= so that utf8 | |
605 | # locales will cause all IO hadles to default to (assume) utf8 | |
606 | next unless utf8::valid($_); | |
dd8482fc JH |
607 | chomp; |
608 | trylocale($_); | |
284102e8 | 609 | } |
dd8482fc | 610 | close(LOCALES); |
a6259068 | 611 | } elsif ($^O eq 'VMS' && defined($ENV{'SYS$I18N_LOCALE'}) && -d 'SYS$I18N_LOCALE') { |
71e5cbb3 | 612 | # The SYS$I18N_LOCALE logical name search list was not present on |
a6259068 PP |
613 | # VAX VMS V5.5-12, but was on AXP && VAX VMS V6.2 as well as later versions. |
614 | opendir(LOCALES, "SYS\$I18N_LOCALE:"); | |
615 | while ($_ = readdir(LOCALES)) { | |
616 | chomp; | |
617 | trylocale($_); | |
618 | } | |
619 | close(LOCALES); | |
87e33296 SP |
620 | } elsif ($^O eq 'openbsd' && -e '/usr/share/locale') { |
621 | ||
622 | # OpenBSD doesn't have a locale executable, so reading /usr/share/locale | |
623 | # is much easier and faster than the last resort method. | |
624 | ||
625 | opendir(LOCALES, '/usr/share/locale'); | |
626 | while ($_ = readdir(LOCALES)) { | |
627 | chomp; | |
628 | trylocale($_); | |
629 | } | |
630 | close(LOCALES); | |
dd8482fc JH |
631 | } else { |
632 | ||
633 | # This is going to be slow. | |
634 | ||
635 | foreach my $locale (split(/\n/, $locales)) { | |
636 | my ($locale_name, $language_codes, $country_codes, $encodings) = | |
637 | split(/:/, $locale); | |
638 | my @enc = decode_encodings($encodings); | |
639 | foreach my $loc (split(/ /, $locale_name)) { | |
640 | trylocale($loc); | |
284102e8 | 641 | foreach my $enc (@enc) { |
dd8482fc | 642 | trylocale("$loc.$enc"); |
284102e8 | 643 | } |
dd8482fc | 644 | $loc = lc $loc; |
284102e8 | 645 | foreach my $enc (@enc) { |
dd8482fc JH |
646 | trylocale("$loc.$enc"); |
647 | } | |
648 | } | |
649 | foreach my $lang (split(/ /, $language_codes)) { | |
650 | trylocale($lang); | |
651 | foreach my $country (split(/ /, $country_codes)) { | |
652 | my $lc = "${lang}_${country}"; | |
653 | trylocale($lc); | |
654 | foreach my $enc (@enc) { | |
655 | trylocale("$lc.$enc"); | |
656 | } | |
657 | my $lC = "${lang}_\U${country}"; | |
658 | trylocale($lC); | |
659 | foreach my $enc (@enc) { | |
660 | trylocale("$lC.$enc"); | |
661 | } | |
284102e8 JH |
662 | } |
663 | } | |
664 | } | |
665 | } | |
4599a1de | 666 | |
a810e350 | 667 | setlocale(&POSIX::LC_ALL, "C"); |
d43ce814 | 668 | |
86f50d7d | 669 | if ($^O eq 'darwin') { |
4373e181 | 670 | # Darwin 8/Mac OS X 10.4 and 10.5 have bad Basque locales: perl bug #35895, |
86f50d7d | 671 | # Apple bug ID# 4139653. It also has a problem in Byelorussian. |
4373e181 RGS |
672 | (my $v) = $Config{osvers} =~ /^(\d+)/; |
673 | if ($v >= 8 and $v < 10) { | |
86f50d7d | 674 | debug "# Skipping eu_ES, be_BY locales -- buggy in Darwin\n"; |
a5ec937f | 675 | @Locale = grep ! m/^(eu_ES(?:\..*)?|be_BY\.CP1131)$/, @Locale; |
dfa5c78f | 676 | } elsif ($v < 12) { |
a44d0896 NC |
677 | debug "# Skipping be_BY locales -- buggy in Darwin\n"; |
678 | @Locale = grep ! m/^be_BY\.CP1131$/, @Locale; | |
a5ec937f | 679 | } |
86f50d7d DD |
680 | } |
681 | ||
4599a1de JH |
682 | @Locale = sort @Locale; |
683 | ||
887ef7ed PP |
684 | debug "# Locales =\n"; |
685 | for ( @Locale ) { | |
686 | debug "# $_\n"; | |
687 | } | |
8ebc5c01 | 688 | |
284102e8 | 689 | my %Problem; |
2a680da6 JH |
690 | my %Okay; |
691 | my %Testing; | |
30032ef4 | 692 | my @Added_alpha; # Alphas that aren't in the C locale. |
c08acc4c | 693 | my %test_names; |
284102e8 | 694 | |
019bf7dd KW |
695 | sub display_characters { |
696 | # This returns a display string denoting the input parameter @_, each | |
697 | # entry of which is a single character in the range 0-255. The first part | |
698 | # of the output is a string of the characters in @_ that are ASCII | |
699 | # graphics, and hence unambiguously displayable. They are given by code | |
700 | # point order. The second part is the remaining code points, the ordinals | |
701 | # of which are each displayed as 2-digit hex. Blanks are inserted so as | |
702 | # to keep anything from the first part looking like a 2-digit hex number. | |
703 | ||
704 | no locale; | |
705 | my @chars = sort { ord $a <=> ord $b } @_; | |
706 | my $output = ""; | |
707 | my $hex = ""; | |
708 | my $range_start; | |
709 | my $start_class; | |
710 | push @chars, chr(258); # This sentinel simplifies the loop termination | |
711 | # logic | |
712 | foreach my $i (0 .. @chars - 1) { | |
713 | my $char = $chars[$i]; | |
714 | my $range_end; | |
715 | my $class; | |
716 | ||
717 | # We avoid using [:posix:] classes, as these are being tested in this | |
718 | # file. Each equivalence class below is for things that can appear in | |
719 | # a range; those that can't be in a range have class -1. 0 for those | |
720 | # which should be output in hex; and >0 for the other ranges | |
721 | if ($char =~ /[A-Z]/) { | |
722 | $class = 2; | |
723 | } | |
724 | elsif ($char =~ /[a-z]/) { | |
725 | $class = 3; | |
726 | } | |
727 | elsif ($char =~ /[0-9]/) { | |
728 | $class = 4; | |
729 | } | |
730 | elsif ($char =~ /[[\]!"#\$\%&\'()*+,.\/:\\;<=>?\@\^_`{|}~-]/) { | |
731 | $class = -1; # Punct never appears in a range | |
732 | } | |
733 | else { | |
734 | $class = 0; # Output in hex | |
735 | } | |
736 | ||
737 | if (! defined $range_start) { | |
738 | if ($class < 0) { | |
739 | $output .= $char; | |
740 | } | |
741 | else { | |
742 | $range_start = ord $char; | |
743 | $start_class = $class; | |
744 | } | |
745 | } # A range ends if not consecutive, or the class-type changes | |
746 | elsif (ord $char != ($range_end = ord($chars[$i-1])) + 1 | |
747 | || $class != $start_class) | |
748 | { | |
749 | ||
750 | # Here, the current character is not in the range. This means the | |
751 | # previous character must have been. Output the range up through | |
752 | # that one. | |
753 | my $range_length = $range_end - $range_start + 1; | |
754 | if ($start_class > 0) { | |
755 | $output .= " " . chr($range_start); | |
756 | $output .= "-" . chr($range_end) if $range_length > 1; | |
757 | } | |
758 | else { | |
759 | $hex .= sprintf(" %02X", $range_start); | |
760 | $hex .= sprintf("-%02X", $range_end) if $range_length > 1; | |
761 | } | |
762 | ||
763 | # Handle the new current character, as potentially beginning a new | |
764 | # range | |
765 | undef $range_start; | |
766 | redo; | |
767 | } | |
768 | } | |
769 | ||
770 | $output =~ s/^ //; | |
771 | $hex =~ s/^ // if ! length $output; | |
772 | return "$output$hex"; | |
773 | } | |
774 | ||
30032ef4 KW |
775 | sub report_result { |
776 | my ($Locale, $i, $pass_fail, $message) = @_; | |
15bbd6a2 KW |
777 | $message //= ""; |
778 | $message = " ($message)" if $message; | |
30032ef4 | 779 | unless ($pass_fail) { |
2a680da6 | 780 | $Problem{$i}{$Locale} = 1; |
baae13cb | 781 | debug "# failed $i ($test_names{$i}) with locale '$Locale'$message\n"; |
2a680da6 JH |
782 | } else { |
783 | push @{$Okay{$i}}, $Locale; | |
784 | } | |
785 | } | |
786 | ||
7c844d17 KW |
787 | sub report_multi_result { |
788 | my ($Locale, $i, $results_ref) = @_; | |
789 | ||
790 | # $results_ref points to an array, each element of which is a character that was | |
791 | # in error for this test numbered '$i'. If empty, the test passed | |
792 | ||
793 | my $message = ""; | |
794 | if (@$results_ref) { | |
019bf7dd | 795 | $message = join " ", "for", display_characters(@$results_ref); |
7c844d17 KW |
796 | } |
797 | report_result($Locale, $i, @$results_ref == 0, $message); | |
798 | } | |
799 | ||
c4093d7d KW |
800 | my $first_locales_test_number = $final_without_setlocale + 1; |
801 | my $locales_test_number; | |
802 | my $not_necessarily_a_problem_test_number; | |
6d5d702a KW |
803 | my $first_casing_test_number; |
804 | my $final_casing_test_number; | |
c4093d7d KW |
805 | my %setlocale_failed; # List of locales that setlocale() didn't work on |
806 | ||
284102e8 | 807 | foreach $Locale (@Locale) { |
c4093d7d | 808 | $locales_test_number = $first_locales_test_number - 1; |
284102e8 | 809 | debug "# Locale = $Locale\n"; |
284102e8 | 810 | |
a810e350 | 811 | unless (setlocale(&POSIX::LC_ALL, $Locale)) { |
c4093d7d | 812 | $setlocale_failed{$Locale} = $Locale; |
284102e8 | 813 | next; |
8ebc5c01 | 814 | } |
8ebc5c01 | 815 | |
66cbab2c KW |
816 | # We test UTF-8 locales only under ':not_characters'; otherwise they have |
817 | # documented deficiencies. Non- UTF-8 locales are tested only under plain | |
818 | # 'use locale', as otherwise we would have to convert everything in them | |
819 | # to Unicode. | |
9ecfa8e4 KW |
820 | # The locale name doesn't necessarily have to have "utf8" in it to be a |
821 | # UTF-8 locale, but it works mostly. | |
66cbab2c KW |
822 | my $is_utf8_locale = $Locale =~ /UTF-?8/i; |
823 | ||
95eaa1bf KW |
824 | my %UPPER = (); # All alpha X for which uc(X) == X and lc(X) != X |
825 | my %lower = (); # All alpha X for which lc(X) == X and uc(X) != X | |
826 | my %BoThCaSe = (); # All alpha X for which uc(X) == lc(X) == X | |
66cbab2c KW |
827 | |
828 | if (! $is_utf8_locale) { | |
829 | use locale; | |
a160ac48 | 830 | @Word_ = grep /\w/, map { chr } 0..255; |
aac995d2 | 831 | @Digit_ = grep /\d/, map { chr } 0..255; |
a160ac48 KW |
832 | @Space_ = grep /\s/, map { chr } 0..255; |
833 | @Alpha_ = grep /[[:alpha:]]/, map {chr } 0..255; | |
834 | @Alnum_ = grep /[[:alnum:]]/, map {chr } 0..255; | |
835 | @Ascii_ = grep /[[:ascii:]]/, map {chr } 0..255; | |
836 | @Blank_ = grep /[[:blank:]]/, map {chr } 0..255; | |
837 | @Cntrl_ = grep /[[:cntrl:]]/, map {chr } 0..255; | |
838 | @Graph_ = grep /[[:graph:]]/, map {chr } 0..255; | |
839 | @Lower_ = grep /[[:lower:]]/, map {chr } 0..255; | |
840 | @Print_ = grep /[[:print:]]/, map {chr } 0..255; | |
841 | @Upper_ = grep /[[:upper:]]/, map {chr } 0..255; | |
842 | @Xdigit_ = grep /[[:xdigit:]]/, map {chr } 0..255; | |
843 | @Cased_ = grep /[[:upper:]]/i, map {chr } 0..255; | |
e5272a46 | 844 | |
71e5cbb3 KW |
845 | # Sieve the uppercase and the lowercase. |
846 | ||
519c0534 | 847 | for (@Word_) { |
71e5cbb3 KW |
848 | if (/[^\d_]/) { # skip digits and the _ |
849 | if (uc($_) eq $_) { | |
850 | $UPPER{$_} = $_; | |
851 | } | |
852 | if (lc($_) eq $_) { | |
853 | $lower{$_} = $_; | |
854 | } | |
855 | } | |
856 | } | |
66cbab2c KW |
857 | } |
858 | else { | |
859 | use locale ':not_characters'; | |
a160ac48 | 860 | @Word_ = grep /\w/, map { chr } 0..255; |
aac995d2 | 861 | @Digit_ = grep /\d/, map { chr } 0..255; |
a160ac48 KW |
862 | @Space_ = grep /\s/, map { chr } 0..255; |
863 | @Alpha_ = grep /[[:alpha:]]/, map {chr } 0..255; | |
864 | @Alnum_ = grep /[[:alnum:]]/, map {chr } 0..255; | |
865 | @Ascii_ = grep /[[:ascii:]]/, map {chr } 0..255; | |
866 | @Blank_ = grep /[[:blank:]]/, map {chr } 0..255; | |
867 | @Cntrl_ = grep /[[:cntrl:]]/, map {chr } 0..255; | |
868 | @Graph_ = grep /[[:graph:]]/, map {chr } 0..255; | |
869 | @Lower_ = grep /[[:lower:]]/, map {chr } 0..255; | |
870 | @Print_ = grep /[[:print:]]/, map {chr } 0..255; | |
871 | @Upper_ = grep /[[:upper:]]/, map {chr } 0..255; | |
872 | @Xdigit_ = grep /[[:xdigit:]]/, map {chr } 0..255; | |
873 | @Cased_ = grep /[[:upper:]]/i, map {chr } 0..255; | |
519c0534 | 874 | for (@Word_) { |
66cbab2c KW |
875 | if (/[^\d_]/) { # skip digits and the _ |
876 | if (uc($_) eq $_) { | |
877 | $UPPER{$_} = $_; | |
878 | } | |
879 | if (lc($_) eq $_) { | |
880 | $lower{$_} = $_; | |
881 | } | |
882 | } | |
883 | } | |
884 | } | |
a160ac48 KW |
885 | |
886 | debug "# :upper: = ", display_characters(@Upper_), "\n"; | |
887 | debug "# :lower: = ", display_characters(@Lower_), "\n"; | |
888 | debug "# :cased: = ", display_characters(@Cased_), "\n"; | |
889 | debug "# :alpha: = ", display_characters(@Alpha_), "\n"; | |
890 | debug "# :alnum: = ", display_characters(@Alnum_), "\n"; | |
891 | debug "# w = ", display_characters(@Word_), "\n"; | |
892 | debug "# :graph: = ", display_characters(@Graph_), "\n"; | |
893 | debug "# :print: = ", display_characters(@Print_), "\n"; | |
894 | debug "# d = ", display_characters(@Digit_), "\n"; | |
895 | debug "# :xdigit: = ", display_characters(@Xdigit_), "\n"; | |
896 | debug "# :blank: = ", display_characters(@Blank_), "\n"; | |
897 | debug "# s = ", display_characters(@Space_), "\n"; | |
898 | debug "# :cntrl: = ", display_characters(@Cntrl_), "\n"; | |
899 | debug "# :ascii: = ", display_characters(@Ascii_), "\n"; | |
900 | ||
284102e8 | 901 | foreach (keys %UPPER) { |
a160ac48 | 902 | |
097ee67d | 903 | $BoThCaSe{$_}++ if exists $lower{$_}; |
284102e8 JH |
904 | } |
905 | foreach (keys %lower) { | |
097ee67d | 906 | $BoThCaSe{$_}++ if exists $UPPER{$_}; |
284102e8 | 907 | } |
097ee67d | 908 | foreach (keys %BoThCaSe) { |
284102e8 JH |
909 | delete $UPPER{$_}; |
910 | delete $lower{$_}; | |
911 | } | |
912 | ||
019bf7dd KW |
913 | debug "# UPPER = ", display_characters(keys %UPPER), "\n"; |
914 | debug "# lower = ", display_characters(keys %lower), "\n"; | |
915 | debug "# BoThCaSe = ", display_characters(keys %BoThCaSe), "\n"; | |
284102e8 | 916 | |
baa71cfd | 917 | my @failures; |
3da38613 | 918 | my @fold_failures; |
baa71cfd KW |
919 | foreach my $x (sort keys %UPPER) { |
920 | my $ok; | |
3da38613 | 921 | my $fold_ok; |
baa71cfd KW |
922 | if ($is_utf8_locale) { |
923 | use locale ':not_characters'; | |
924 | $ok = $x =~ /[[:upper:]]/; | |
3da38613 | 925 | $fold_ok = $x =~ /[[:lower:]]/i; |
baa71cfd KW |
926 | } |
927 | else { | |
928 | use locale; | |
929 | $ok = $x =~ /[[:upper:]]/; | |
3da38613 | 930 | $fold_ok = $x =~ /[[:lower:]]/i; |
baa71cfd KW |
931 | } |
932 | push @failures, $x unless $ok; | |
3da38613 | 933 | push @fold_failures, $x unless $fold_ok; |
baa71cfd | 934 | } |
baa71cfd | 935 | $locales_test_number++; |
6d5d702a | 936 | $first_casing_test_number = $locales_test_number; |
95eaa1bf | 937 | $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/ matches all alpha X for which uc(X) == X and lc(X) != X'; |
7c844d17 | 938 | report_multi_result($Locale, $locales_test_number, \@failures); |
6d5d702a | 939 | |
3da38613 | 940 | $locales_test_number++; |
6d5d702a | 941 | |
95eaa1bf | 942 | $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i matches all alpha X for which uc(X) == X and lc(X) != X'; |
7c844d17 | 943 | report_multi_result($Locale, $locales_test_number, \@fold_failures); |
baa71cfd | 944 | |
baa71cfd | 945 | undef @failures; |
3da38613 | 946 | undef @fold_failures; |
baa71cfd KW |
947 | |
948 | foreach my $x (sort keys %lower) { | |
949 | my $ok; | |
3da38613 | 950 | my $fold_ok; |
baa71cfd KW |
951 | if ($is_utf8_locale) { |
952 | use locale ':not_characters'; | |
953 | $ok = $x =~ /[[:lower:]]/; | |
3da38613 | 954 | $fold_ok = $x =~ /[[:upper:]]/i; |
baa71cfd KW |
955 | } |
956 | else { | |
957 | use locale; | |
958 | $ok = $x =~ /[[:lower:]]/; | |
3da38613 | 959 | $fold_ok = $x =~ /[[:upper:]]/i; |
baa71cfd KW |
960 | } |
961 | push @failures, $x unless $ok; | |
3da38613 | 962 | push @fold_failures, $x unless $fold_ok; |
baa71cfd KW |
963 | } |
964 | ||
965 | $locales_test_number++; | |
95eaa1bf | 966 | $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/ matches all alpha X for which lc(X) == X and uc(X) != X'; |
7c844d17 KW |
967 | report_multi_result($Locale, $locales_test_number, \@failures); |
968 | ||
3da38613 | 969 | $locales_test_number++; |
95eaa1bf | 970 | $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/i matches all alpha X for which lc(X) == X and uc(X) != X'; |
7c844d17 | 971 | report_multi_result($Locale, $locales_test_number, \@fold_failures); |
baa71cfd | 972 | |
9445c837 KW |
973 | { # Find the alphabetic characters that are not considered alphabetics |
974 | # in the default (C) locale. | |
8ebc5c01 | 975 | |
284102e8 | 976 | no locale; |
71e5cbb3 | 977 | |
30032ef4 | 978 | @Added_alpha = (); |
5e7a1028 | 979 | for (keys %UPPER, keys %lower, keys %BoThCaSe) { |
30032ef4 | 980 | push(@Added_alpha, $_) if (/\W/); |
284102e8 | 981 | } |
8ebc5c01 | 982 | } |
8ebc5c01 | 983 | |
30032ef4 | 984 | @Added_alpha = sort @Added_alpha; |
8ebc5c01 | 985 | |
019bf7dd | 986 | debug "# Added_alpha = ", display_characters(@Added_alpha), "\n"; |
8ebc5c01 | 987 | |
db31898d | 988 | # Cross-check the whole 8-bit character set. |
8ebc5c01 | 989 | |
db31898d KW |
990 | ++$locales_test_number; |
991 | my @f; | |
992 | $test_names{$locales_test_number} = 'Verify that \w and [:word:] are identical'; | |
993 | for (map { chr } 0..255) { | |
994 | if ($is_utf8_locale) { | |
995 | use locale ':not_characters'; | |
996 | push @f, $_ unless /[[:word:]]/ == /\w/; | |
997 | } | |
998 | else { | |
999 | push @f, $_ unless /[[:word:]]/ == /\w/; | |
1000 | } | |
1001 | } | |
1002 | report_multi_result($Locale, $locales_test_number, \@f); | |
8ebc5c01 | 1003 | |
db31898d KW |
1004 | ++$locales_test_number; |
1005 | undef @f; | |
1006 | $test_names{$locales_test_number} = 'Verify that \d and [:digit:] are identical'; | |
1007 | for (map { chr } 0..255) { | |
1008 | if ($is_utf8_locale) { | |
1009 | use locale ':not_characters'; | |
1010 | push @f, $_ unless /[[:digit:]]/ == /\d/; | |
1011 | } | |
1012 | else { | |
1013 | push @f, $_ unless /[[:digit:]]/ == /\d/; | |
1014 | } | |
1015 | } | |
1016 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1017 | |
db31898d KW |
1018 | ++$locales_test_number; |
1019 | undef @f; | |
1020 | $test_names{$locales_test_number} = 'Verify that \s and [:space:] are identical'; | |
1021 | for (map { chr } 0..255) { | |
1022 | if ($is_utf8_locale) { | |
1023 | use locale ':not_characters'; | |
1024 | push @f, $_ unless /[[:space:]]/ == /\s/; | |
1025 | } | |
1026 | else { | |
1027 | push @f, $_ unless /[[:space:]]/ == /\s/; | |
1028 | } | |
1029 | } | |
1030 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1031 | |
db31898d KW |
1032 | ++$locales_test_number; |
1033 | undef @f; | |
1034 | $test_names{$locales_test_number} = 'Verify that [:posix:] and [:^posix:] are mutually exclusive'; | |
1035 | for (map { chr } 0..255) { | |
1036 | if ($is_utf8_locale) { | |
1037 | use locale ':not_characters'; | |
1038 | push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) || | |
1039 | (/[[:alnum:]]/ xor /[[:^alnum:]]/) || | |
1040 | (/[[:ascii:]]/ xor /[[:^ascii:]]/) || | |
1041 | (/[[:blank:]]/ xor /[[:^blank:]]/) || | |
1042 | (/[[:cntrl:]]/ xor /[[:^cntrl:]]/) || | |
1043 | (/[[:digit:]]/ xor /[[:^digit:]]/) || | |
1044 | (/[[:graph:]]/ xor /[[:^graph:]]/) || | |
1045 | (/[[:lower:]]/ xor /[[:^lower:]]/) || | |
1046 | (/[[:print:]]/ xor /[[:^print:]]/) || | |
1047 | (/[[:space:]]/ xor /[[:^space:]]/) || | |
1048 | (/[[:upper:]]/ xor /[[:^upper:]]/) || | |
1049 | (/[[:word:]]/ xor /[[:^word:]]/) || | |
1050 | (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) || | |
1051 | ||
1052 | # effectively is what [:cased:] would be if it existed. | |
1053 | (/[[:upper:]]/i xor /[[:^upper:]]/i); | |
1054 | } | |
1055 | else { | |
1056 | push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) || | |
1057 | (/[[:alnum:]]/ xor /[[:^alnum:]]/) || | |
1058 | (/[[:ascii:]]/ xor /[[:^ascii:]]/) || | |
1059 | (/[[:blank:]]/ xor /[[:^blank:]]/) || | |
1060 | (/[[:cntrl:]]/ xor /[[:^cntrl:]]/) || | |
1061 | (/[[:digit:]]/ xor /[[:^digit:]]/) || | |
1062 | (/[[:graph:]]/ xor /[[:^graph:]]/) || | |
1063 | (/[[:lower:]]/ xor /[[:^lower:]]/) || | |
1064 | (/[[:print:]]/ xor /[[:^print:]]/) || | |
1065 | (/[[:space:]]/ xor /[[:^space:]]/) || | |
1066 | (/[[:upper:]]/ xor /[[:^upper:]]/) || | |
1067 | (/[[:word:]]/ xor /[[:^word:]]/) || | |
1068 | (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) || | |
1069 | (/[[:upper:]]/i xor /[[:^upper:]]/i); | |
1070 | } | |
1071 | } | |
1072 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1073 | |
db31898d KW |
1074 | # The rules for the relationships are given in: |
1075 | # http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html | |
32e8aa3f | 1076 | |
db31898d KW |
1077 | ++$locales_test_number; |
1078 | undef @f; | |
1079 | $test_names{$locales_test_number} = 'Verify that [:lower:] is a subset of [:alpha:]'; | |
1080 | for (map { chr } 0..255) { | |
1081 | if ($is_utf8_locale) { | |
1082 | use locale ':not_characters'; | |
1083 | push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/; | |
1084 | } | |
1085 | else { | |
1086 | push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/; | |
1087 | } | |
1088 | } | |
1089 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1090 | |
db31898d KW |
1091 | ++$locales_test_number; |
1092 | undef @f; | |
1093 | $test_names{$locales_test_number} = 'Verify that [:upper:] is a subset of [:alpha:]'; | |
1094 | for (map { chr } 0..255) { | |
1095 | if ($is_utf8_locale) { | |
1096 | use locale ':not_characters'; | |
1097 | push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/; | |
1098 | } | |
1099 | else { | |
1100 | push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/; | |
1101 | } | |
1102 | } | |
1103 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1104 | |
db31898d KW |
1105 | ++$locales_test_number; |
1106 | undef @f; | |
1107 | $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i is a subset of [:alpha:]'; | |
1108 | for (map { chr } 0..255) { | |
1109 | if ($is_utf8_locale) { | |
1110 | use locale ':not_characters'; | |
1111 | push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/; | |
1112 | } | |
1113 | else { | |
1114 | push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/; | |
1115 | } | |
1116 | } | |
1117 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1118 | |
db31898d KW |
1119 | ++$locales_test_number; |
1120 | undef @f; | |
1121 | $test_names{$locales_test_number} = 'Verify that [:alpha:] is a subset of [:alnum:]'; | |
1122 | for (map { chr } 0..255) { | |
1123 | if ($is_utf8_locale) { | |
1124 | use locale ':not_characters'; | |
1125 | push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/; | |
1126 | } | |
1127 | else { | |
1128 | push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/; | |
1129 | } | |
1130 | } | |
1131 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1132 | |
db31898d KW |
1133 | ++$locales_test_number; |
1134 | undef @f; | |
1135 | $test_names{$locales_test_number} = 'Verify that [:digit:] is a subset of [:alnum:]'; | |
1136 | for (map { chr } 0..255) { | |
1137 | if ($is_utf8_locale) { | |
1138 | use locale ':not_characters'; | |
1139 | push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/; | |
1140 | } | |
1141 | else { | |
1142 | push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/; | |
1143 | } | |
1144 | } | |
1145 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1146 | |
db31898d KW |
1147 | ++$locales_test_number; |
1148 | undef @f; | |
aac995d2 KW |
1149 | $test_names{$locales_test_number} = 'Verify that [:digit:] matches either 10 or 20 code points'; |
1150 | report_result($Locale, $locales_test_number, @Digit_ == 10 || @Digit_ ==20); | |
1151 | ||
1152 | ++$locales_test_number; | |
1153 | undef @f; | |
1154 | $test_names{$locales_test_number} = 'Verify that [:digit:] (if is 10 code points) is a subset of [:xdigit:]'; | |
1155 | if (@Digit_ == 10) { | |
fc81f5f2 KW |
1156 | for (map { chr } 0..255) { |
1157 | if ($is_utf8_locale) { | |
1158 | use locale ':not_characters'; | |
1159 | push @f, $_ if /[[:digit:]]/ and ! /[[:xdigit:]]/; | |
1160 | } | |
1161 | else { | |
1162 | push @f, $_ if /[[:digit:]]/ and ! /[[:xdigit:]]/; | |
1163 | } | |
db31898d KW |
1164 | } |
1165 | } | |
1166 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1167 | |
db31898d KW |
1168 | ++$locales_test_number; |
1169 | undef @f; | |
1170 | $test_names{$locales_test_number} = 'Verify that [:alnum:] is a subset of [:graph:]'; | |
1171 | for (map { chr } 0..255) { | |
1172 | if ($is_utf8_locale) { | |
1173 | use locale ':not_characters'; | |
1174 | push @f, $_ if /[[:alnum:]]/ and ! /[[:graph:]]/; | |
1175 | } | |
1176 | else { | |
1177 | push @f, $_ if /[[:alnum:]]/ and ! /[[:graph:]]/; | |
1178 | } | |
1179 | } | |
1180 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1181 | |
db31898d | 1182 | # Note that xdigit doesn't have to be a subset of alnum |
32e8aa3f | 1183 | |
db31898d KW |
1184 | ++$locales_test_number; |
1185 | undef @f; | |
1186 | $test_names{$locales_test_number} = 'Verify that [:xdigit:] is a subset of [:graph:]'; | |
1187 | for (map { chr } 0..255) { | |
1188 | if ($is_utf8_locale) { | |
1189 | use locale ':not_characters'; | |
1190 | push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/; | |
1191 | } | |
1192 | else { | |
1193 | push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/; | |
1194 | } | |
1195 | } | |
1196 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1197 | |
db31898d KW |
1198 | ++$locales_test_number; |
1199 | undef @f; | |
1200 | $test_names{$locales_test_number} = 'Verify that [:punct:] is a subset of [:graph:]'; | |
1201 | for (map { chr } 0..255) { | |
1202 | if ($is_utf8_locale) { | |
1203 | use locale ':not_characters'; | |
1204 | push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/; | |
1205 | } | |
1206 | else { | |
1207 | push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/; | |
1208 | } | |
1209 | } | |
1210 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1211 | |
db31898d KW |
1212 | ++$locales_test_number; |
1213 | undef @f; | |
1214 | $test_names{$locales_test_number} = 'Verify that [:blank:] is a subset of [:space:]'; | |
1215 | for (map { chr } 0..255) { | |
1216 | if ($is_utf8_locale) { | |
1217 | use locale ':not_characters'; | |
1218 | push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/; | |
1219 | } | |
1220 | else { | |
1221 | push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/; | |
1222 | } | |
1223 | } | |
1224 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1225 | |
db31898d KW |
1226 | ++$locales_test_number; |
1227 | undef @f; | |
1228 | $test_names{$locales_test_number} = 'Verify that [:graph:] is a subset of [:print:]'; | |
1229 | for (map { chr } 0..255) { | |
1230 | if ($is_utf8_locale) { | |
1231 | use locale ':not_characters'; | |
1232 | push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/; | |
1233 | } | |
1234 | else { | |
1235 | push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/; | |
1236 | } | |
1237 | } | |
1238 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1239 | |
db31898d KW |
1240 | ++$locales_test_number; |
1241 | undef @f; | |
1242 | $test_names{$locales_test_number} = 'Verify that isn\'t both [:cntrl:] and [:print:]'; | |
1243 | for (map { chr } 0..255) { | |
1244 | if ($is_utf8_locale) { | |
1245 | use locale ':not_characters'; | |
1246 | push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/); | |
1247 | } | |
1248 | else { | |
1249 | push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/); | |
1250 | } | |
1251 | } | |
1252 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1253 | |
db31898d KW |
1254 | ++$locales_test_number; |
1255 | undef @f; | |
1256 | $test_names{$locales_test_number} = 'Verify that isn\'t both [:alnum:] and [:punct:]'; | |
1257 | for (map { chr } 0..255) { | |
1258 | if ($is_utf8_locale) { | |
1259 | use locale ':not_characters'; | |
1260 | push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/; | |
1261 | } | |
1262 | else { | |
1263 | push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/; | |
1264 | } | |
1265 | } | |
1266 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1267 | |
db31898d KW |
1268 | ++$locales_test_number; |
1269 | undef @f; | |
1270 | $test_names{$locales_test_number} = 'Verify that isn\'t both [:xdigit:] and [:punct:]'; | |
1271 | for (map { chr } 0..255) { | |
1272 | if ($is_utf8_locale) { | |
1273 | use locale ':not_characters'; | |
1274 | push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/); | |
1275 | } | |
1276 | else { | |
1277 | push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/); | |
1278 | } | |
1279 | } | |
1280 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1281 | |
db31898d KW |
1282 | ++$locales_test_number; |
1283 | undef @f; | |
1284 | $test_names{$locales_test_number} = 'Verify that isn\'t both [:graph:] and [:space:]'; | |
1285 | for (map { chr } 0..255) { | |
1286 | if ($is_utf8_locale) { | |
1287 | use locale ':not_characters'; | |
1288 | push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/); | |
1289 | } | |
1290 | else { | |
1291 | push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/); | |
1292 | } | |
1293 | } | |
1294 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1295 | |
db31898d | 1296 | $final_casing_test_number = $locales_test_number; |
32e8aa3f | 1297 | |
db31898d KW |
1298 | # Test for read-only scalars' locale vs non-locale comparisons. |
1299 | ||
1300 | { | |
1301 | no locale; | |
1302 | my $ok; | |
1303 | $a = "qwerty"; | |
1304 | if ($is_utf8_locale) { | |
1305 | use locale ':not_characters'; | |
1306 | $ok = ($a cmp "qwerty") == 0; | |
1307 | } | |
1308 | else { | |
1309 | use locale; | |
1310 | $ok = ($a cmp "qwerty") == 0; | |
1311 | } | |
1312 | report_result($Locale, ++$locales_test_number, $ok); | |
1313 | $test_names{$locales_test_number} = 'Verify that cmp works with a read-only scalar; no- vs locale'; | |
1314 | } | |
8ebc5c01 | 1315 | |
db31898d KW |
1316 | { |
1317 | my ($from, $to, $lesser, $greater, | |
1318 | @test, %test, $test, $yes, $no, $sign); | |
284102e8 | 1319 | |
db31898d KW |
1320 | ++$locales_test_number; |
1321 | $test_names{$locales_test_number} = 'Verify that "le", "ne", etc work'; | |
1322 | $not_necessarily_a_problem_test_number = $locales_test_number; | |
1323 | for (0..9) { | |
1324 | # Select a slice. | |
519c0534 KW |
1325 | $from = int(($_*@Word_)/10); |
1326 | $to = $from + int(@Word_/10); | |
1327 | $to = $#Word_ if ($to > $#Word_); | |
1328 | $lesser = join('', @Word_[$from..$to]); | |
db31898d KW |
1329 | # Select a slice one character on. |
1330 | $from++; $to++; | |
519c0534 KW |
1331 | $to = $#Word_ if ($to > $#Word_); |
1332 | $greater = join('', @Word_[$from..$to]); | |
66cbab2c KW |
1333 | if ($is_utf8_locale) { |
1334 | use locale ':not_characters'; | |
db31898d KW |
1335 | ($yes, $no, $sign) = ($lesser lt $greater |
1336 | ? (" ", "not ", 1) | |
1337 | : ("not ", " ", -1)); | |
66cbab2c KW |
1338 | } |
1339 | else { | |
1340 | use locale; | |
db31898d KW |
1341 | ($yes, $no, $sign) = ($lesser lt $greater |
1342 | ? (" ", "not ", 1) | |
1343 | : ("not ", " ", -1)); | |
66cbab2c | 1344 | } |
db31898d KW |
1345 | # all these tests should FAIL (return 0). Exact lt or gt cannot |
1346 | # be tested because in some locales, say, eacute and E may test | |
1347 | # equal. | |
1348 | @test = | |
1349 | ( | |
1350 | $no.' ($lesser le $greater)', # 1 | |
1351 | 'not ($lesser ne $greater)', # 2 | |
1352 | ' ($lesser eq $greater)', # 3 | |
1353 | $yes.' ($lesser ge $greater)', # 4 | |
1354 | $yes.' ($lesser ge $greater)', # 5 | |
1355 | $yes.' ($greater le $lesser )', # 7 | |
1356 | 'not ($greater ne $lesser )', # 8 | |
1357 | ' ($greater eq $lesser )', # 9 | |
1358 | $no.' ($greater ge $lesser )', # 10 | |
1359 | 'not (($lesser cmp $greater) == -($sign))' # 11 | |
1360 | ); | |
1361 | @test{@test} = 0 x @test; | |
1362 | $test = 0; | |
1363 | for my $ti (@test) { | |
66cbab2c KW |
1364 | if ($is_utf8_locale) { |
1365 | use locale ':not_characters'; | |
db31898d | 1366 | $test{$ti} = eval $ti; |
66cbab2c KW |
1367 | } |
1368 | else { | |
db31898d KW |
1369 | # Already in 'use locale'; |
1370 | $test{$ti} = eval $ti; | |
66cbab2c | 1371 | } |
db31898d KW |
1372 | $test ||= $test{$ti} |
1373 | } | |
1374 | report_result($Locale, $locales_test_number, $test == 0); | |
1375 | if ($test) { | |
1376 | debug "# lesser = '$lesser'\n"; | |
1377 | debug "# greater = '$greater'\n"; | |
1378 | debug "# lesser cmp greater = ", | |
1379 | $lesser cmp $greater, "\n"; | |
1380 | debug "# greater cmp lesser = ", | |
1381 | $greater cmp $lesser, "\n"; | |
1382 | debug "# (greater) from = $from, to = $to\n"; | |
1383 | for my $ti (@test) { | |
1384 | debugf("# %-40s %-4s", $ti, | |
1385 | $test{$ti} ? 'FAIL' : 'ok'); | |
1386 | if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) { | |
1387 | debugf("(%s == %4d)", $1, eval $1); | |
66cbab2c | 1388 | } |
db31898d KW |
1389 | debug "\n#"; |
1390 | } | |
1391 | ||
1392 | last; | |
1393 | } | |
1394 | } | |
1395 | } | |
c4093d7d | 1396 | |
66cbab2c KW |
1397 | my $ok1; |
1398 | my $ok2; | |
1399 | my $ok3; | |
1400 | my $ok4; | |
1401 | my $ok5; | |
1402 | my $ok6; | |
1403 | my $ok7; | |
1404 | my $ok8; | |
1405 | my $ok9; | |
1406 | my $ok10; | |
1407 | my $ok11; | |
1408 | my $ok12; | |
1409 | my $ok13; | |
1500bd91 | 1410 | my $ok14; |
28acfe03 KW |
1411 | my $ok15; |
1412 | my $ok16; | |
66cbab2c KW |
1413 | |
1414 | my $c; | |
1415 | my $d; | |
1416 | my $e; | |
1417 | my $f; | |
1418 | my $g; | |
1419 | ||
1420 | if (! $is_utf8_locale) { | |
71e5cbb3 | 1421 | use locale; |
6be75cd7 | 1422 | |
71e5cbb3 | 1423 | my ($x, $y) = (1.23, 1.23); |
6be75cd7 | 1424 | |
71e5cbb3 KW |
1425 | $a = "$x"; |
1426 | printf ''; # printf used to reset locale to "C" | |
1427 | $b = "$y"; | |
1428 | $ok1 = $a eq $b; | |
6be75cd7 | 1429 | |
71e5cbb3 KW |
1430 | $c = "$x"; |
1431 | my $z = sprintf ''; # sprintf used to reset locale to "C" | |
1432 | $d = "$y"; | |
1433 | $ok2 = $c eq $d; | |
1434 | { | |
66cbab2c | 1435 | |
71e5cbb3 KW |
1436 | use warnings; |
1437 | my $w = 0; | |
1438 | local $SIG{__WARN__} = | |
1439 | sub { | |
1440 | print "# @_\n"; | |
1441 | $w++; | |
1442 | }; | |
6be75cd7 | 1443 | |
71e5cbb3 KW |
1444 | # The == (among other ops) used to warn for locales |
1445 | # that had something else than "." as the radix character. | |
6be75cd7 | 1446 | |
71e5cbb3 KW |
1447 | $ok3 = $c == 1.23; |
1448 | $ok4 = $c == $x; | |
1449 | $ok5 = $c == $d; | |
1450 | { | |
1451 | no locale; | |
66cbab2c | 1452 | |
b79536ea | 1453 | $e = "$x"; |
71e5cbb3 KW |
1454 | |
1455 | $ok6 = $e == 1.23; | |
1456 | $ok7 = $e == $x; | |
1457 | $ok8 = $e == $c; | |
1458 | } | |
66cbab2c | 1459 | |
71e5cbb3 KW |
1460 | $f = "1.23"; |
1461 | $g = 2.34; | |
66cbab2c | 1462 | |
71e5cbb3 KW |
1463 | $ok9 = $f == 1.23; |
1464 | $ok10 = $f == $x; | |
1465 | $ok11 = $f == $c; | |
1466 | $ok12 = abs(($f + $g) - 3.57) < 0.01; | |
1467 | $ok13 = $w == 0; | |
28acfe03 | 1468 | $ok14 = $ok15 = $ok16 = 1; # Skip for non-utf8 locales |
71e5cbb3 | 1469 | } |
66cbab2c KW |
1470 | } |
1471 | else { | |
1472 | use locale ':not_characters'; | |
1473 | ||
1474 | my ($x, $y) = (1.23, 1.23); | |
1475 | $a = "$x"; | |
1476 | printf ''; # printf used to reset locale to "C" | |
1477 | $b = "$y"; | |
1478 | $ok1 = $a eq $b; | |
1479 | ||
1480 | $c = "$x"; | |
1481 | my $z = sprintf ''; # sprintf used to reset locale to "C" | |
1482 | $d = "$y"; | |
1483 | $ok2 = $c eq $d; | |
1484 | { | |
1485 | use warnings; | |
1486 | my $w = 0; | |
1487 | local $SIG{__WARN__} = | |
1488 | sub { | |
1489 | print "# @_\n"; | |
1490 | $w++; | |
1491 | }; | |
1492 | $ok3 = $c == 1.23; | |
1493 | $ok4 = $c == $x; | |
1494 | $ok5 = $c == $d; | |
1495 | { | |
1496 | no locale; | |
b79536ea | 1497 | $e = "$x"; |
66cbab2c KW |
1498 | |
1499 | $ok6 = $e == 1.23; | |
1500 | $ok7 = $e == $x; | |
1501 | $ok8 = $e == $c; | |
1502 | } | |
1503 | ||
1504 | $f = "1.23"; | |
1505 | $g = 2.34; | |
1506 | ||
1507 | $ok9 = $f == 1.23; | |
1508 | $ok10 = $f == $x; | |
1509 | $ok11 = $f == $c; | |
1510 | $ok12 = abs(($f + $g) - 3.57) < 0.01; | |
1511 | $ok13 = $w == 0; | |
1500bd91 KW |
1512 | |
1513 | # Look for non-ASCII error messages, and verify that the first | |
1514 | # such is in UTF-8 (the others almost certainly will be like the | |
1515 | # first). | |
1516 | $ok14 = 1; | |
1517 | foreach my $err (keys %!) { | |
1518 | use Errno; | |
1519 | $! = eval "&Errno::$err"; # Convert to strerror() output | |
1520 | my $strerror = "$!"; | |
1521 | if ("$strerror" =~ /\P{ASCII}/) { | |
1522 | my $utf8_strerror = $strerror; | |
1523 | utf8::upgrade($utf8_strerror); | |
1524 | ||
1525 | # If $! was already in UTF-8, the upgrade was a no-op; | |
1526 | # otherwise they will be different byte strings. | |
1527 | use bytes; | |
1528 | $ok14 = $utf8_strerror eq $strerror; | |
1529 | last; | |
1530 | } | |
1531 | } | |
28acfe03 KW |
1532 | |
1533 | # Similarly, we verify that a non-ASCII radix is in UTF-8. This | |
1534 | # also catches if there is a disparity between sprintf and | |
1535 | # stringification. | |
1536 | ||
1537 | my $string_g = "$g"; | |
1538 | ||
1539 | my $utf8_string_g = "$g"; | |
1540 | utf8::upgrade($utf8_string_g); | |
1541 | ||
1542 | my $utf8_sprintf_g = sprintf("%g", $g); | |
1543 | utf8::upgrade($utf8_sprintf_g); | |
1544 | use bytes; | |
1545 | $ok15 = $utf8_string_g eq $string_g; | |
1546 | $ok16 = $utf8_sprintf_g eq $string_g; | |
66cbab2c KW |
1547 | } |
1548 | } | |
1549 | ||
30032ef4 | 1550 | report_result($Locale, ++$locales_test_number, $ok1); |
66cbab2c KW |
1551 | $test_names{$locales_test_number} = 'Verify that an intervening printf doesn\'t change assignment results'; |
1552 | my $first_a_test = $locales_test_number; | |
1553 | ||
1554 | debug "# $first_a_test..$locales_test_number: \$a = $a, \$b = $b, Locale = $Locale\n"; | |
1555 | ||
30032ef4 | 1556 | report_result($Locale, ++$locales_test_number, $ok2); |
66cbab2c KW |
1557 | $test_names{$locales_test_number} = 'Verify that an intervening sprintf doesn\'t change assignment results'; |
1558 | ||
1559 | my $first_c_test = $locales_test_number; | |
1560 | ||
30032ef4 | 1561 | report_result($Locale, ++$locales_test_number, $ok3); |
71e5cbb3 | 1562 | $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a constant'; |
6be75cd7 | 1563 | |
30032ef4 | 1564 | report_result($Locale, ++$locales_test_number, $ok4); |
71e5cbb3 | 1565 | $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar'; |
66cbab2c | 1566 | |
30032ef4 | 1567 | report_result($Locale, ++$locales_test_number, $ok5); |
71e5cbb3 | 1568 | $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar and an intervening sprintf'; |
66cbab2c | 1569 | |
71e5cbb3 | 1570 | debug "# $first_c_test..$locales_test_number: \$c = $c, \$d = $d, Locale = $Locale\n"; |
66cbab2c | 1571 | |
30032ef4 | 1572 | report_result($Locale, ++$locales_test_number, $ok6); |
b79536ea | 1573 | $test_names{$locales_test_number} = 'Verify that can assign stringified under inner no-locale block'; |
71e5cbb3 | 1574 | my $first_e_test = $locales_test_number; |
6be75cd7 | 1575 | |
30032ef4 | 1576 | report_result($Locale, ++$locales_test_number, $ok7); |
71e5cbb3 | 1577 | $test_names{$locales_test_number} = 'Verify that "==" with a scalar still works in inner no locale'; |
66cbab2c | 1578 | |
30032ef4 | 1579 | report_result($Locale, ++$locales_test_number, $ok8); |
71e5cbb3 | 1580 | $test_names{$locales_test_number} = 'Verify that "==" with a scalar and an intervening sprintf still works in inner no locale'; |
c4093d7d | 1581 | |
71e5cbb3 | 1582 | debug "# $first_e_test..$locales_test_number: \$e = $e, no locale\n"; |
2a680da6 | 1583 | |
30032ef4 | 1584 | report_result($Locale, ++$locales_test_number, $ok9); |
71e5cbb3 KW |
1585 | $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a constant'; |
1586 | my $first_f_test = $locales_test_number; | |
6be75cd7 | 1587 | |
30032ef4 | 1588 | report_result($Locale, ++$locales_test_number, $ok10); |
71e5cbb3 | 1589 | $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar'; |
66cbab2c | 1590 | |
30032ef4 | 1591 | report_result($Locale, ++$locales_test_number, $ok11); |
71e5cbb3 | 1592 | $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar and an intervening sprintf'; |
906f284f | 1593 | |
30032ef4 | 1594 | report_result($Locale, ++$locales_test_number, $ok12); |
71e5cbb3 | 1595 | $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix can participate in an addition and function call as numeric'; |
c4093d7d | 1596 | |
30032ef4 | 1597 | report_result($Locale, ++$locales_test_number, $ok13); |
71e5cbb3 | 1598 | $test_names{$locales_test_number} = 'Verify that don\'t get warning under "==" even if radix is not a dot'; |
c4093d7d | 1599 | |
30032ef4 | 1600 | report_result($Locale, ++$locales_test_number, $ok14); |
1500bd91 KW |
1601 | $test_names{$locales_test_number} = 'Verify that non-ASCII UTF-8 error messages are in UTF-8'; |
1602 | ||
30032ef4 | 1603 | report_result($Locale, ++$locales_test_number, $ok15); |
28acfe03 KW |
1604 | $test_names{$locales_test_number} = 'Verify that a number with a UTF-8 radix has a UTF-8 stringification'; |
1605 | ||
30032ef4 | 1606 | report_result($Locale, ++$locales_test_number, $ok16); |
28acfe03 KW |
1607 | $test_names{$locales_test_number} = 'Verify that a sprintf of a number with a UTF-8 radix yields UTF-8'; |
1608 | ||
71e5cbb3 | 1609 | debug "# $first_f_test..$locales_test_number: \$f = $f, \$g = $g, back to locale = $Locale\n"; |
906f284f | 1610 | |
26d80d95 LC |
1611 | # Does taking lc separately differ from taking |
1612 | # the lc "in-line"? (This was the bug 19990704.002, change #3568.) | |
1613 | # The bug was in the caching of the 'o'-magic. | |
66cbab2c | 1614 | if (! $is_utf8_locale) { |
2a680da6 | 1615 | use locale; |
6be75cd7 | 1616 | |
2a680da6 JH |
1617 | sub lcA { |
1618 | my $lc0 = lc $_[0]; | |
1619 | my $lc1 = lc $_[1]; | |
1620 | return $lc0 cmp $lc1; | |
1621 | } | |
6be75cd7 | 1622 | |
2a680da6 JH |
1623 | sub lcB { |
1624 | return lc($_[0]) cmp lc($_[1]); | |
1625 | } | |
6be75cd7 | 1626 | |
2a680da6 JH |
1627 | my $x = "ab"; |
1628 | my $y = "aa"; | |
1629 | my $z = "AB"; | |
6be75cd7 | 1630 | |
30032ef4 | 1631 | report_result($Locale, ++$locales_test_number, |
2a680da6 JH |
1632 | lcA($x, $y) == 1 && lcB($x, $y) == 1 || |
1633 | lcA($x, $z) == 0 && lcB($x, $z) == 0); | |
6be75cd7 | 1634 | } |
66cbab2c KW |
1635 | else { |
1636 | use locale ':not_characters'; | |
1637 | ||
1638 | sub lcC { | |
1639 | my $lc0 = lc $_[0]; | |
1640 | my $lc1 = lc $_[1]; | |
1641 | return $lc0 cmp $lc1; | |
1642 | } | |
1643 | ||
1644 | sub lcD { | |
1645 | return lc($_[0]) cmp lc($_[1]); | |
1646 | } | |
1647 | ||
1648 | my $x = "ab"; | |
1649 | my $y = "aa"; | |
1650 | my $z = "AB"; | |
1651 | ||
30032ef4 | 1652 | report_result($Locale, ++$locales_test_number, |
66cbab2c KW |
1653 | lcC($x, $y) == 1 && lcD($x, $y) == 1 || |
1654 | lcC($x, $z) == 0 && lcD($x, $z) == 0); | |
1655 | } | |
1656 | $test_names{$locales_test_number} = 'Verify "lc(foo) cmp lc(bar)" is the same as using intermediaries for the cmp'; | |
d8093b23 | 1657 | |
26d80d95 LC |
1658 | # Does lc of an UPPER (if different from the UPPER) match |
1659 | # case-insensitively the UPPER, and does the UPPER match | |
1660 | # case-insensitively the lc of the UPPER. And vice versa. | |
3ba0e062 | 1661 | { |
ef4a39e5 JH |
1662 | use locale; |
1663 | no utf8; | |
1664 | my $re = qr/[\[\(\{\*\+\?\|\^\$\\]/; | |
1665 | ||
1666 | my @f = (); | |
c4093d7d | 1667 | ++$locales_test_number; |
c08acc4c | 1668 | $test_names{$locales_test_number} = 'Verify case insensitive matching works'; |
f78d9f29 | 1669 | foreach my $x (sort keys %UPPER) { |
66cbab2c | 1670 | if (! $is_utf8_locale) { |
71e5cbb3 KW |
1671 | my $y = lc $x; |
1672 | next unless uc $y eq $x; | |
1673 | print "# UPPER $x lc $y ", | |
faf0c248 KW |
1674 | $x =~ /$y/i ? 1 : 0, " ", |
1675 | $y =~ /$x/i ? 1 : 0, "\n" if 0; | |
71e5cbb3 KW |
1676 | # |
1677 | # If $x and $y contain regular expression characters | |
1678 | # AND THEY lowercase (/i) to regular expression characters, | |
1679 | # regcomp() will be mightily confused. No, the \Q doesn't | |
1680 | # help here (maybe regex engine internal lowercasing | |
1681 | # is done after the \Q?) An example of this happening is | |
1682 | # the bg_BG (Bulgarian) locale under EBCDIC (OS/390 USS): | |
1683 | # the chr(173) (the "[") is the lowercase of the chr(235). | |
1684 | # | |
1685 | # Similarly losing EBCDIC locales include cs_cz, cs_CZ, | |
1686 | # el_gr, el_GR, en_us.IBM-037 (!), en_US.IBM-037 (!), | |
1687 | # et_ee, et_EE, hr_hr, hr_HR, hu_hu, hu_HU, lt_LT, | |
1688 | # mk_mk, mk_MK, nl_nl.IBM-037, nl_NL.IBM-037, | |
1689 | # pl_pl, pl_PL, ro_ro, ro_RO, ru_ru, ru_RU, | |
1690 | # sk_sk, sk_SK, sl_si, sl_SI, tr_tr, tr_TR. | |
1691 | # | |
1692 | # Similar things can happen even under (bastardised) | |
1693 | # non-EBCDIC locales: in many European countries before the | |
1694 | # advent of ISO 8859-x nationally customised versions of | |
1695 | # ISO 646 were devised, reusing certain punctuation | |
1696 | # characters for modified characters needed by the | |
1697 | # country/language. For example, the "|" might have | |
1698 | # stood for U+00F6 or LATIN SMALL LETTER O WITH DIAERESIS. | |
1699 | # | |
1700 | if ($x =~ $re || $y =~ $re) { | |
1701 | print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n"; | |
1702 | next; | |
1703 | } | |
1704 | # With utf8 both will fail since the locale concept | |
1705 | # of upper/lower does not work well in Unicode. | |
1706 | push @f, $x unless $x =~ /$y/i == $y =~ /$x/i; | |
26c1569f KW |
1707 | |
1708 | # fc is not a locale concept, so Perl uses lc for it. | |
1709 | push @f, $x unless lc $x eq fc $x; | |
66cbab2c KW |
1710 | } |
1711 | else { | |
1712 | use locale ':not_characters'; | |
1713 | my $y = lc $x; | |
1714 | next unless uc $y eq $x; | |
1715 | print "# UPPER $x lc $y ", | |
faf0c248 KW |
1716 | $x =~ /$y/i ? 1 : 0, " ", |
1717 | $y =~ /$x/i ? 1 : 0, "\n" if 0; | |
66cbab2c KW |
1718 | |
1719 | # Here, we can fully test things, unlike plain 'use locale', | |
1720 | # because this form does work well with Unicode | |
1721 | push @f, $x unless $x =~ /$y/i && $y =~ /$x/i; | |
26c1569f KW |
1722 | |
1723 | # The places where Unicode's lc is different from fc are | |
1724 | # skipped here by virtue of the 'next unless uc...' line above | |
1725 | push @f, $x unless lc $x eq fc $x; | |
66cbab2c | 1726 | } |
c00ff1c7 | 1727 | } |
ef4a39e5 | 1728 | |
f78d9f29 | 1729 | foreach my $x (sort keys %lower) { |
66cbab2c | 1730 | if (! $is_utf8_locale) { |
71e5cbb3 KW |
1731 | my $y = uc $x; |
1732 | next unless lc $y eq $x; | |
1733 | print "# lower $x uc $y ", | |
faf0c248 KW |
1734 | $x =~ /$y/i ? 1 : 0, " ", |
1735 | $y =~ /$x/i ? 1 : 0, "\n" if 0; | |
71e5cbb3 KW |
1736 | if ($x =~ $re || $y =~ $re) { # See above. |
1737 | print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n"; | |
1738 | next; | |
1739 | } | |
1740 | # With utf8 both will fail since the locale concept | |
1741 | # of upper/lower does not work well in Unicode. | |
1742 | push @f, $x unless $x =~ /$y/i == $y =~ /$x/i; | |
26c1569f KW |
1743 | |
1744 | push @f, $x unless lc $x eq fc $x; | |
66cbab2c KW |
1745 | } |
1746 | else { | |
1747 | use locale ':not_characters'; | |
1748 | my $y = uc $x; | |
1749 | next unless lc $y eq $x; | |
1750 | print "# lower $x uc $y ", | |
faf0c248 KW |
1751 | $x =~ /$y/i ? 1 : 0, " ", |
1752 | $y =~ /$x/i ? 1 : 0, "\n" if 0; | |
66cbab2c | 1753 | push @f, $x unless $x =~ /$y/i && $y =~ /$x/i; |
26c1569f KW |
1754 | |
1755 | push @f, $x unless lc $x eq fc $x; | |
66cbab2c | 1756 | } |
c00ff1c7 | 1757 | } |
7c844d17 | 1758 | report_multi_result($Locale, $locales_test_number, \@f); |
d8093b23 | 1759 | } |
78787052 JL |
1760 | |
1761 | # [perl #109318] | |
1762 | { | |
1763 | my @f = (); | |
1764 | ++$locales_test_number; | |
1765 | $test_names{$locales_test_number} = 'Verify atof with locale radix and negative exponent'; | |
1766 | ||
1767 | my $radix = POSIX::localeconv()->{decimal_point}; | |
1768 | my @nums = ( | |
1769 | "3.14e+9", "3${radix}14e+9", "3.14e-9", "3${radix}14e-9", | |
1770 | "-3.14e+9", "-3${radix}14e+9", "-3.14e-9", "-3${radix}14e-9", | |
1771 | ); | |
1772 | ||
1773 | if (! $is_utf8_locale) { | |
1774 | use locale; | |
1775 | for my $num (@nums) { | |
1776 | push @f, $num | |
1777 | unless sprintf("%g", $num) =~ /3.+14/; | |
1778 | } | |
1779 | } | |
1780 | else { | |
1781 | use locale ':not_characters'; | |
1782 | for my $num (@nums) { | |
1783 | push @f, $num | |
1784 | unless sprintf("%g", $num) =~ /3.+14/; | |
1785 | } | |
1786 | } | |
1787 | ||
30032ef4 | 1788 | report_result($Locale, $locales_test_number, @f == 0); |
78787052 JL |
1789 | if (@f) { |
1790 | print "# failed $locales_test_number locale '$Locale' numbers @f\n" | |
1791 | } | |
1792 | } | |
8ebc5c01 | 1793 | } |
284102e8 | 1794 | |
c4093d7d | 1795 | my $final_locales_test_number = $locales_test_number; |
6cf0b567 | 1796 | |
2a680da6 JH |
1797 | # Recount the errors. |
1798 | ||
c4093d7d KW |
1799 | foreach ($first_locales_test_number..$final_locales_test_number) { |
1800 | if (%setlocale_failed) { | |
1801 | print "not "; | |
1802 | } | |
1803 | elsif ($Problem{$_} || !defined $Okay{$_} || !@{$Okay{$_}}) { | |
1804 | if (defined $not_necessarily_a_problem_test_number | |
1805 | && $_ == $not_necessarily_a_problem_test_number) | |
1806 | { | |
1807 | print "# The failure of test $not_necessarily_a_problem_test_number is not necessarily fatal.\n"; | |
b4e009be | 1808 | print "# It usually indicates a problem in the environment,\n"; |
284102e8 JH |
1809 | print "# not in Perl itself.\n"; |
1810 | } | |
6d5d702a KW |
1811 | if ($Okay{$_} && ($_ >= $first_casing_test_number |
1812 | && $_ <= $final_casing_test_number)) | |
1813 | { | |
0a974e2d KW |
1814 | # Round to nearest .1% |
1815 | my $percent_fail = (int(.5 + (1000 * scalar(keys $Problem{$_}) | |
1816 | / scalar(@Locale)))) | |
1817 | / 10; | |
f5627fc1 KW |
1818 | if (! $debug && $percent_fail < $acceptable_fold_failure_percentage) |
1819 | { | |
6d5d702a KW |
1820 | $test_names{$_} .= 'TODO'; |
1821 | print "# ", 100 - $percent_fail, "% of locales pass the following test, so it is likely that the failures\n"; | |
1822 | print "# are errors in the locale definitions. The test is marked TODO, as the\n"; | |
1823 | print "# problem is not likely to be Perl's\n"; | |
1824 | } | |
1825 | } | |
f5627fc1 KW |
1826 | print "#\n"; |
1827 | if ($debug) { | |
1828 | print "# The code points that had this failure are given above. Look for lines\n"; | |
1829 | print "# that match 'failed $_'\n"; | |
1830 | } | |
1831 | else { | |
1832 | print "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n"; | |
1833 | print "# Then look at that output for lines that match 'failed $_'\n"; | |
108a305e | 1834 | } |
284102e8 | 1835 | print "not "; |
8ebc5c01 | 1836 | } |
c4093d7d | 1837 | print "ok $_"; |
6c2e653d KW |
1838 | if (defined $test_names{$_}) { |
1839 | # If TODO is in the test name, make it thus | |
1840 | my $todo = $test_names{$_} =~ s/TODO\s*//; | |
1841 | print " $test_names{$_}"; | |
1842 | print " # TODO" if $todo; | |
1843 | } | |
c4093d7d | 1844 | print "\n"; |
8ebc5c01 | 1845 | } |
fb73857a | 1846 | |
c4093d7d | 1847 | $test_num = $final_locales_test_number; |
c213d471 | 1848 | |
fbd840df KW |
1849 | { # perl #115808 |
1850 | use warnings; | |
1851 | my $warned = 0; | |
1852 | local $SIG{__WARN__} = sub { | |
1853 | $warned = $_[0] =~ /uninitialized/; | |
1854 | }; | |
1855 | my $z = "y" . setlocale(&POSIX::LC_ALL, "xyzzy"); | |
1856 | ok($warned, "variable set to setlocale(BAD LOCALE) is considered uninitialized"); | |
1857 | } | |
1858 | ||
094a2f8c | 1859 | # Test that tainting and case changing works on utf8 strings. These tests are |
1f5852c9 KW |
1860 | # placed last to avoid disturbing the hard-coded test numbers that existed at |
1861 | # the time these were added above this in this file. | |
0099bb8d KW |
1862 | # This also tests that locale overrides unicode_strings in the same scope for |
1863 | # non-utf8 strings. | |
a810e350 | 1864 | setlocale(&POSIX::LC_ALL, "C"); |
094a2f8c KW |
1865 | { |
1866 | use locale; | |
0099bb8d | 1867 | use feature 'unicode_strings'; |
094a2f8c | 1868 | |
26c1569f | 1869 | foreach my $function ("uc", "ucfirst", "lc", "lcfirst", "fc") { |
094a2f8c KW |
1870 | my @list; # List of code points to test for $function |
1871 | ||
1872 | # Used to calculate the changed case for ASCII characters by using the | |
1873 | # ord, instead of using one of the functions under test. | |
1874 | my $ascii_case_change_delta; | |
1875 | my $above_latin1_case_change_delta; # Same for the specific ords > 255 | |
1876 | # that we use | |
1877 | ||
1878 | # We test an ASCII character, which should change case and be tainted; | |
1879 | # a Latin1 character, which shouldn't change case under this C locale, | |
1880 | # and is tainted. | |
1881 | # an above-Latin1 character that when the case is changed would cross | |
1882 | # the 255/256 boundary, so doesn't change case and isn't tainted | |
1883 | # (the \x{149} is one of these, but changes into 2 characters, the | |
1884 | # first one of which doesn't cross the boundary. | |
1885 | # the final one in each list is an above-Latin1 character whose case | |
1886 | # does change, and shouldn't be tainted. The code below uses its | |
1887 | # position in its list as a marker to indicate that it, unlike the | |
1888 | # other code points above ASCII, has a successful case change | |
1889 | if ($function =~ /^u/) { | |
094a2f8c KW |
1890 | @list = ("", "a", "\xe0", "\xff", "\x{fb00}", "\x{149}", "\x{101}"); |
1891 | $ascii_case_change_delta = -32; | |
1892 | $above_latin1_case_change_delta = -1; | |
1893 | } | |
1894 | else { | |
1ca267a5 | 1895 | @list = ("", "A", "\xC0", "\x{17F}", "\x{100}"); |
094a2f8c KW |
1896 | $ascii_case_change_delta = +32; |
1897 | $above_latin1_case_change_delta = +1; | |
1898 | } | |
66cbab2c | 1899 | foreach my $is_utf8_locale (0 .. 1) { |
71e5cbb3 KW |
1900 | foreach my $j (0 .. $#list) { |
1901 | my $char = $list[$j]; | |
0099bb8d KW |
1902 | |
1903 | for my $encoded_in_utf8 (0 .. 1) { | |
faf0c248 KW |
1904 | my $should_be; |
1905 | my $changed; | |
1906 | if (! $is_utf8_locale) { | |
1907 | $should_be = ($j == $#list) | |
1908 | ? chr(ord($char) + $above_latin1_case_change_delta) | |
1909 | : (length $char == 0 || ord($char) > 127) | |
1910 | ? $char | |
1911 | : chr(ord($char) + $ascii_case_change_delta); | |
1912 | ||
1913 | # This monstrosity is in order to avoid using an eval, | |
1914 | # which might perturb the results | |
1915 | $changed = ($function eq "uc") | |
1916 | ? uc($char) | |
1917 | : ($function eq "ucfirst") | |
1918 | ? ucfirst($char) | |
1919 | : ($function eq "lc") | |
1920 | ? lc($char) | |
1921 | : ($function eq "lcfirst") | |
1922 | ? lcfirst($char) | |
26c1569f KW |
1923 | : ($function eq "fc") |
1924 | ? fc($char) | |
faf0c248 KW |
1925 | : die("Unexpected function \"$function\""); |
1926 | } | |
1927 | else { | |
1928 | { | |
1929 | no locale; | |
71e5cbb3 | 1930 | |
faf0c248 KW |
1931 | # For utf8-locales the case changing functions |
1932 | # should work just like they do outside of locale. | |
1933 | # Can use eval here because not testing it when | |
1934 | # not in locale. | |
1935 | $should_be = eval "$function('$char')"; | |
1936 | die "Unexpected eval error $@ from 'eval \"$function('$char')\"'" if $@; | |
71e5cbb3 | 1937 | |
faf0c248 KW |
1938 | } |
1939 | use locale ':not_characters'; | |
1940 | $changed = ($function eq "uc") | |
1941 | ? uc($char) | |
1942 | : ($function eq "ucfirst") | |
1943 | ? ucfirst($char) | |
1944 | : ($function eq "lc") | |
1945 | ? lc($char) | |
1946 | : ($function eq "lcfirst") | |
1947 | ? lcfirst($char) | |
26c1569f KW |
1948 | : ($function eq "fc") |
1949 | ? fc($char) | |
faf0c248 | 1950 | : die("Unexpected function \"$function\""); |
71e5cbb3 | 1951 | } |
faf0c248 KW |
1952 | ok($changed eq $should_be, |
1953 | "$function(\"$char\") in C locale " | |
1954 | . (($is_utf8_locale) | |
1955 | ? "(use locale ':not_characters'" | |
1956 | : "(use locale") | |
1957 | . (($encoded_in_utf8) | |
1958 | ? "; encoded in utf8)" | |
1959 | : "; not encoded in utf8)") | |
1960 | . " should be \"$should_be\", got \"$changed\""); | |
1961 | ||
1962 | # Tainting shouldn't happen for utf8 locales, empty | |
1963 | # strings, or those characters above 255. | |
1964 | (! $is_utf8_locale && length($char) > 0 && ord($char) < 256) | |
1965 | ? check_taint($changed) | |
1966 | : check_taint_not($changed); | |
1967 | ||
1968 | # Use UTF-8 next time through the loop | |
1969 | utf8::upgrade($char); | |
0099bb8d | 1970 | } |
66cbab2c | 1971 | } |
094a2f8c KW |
1972 | } |
1973 | } | |
1974 | } | |
1975 | ||
1bfe8fea KW |
1976 | # Give final advice. |
1977 | ||
1978 | my $didwarn = 0; | |
1979 | ||
1980 | foreach ($first_locales_test_number..$final_locales_test_number) { | |
1981 | if ($Problem{$_}) { | |
1982 | my @f = sort keys %{ $Problem{$_} }; | |
1983 | my $f = join(" ", @f); | |
1984 | $f =~ s/(.{50,60}) /$1\n#\t/g; | |
1985 | ||
1986 | "#\n", | |
1987 | "# The locale ", (@f == 1 ? "definition" : "definitions"), "\n#\n", | |
1988 | "#\t", $f, "\n#\n", | |
1989 | "# on your system may have errors because the locale test $_\n", | |
1990 | "# \"$test_names{$_}\"\n", | |
1991 | "# failed in ", (@f == 1 ? "that locale" : "those locales"), | |
1992 | ".\n"; | |
1993 | print <<EOW; | |
1994 | # | |
1995 | # If your users are not using these locales you are safe for the moment, | |
1996 | # but please report this failure first to perlbug\@perl.com using the | |
1997 | # perlbug script (as described in the INSTALL file) so that the exact | |
1998 | # details of the failures can be sorted out first and then your operating | |
1999 | # system supplier can be alerted about these anomalies. | |
2000 | # | |
2001 | EOW | |
2002 | $didwarn = 1; | |
2003 | } | |
2004 | } | |
2005 | ||
2006 | # Tell which locales were okay and which were not. | |
2007 | ||
2008 | if ($didwarn) { | |
2009 | my (@s, @F); | |
2010 | ||
2011 | foreach my $l (@Locale) { | |
2012 | my $p = 0; | |
2013 | if ($setlocale_failed{$l}) { | |
2014 | $p++; | |
2015 | } | |
2016 | else { | |
2017 | foreach my $t | |
2018 | ($first_locales_test_number..$final_locales_test_number) | |
2019 | { | |
2020 | $p++ if $Problem{$t}{$l}; | |
2021 | } | |
2022 | } | |
2023 | push @s, $l if $p == 0; | |
2024 | push @F, $l unless $p == 0; | |
2025 | } | |
2026 | ||
2027 | if (@s) { | |
2028 | my $s = join(" ", @s); | |
2029 | $s =~ s/(.{50,60}) /$1\n#\t/g; | |
2030 | ||
2031 | warn | |
2032 | "# The following locales\n#\n", | |
2033 | "#\t", $s, "\n#\n", | |
2034 | "# tested okay.\n#\n", | |
2035 | } else { | |
2036 | warn "# None of your locales were fully okay.\n"; | |
2037 | } | |
2038 | ||
2039 | if (@F) { | |
2040 | my $F = join(" ", @F); | |
2041 | $F =~ s/(.{50,60}) /$1\n#\t/g; | |
2042 | ||
2043 | warn | |
2044 | "# The following locales\n#\n", | |
2045 | "#\t", $F, "\n#\n", | |
2046 | "# had problems.\n#\n", | |
f5627fc1 | 2047 | "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n"; |
1bfe8fea KW |
2048 | } else { |
2049 | warn "# None of your locales were broken.\n"; | |
2050 | } | |
2051 | } | |
2052 | ||
fdf053ee | 2053 | print "1..$test_num\n"; |
906f284f | 2054 | |
90248788 | 2055 | # eof |