Commit | Line | Data |
---|---|---|
8ebc5c01 | 1 | #!./perl -wT |
2 | ||
3 | BEGIN { | |
4 | chdir 't' if -d 't'; | |
20822f61 | 5 | @INC = '../lib'; |
f9cbebe1 | 6 | unshift @INC, '.'; |
b002077a | 7 | require Config; import Config; |
97a0514d | 8 | if (!$Config{d_setlocale} || $Config{ccflags} =~ /\bD?NO_LOCALE\b/) { |
b002077a CS |
9 | print "1..0\n"; |
10 | exit; | |
11 | } | |
2de3dbcc | 12 | $| = 1; |
8ebc5c01 | 13 | } |
14 | ||
15 | use strict; | |
16 | ||
284102e8 JH |
17 | my $debug = 1; |
18 | ||
db4b7445 A |
19 | use Dumpvalue; |
20 | ||
21 | my $dumper = Dumpvalue->new( | |
22 | tick => qq{"}, | |
23 | quoteHighBit => 0, | |
24 | unctrl => "quote" | |
25 | ); | |
# Print one diagnostic line on the currently selected output handle.
#
# Does nothing unless the file-level $debug flag is true.  All arguments
# are concatenated into a single string and its final character is removed
# unconditionally with chop, so callers are expected to end their message
# with a newline.  The remaining text is rendered through
# $dumper->stringify before being printed, followed by a newline.
sub debug {
    $debug or return;
    my $text = join('', @_);
    chop($text);    # drops the last character whatever it is
    print $dumper->stringify($text, 1), "\n";
}
32 | ||
# printf-style companion to debug(): the first argument is the format and
# the rest are its values.  Output is produced only when the file-level
# $debug flag is true; nothing is appended or stripped.
sub debugf {
    if ($debug) {
        printf @_;
    }
}
36 | ||
8ebc5c01 | 37 | my $have_setlocale = 0; |
38 | eval { | |
39 | require POSIX; | |
40 | import POSIX ':locale_h'; | |
41 | $have_setlocale++; | |
42 | }; | |
43 | ||
6dead956 | 44 | # Visual C's CRT goes silly on strings of the form "en_US.ISO8859-1" |
f6c6487a | 45 | # and mingw32 uses said silly CRT |
3a2d1764 SH |
46 | # This doesn't seem to be an issue any more, at least on Windows XP, |
47 | # so re-enable the tests for Windows XP onwards. | |
48 | my $winxp = ($^O eq 'MSWin32' && defined &Win32::GetOSVersion && | |
49 | join('.', (Win32::GetOSVersion())[1..2]) >= 5.1); | |
50 | $have_setlocale = 0 if ((($^O eq 'MSWin32' && !$winxp) || $^O eq 'NetWare') && | |
51 | $Config{cc} =~ /^(cl|gcc)/i); | |
6dead956 | 52 | |
cd19b65c JH |
53 | # UWIN seems to loop after test 98, just skip for now |
54 | $have_setlocale = 0 if ($^O =~ /^uwin/); | |
55 | ||
906f284f | 56 | my $last = $have_setlocale ? &last : &last_without_setlocale; |
26d80d95 LC |
57 | |
58 | print "1..$last\n"; | |
8ebc5c01 | 59 | |
9a66ea41 | 60 | sub LC_ALL (); |
8ebc5c01 | 61 | |
0e053d1e | 62 | $a = 'abc %'; |
8ebc5c01 | 63 | |
# Emit a TAP result line for test number $n: "ok $n" when $result is true,
# "not ok $n" otherwise.
sub ok {
    my ($n, $result) = @_;

    my $prefix = $result ? '' : 'not ';
    print $prefix, "ok $n\n";
}
70 | ||
71 | # First we'll do a lot of taint checking for locales. | |
72 | # This is the easiest to test, actually, as any locale, | |
73 | # even the default locale will taint under 'use locale'. | |
74 | ||
# Return true when the eval'ed probe below dies, false when it completes.
#
# The probe joins all arguments and calls "kill 0" in the SAME statement;
# the two must stay together.  This script runs with -T, and presumably a
# tainted argument makes kill refuse to run and die inside the eval.
# (hello, camel two.)
sub is_tainted {
    no warnings 'uninitialized';    # arguments such as $2 may be undef
    my $scratch;
    my $survived = eval { $scratch = join("", @_), kill 0; 1 };
    return !$survived;
}
80 | ||
# Test number $_[0] passes when the value in $_[1] is tainted.
# $_[1] is handed on directly (no copy) so is_tainted() inspects the
# caller's own variable.
sub check_taint ($$) {
    ok($_[0], is_tainted($_[1]));
}
84 | ||
# Test number $_[0] passes when the value in $_[1] is NOT tainted.
# $_[1] is handed on directly (no copy) so is_tainted() inspects the
# caller's own variable.
sub check_taint_not ($$) {
    ok($_[0], !is_tainted($_[1]));
}
88 | ||
89 | use locale; # engage locale and therefore locale taint. | |
90 | ||
91 | check_taint_not 1, $a; | |
92 | ||
93 | check_taint 2, uc($a); | |
94 | check_taint 3, "\U$a"; | |
95 | check_taint 4, ucfirst($a); | |
96 | check_taint 5, "\u$a"; | |
97 | check_taint 6, lc($a); | |
98 | check_taint 7, "\L$a"; | |
99 | check_taint 8, lcfirst($a); | |
100 | check_taint 9, "\l$a"; | |
101 | ||
ff9121f8 JH |
102 | check_taint_not 10, sprintf('%e', 123.456); |
103 | check_taint_not 11, sprintf('%f', 123.456); | |
104 | check_taint_not 12, sprintf('%g', 123.456); | |
8ebc5c01 | 105 | check_taint_not 13, sprintf('%d', 123.456); |
106 | check_taint_not 14, sprintf('%x', 123.456); | |
107 | ||
108 | $_ = $a; # untaint $_ | |
109 | ||
110 | $_ = uc($a); # taint $_ | |
111 | ||
112 | check_taint 15, $_; | |
113 | ||
114 | /(\w)/; # taint $&, $`, $', $+, $1. | |
115 | check_taint 16, $&; | |
116 | check_taint 17, $`; | |
117 | check_taint 18, $'; | |
118 | check_taint 19, $+; | |
119 | check_taint 20, $1; | |
120 | check_taint_not 21, $2; | |
121 | ||
122 | /(.)/; # untaint $&, $`, $', $+, $1. | |
123 | check_taint_not 22, $&; | |
124 | check_taint_not 23, $`; | |
125 | check_taint_not 24, $'; | |
126 | check_taint_not 25, $+; | |
127 | check_taint_not 26, $1; | |
128 | check_taint_not 27, $2; | |
129 | ||
130 | /(\W)/; # taint $&, $`, $', $+, $1. | |
131 | check_taint 28, $&; | |
132 | check_taint 29, $`; | |
133 | check_taint 30, $'; | |
134 | check_taint 31, $+; | |
135 | check_taint 32, $1; | |
136 | check_taint_not 33, $2; | |
137 | ||
138 | /(\s)/; # taint $&, $`, $', $+, $1. | |
139 | check_taint 34, $&; | |
140 | check_taint 35, $`; | |
141 | check_taint 36, $'; | |
142 | check_taint 37, $+; | |
143 | check_taint 38, $1; | |
144 | check_taint_not 39, $2; | |
145 | ||
146 | /(\S)/; # taint $&, $`, $', $+, $1. | |
147 | check_taint 40, $&; | |
148 | check_taint 41, $`; | |
149 | check_taint 42, $'; | |
150 | check_taint 43, $+; | |
151 | check_taint 44, $1; | |
152 | check_taint_not 45, $2; | |
153 | ||
154 | $_ = $a; # untaint $_ | |
155 | ||
156 | check_taint_not 46, $_; | |
157 | ||
158 | /(b)/; # this must not taint | |
159 | check_taint_not 47, $&; | |
160 | check_taint_not 48, $`; | |
161 | check_taint_not 49, $'; | |
162 | check_taint_not 50, $+; | |
163 | check_taint_not 51, $1; | |
164 | check_taint_not 52, $2; | |
165 | ||
166 | $_ = $a; # untaint $_ | |
167 | ||
168 | check_taint_not 53, $_; | |
169 | ||
170 | $b = uc($a); # taint $b | |
171 | s/(.+)/$b/; # this must taint only the $_ | |
172 | ||
173 | check_taint 54, $_; | |
174 | check_taint_not 55, $&; | |
175 | check_taint_not 56, $`; | |
176 | check_taint_not 57, $'; | |
177 | check_taint_not 58, $+; | |
178 | check_taint_not 59, $1; | |
179 | check_taint_not 60, $2; | |
180 | ||
181 | $_ = $a; # untaint $_ | |
182 | ||
183 | s/(.+)/b/; # this must not taint | |
184 | check_taint_not 61, $_; | |
185 | check_taint_not 62, $&; | |
186 | check_taint_not 63, $`; | |
187 | check_taint_not 64, $'; | |
188 | check_taint_not 65, $+; | |
189 | check_taint_not 66, $1; | |
190 | check_taint_not 67, $2; | |
191 | ||
192 | $b = $a; # untaint $b | |
193 | ||
194 | ($b = $a) =~ s/\w/$&/; | |
195 | check_taint 68, $b; # $b should be tainted. | |
196 | check_taint_not 69, $a; # $a should be not. | |
197 | ||
198 | $_ = $a; # untaint $_ | |
199 | ||
200 | s/(\w)/\l$1/; # this must taint | |
201 | check_taint 70, $_; | |
202 | check_taint 71, $&; | |
203 | check_taint 72, $`; | |
204 | check_taint 73, $'; | |
205 | check_taint 74, $+; | |
206 | check_taint 75, $1; | |
207 | check_taint_not 76, $2; | |
208 | ||
209 | $_ = $a; # untaint $_ | |
210 | ||
211 | s/(\w)/\L$1/; # this must taint | |
212 | check_taint 77, $_; | |
213 | check_taint 78, $&; | |
214 | check_taint 79, $`; | |
215 | check_taint 80, $'; | |
216 | check_taint 81, $+; | |
217 | check_taint 82, $1; | |
218 | check_taint_not 83, $2; | |
219 | ||
220 | $_ = $a; # untaint $_ | |
221 | ||
222 | s/(\w)/\u$1/; # this must taint | |
223 | check_taint 84, $_; | |
224 | check_taint 85, $&; | |
225 | check_taint 86, $`; | |
226 | check_taint 87, $'; | |
227 | check_taint 88, $+; | |
228 | check_taint 89, $1; | |
229 | check_taint_not 90, $2; | |
230 | ||
231 | $_ = $a; # untaint $_ | |
232 | ||
233 | s/(\w)/\U$1/; # this must taint | |
234 | check_taint 91, $_; | |
235 | check_taint 92, $&; | |
236 | check_taint 93, $`; | |
237 | check_taint 94, $'; | |
238 | check_taint 95, $+; | |
239 | check_taint 96, $1; | |
240 | check_taint_not 97, $2; | |
241 | ||
242 | # After all this tainting $a should be cool. | |
243 | ||
244 | check_taint_not 98, $a; | |
245 | ||
906f284f NC |
# Number of the final test when setlocale() is unavailable (the taint
# checks above end at 98).
sub last_without_setlocale {
    return 98;
}
247 | ||
8ebc5c01 | 248 | # I think we've seen quite enough of taint. |
249 | # Let us do some *real* locale work now, | |
284102e8 | 250 | # unless setlocale() is missing (i.e. minitest). |
8ebc5c01 | 251 | |
252 | exit unless $have_setlocale; | |
253 | ||
284102e8 JH |
254 | # Find locales. |
255 | ||
6be75cd7 JH |
256 | debug "# Scanning for locales...\n"; |
257 | ||
258 | # Note that it's okay that some languages have their native names | |
259 | # capitalized here even though that's not "right". They are lowercased | |
260 | # anyway later during the scanning process (and besides, some clueless | |
261 | # vendor might have them capitalized erroneously anyway). | |
262 | ||
284102e8 | 263 | my $locales = <<EOF; |
6be75cd7 | 264 | Afrikaans:af:za:1 15 |
284102e8 | 265 | Arabic:ar:dz eg sa:6 arabic8 |
6be75cd7 JH |
266 | Brezhoneg Breton:br:fr:1 15 |
267 | Bulgarski Bulgarian:bg:bg:5 | |
dd8482fc | 268 | Chinese:zh:cn tw:cn.EUC eucCN eucTW euc.CN euc.TW Big5 GB2312 tw.EUC |
6be75cd7 JH |
269 | Hrvatski Croatian:hr:hr:2 |
270 | Cymraeg Welsh:cy:cy:1 14 15 | |
284102e8 | 271 | Czech:cs:cz:2 |
6be75cd7 JH |
272 | Dansk Danish:dk:da:1 15 |
273 | Nederlands Dutch:nl:be nl:1 15 | |
dd8482fc | 274 | English American British:en:au ca gb ie nz us uk zw:1 15 cp850 |
6be75cd7 JH |
275 | Esperanto:eo:eo:3 |
276 | Eesti Estonian:et:ee:4 6 13 | |
277 | Suomi Finnish:fi:fi:1 15 | |
278 | Flamish::fl:1 15 | |
6be75cd7 JH |
279 | Deutsch German:de:at be ch de lu:1 15 |
280 | Euskaraz Basque:eu:es fr:1 15 | |
6be75cd7 JH |
281 | Galego Galician:gl:es:1 15 |
282 | Ellada Greek:el:gr:7 g8 | |
6be75cd7 JH |
283 | Frysk:fy:nl:1 15 |
284 | Greenlandic:kl:gl:4 6 | |
284102e8 JH |
285 | Hebrew:iw:il:8 hebrew8 |
286 | Hungarian:hu:hu:2 | |
6be75cd7 JH |
287 | Indonesian:in:id:1 15 |
288 | Gaeilge Irish:ga:IE:1 14 15 | |
289 | Italiano Italian:it:ch it:1 15 | |
290 | Nihongo Japanese:ja:jp:euc eucJP jp.EUC sjis | |
284102e8 | 291 | Korean:ko:kr: |
6be75cd7 JH |
292 | Latine Latin:la:va:1 15 |
293 | Latvian:lv:lv:4 6 13 | |
294 | Lithuanian:lt:lt:4 6 13 | |
295 | Macedonian:mk:mk:1 15 | |
296 | Maltese:mt:mt:3 | |
dd8482fc JH |
297 | Moldovan:mo:mo:2 |
298 | Norsk Norwegian:no no\@nynorsk:no:1 15 | |
6be75cd7 JH |
299 | Occitan:oc:es:1 15 |
300 | Polski Polish:pl:pl:2 | |
284102e8 | 301 | Rumanian:ro:ro:2 |
a528dad0 | 302 | Russki Russian:ru:ru su ua:5 koi8 koi8r KOI8-R koi8u cp1251 cp866 |
6be75cd7 | 303 | Serbski Serbian:sr:yu:5 |
284102e8 | 304 | Slovak:sk:sk:2 |
6be75cd7 | 305 | Slovene Slovenian:sl:si:2 |
d43ce814 JH |
306 | Sqhip Albanian:sq:sq:1 15 |
307 | Svenska Swedish:sv:fi se:1 15 | |
6be75cd7 | 308 | Thai:th:th:11 tis620 |
284102e8 | 309 | Turkish:tr:tr:9 turkish8 |
dd8482fc | 310 | Yiddish:yi::1 15 |
284102e8 JH |
311 | EOF |
312 | ||
ee50adbe | 313 | if ($^O eq 'os390') { |
dd8482fc | 314 | # These cause heartburn. Broken locales? |
ee50adbe PP |
315 | $locales =~ s/Svenska Swedish:sv:fi se:1 15\n//; |
316 | $locales =~ s/Thai:th:th:11 tis620\n//; | |
317 | } | |
318 | ||
# True when bit 0x08 of $^H is set, or when ${^OPEN} mentions a ":utf8"
# layer.  The result decides which lib/locale/* data file is loaded.
sub in_utf8 () {
    my $open_layers = ${^OPEN} || "";
    return $^H & 0x08 || $open_layers =~ /:utf8/;
}
f9cbebe1 JH |
320 | |
321 | if (in_utf8) { | |
8a6cb2cb | 322 | require "lib/locale/utf8"; |
f9cbebe1 | 323 | } else { |
8a6cb2cb | 324 | require "lib/locale/latin1"; |
f9cbebe1 JH |
325 | } |
326 | ||
284102e8 JH |
327 | my @Locale; |
328 | my $Locale; | |
329 | my @Alnum_; | |
330 | ||
0e053d1e JH |
331 | my @utf8locale; |
332 | my %utf8skip; | |
333 | ||
# Return, in sorted order, every single-character string chr(0)..chr(255)
# that matches \w.  Both the \w match and the sort follow whatever locale
# pragma is in effect where this sub is compiled.
sub getalnum_ {
    my @candidates = map { chr } 0..255;
    my @word_chars = grep { /\w/ } @candidates;
    return sort @word_chars;
}
337 | ||
284102e8 JH |
# Record $locale in @Locale if setlocale(LC_ALL, $locale) accepts it.
#
# A name that is already recorded is skipped.  The discovery code probes
# the same name through several routes (e.g. "C" explicitly and again from
# the system's locale listing); without this check each duplicate would be
# tested and reported more than once.
sub trylocale {
    my $locale = shift;

    return if grep { $_ eq $locale } @Locale;

    if (setlocale(LC_ALL, $locale)) {
        push @Locale, $locale;
    }
}
8ebc5c01 | 344 | |
284102e8 JH |
# Expand the encodings field of a $locales table row into the list of
# encoding suffixes to probe.
#
#   - a purely numeric token N yields "ISO8859-N" and "iso8859N" (the HP
#     spelling); N == 1 additionally yields "roman8" (HP);
#   - any other token T yields T and "T.UTF-8";
#   - on os390 the IBM code pages are appended.
#
# The argument may be undef or empty (a row such as "Korean:ko:kr:" has no
# encodings field); that is treated as "no encodings" without a warning.
# Tokens are separated by whitespace; repeated blanks do not produce empty
# encoding names.
sub decode_encodings {
    my $spec = shift;
    $spec = '' unless defined $spec;

    my @enc;
    foreach my $token (split ' ', $spec) {
        if ($token =~ /^(\d+)$/) {
            push @enc, "ISO8859-$1";
            push @enc, "iso8859$1";     # HP
            push @enc, "roman8" if $1 eq '1';   # HP
        } else {
            push @enc, $token;
            push @enc, "$token.UTF-8";
        }
    }

    if ($^O eq 'os390') {
        push @enc, qw(IBM-037 IBM-819 IBM-1047);
    }

    return @enc;
}
366 | ||
284102e8 JH |
367 | trylocale("C"); |
368 | trylocale("POSIX"); | |
369 | foreach (0..15) { | |
370 | trylocale("ISO8859-$_"); | |
284102e8 | 371 | trylocale("iso8859$_"); |
097ee67d JH |
372 | trylocale("iso8859-$_"); |
373 | trylocale("iso_8859_$_"); | |
374 | trylocale("isolatin$_"); | |
375 | trylocale("isolatin-$_"); | |
376 | trylocale("iso_latin_$_"); | |
8ebc5c01 | 377 | } |
378 | ||
645e49ed JH |
379 | # Sanitize the environment so that we can run the external 'locale' |
380 | # program without the taint mode getting grumpy. | |
cce5967e JH |
381 | |
382 | # $ENV{PATH} is special in VMS. | |
383 | delete $ENV{PATH} if $^O ne 'VMS' or $Config{d_setenv}; | |
384 | ||
385 | # Other subversive stuff. | |
386 | delete @ENV{qw(IFS CDPATH ENV BASH_ENV)}; | |
dd8482fc | 387 | |
# Discover candidate locale names, feeding each one to trylocale().
# Exactly one strategy is used, in this order of preference:
#   1. the output of "/usr/bin/locale -a";
#   2. on VMS, the entries of the SYS$I18N_LOCALE directory;
#   3. on OpenBSD, the entries of /usr/share/locale;
#   4. as a last resort, every combination generated from the $locales
#      table.
if (-x "/usr/bin/locale" && open(LOCALES, "/usr/bin/locale -a 2>/dev/null|")) {
    while (<LOCALES>) {
        # It seems that /usr/bin/locale steadfastly outputs 8 bit data,
        # which ain't great when we're running this test with
        # PERL_UNICODE set so that utf8 locales will cause all IO handles
        # to default to (assume) utf8.
        next unless utf8::valid($_);
        chomp;
        trylocale($_);
    }
    close(LOCALES);
} elsif ($^O eq 'VMS' && defined($ENV{'SYS$I18N_LOCALE'}) && -d 'SYS$I18N_LOCALE') {
    # The SYS$I18N_LOCALE logical name search list was not present on
    # VAX VMS V5.5-12, but was on AXP && VAX VMS V6.2 as well as later versions.
    if (opendir(LOCALES, "SYS\$I18N_LOCALE:")) {
        while (defined($_ = readdir(LOCALES))) {
            chomp;
            trylocale($_);
        }
        # A directory handle must be released with closedir, not close.
        closedir(LOCALES);
    }
} elsif ($^O eq 'openbsd' && -e '/usr/share/locale') {

    # OpenBSD doesn't have a locale executable, so reading /usr/share/locale
    # is much easier and faster than the last resort method.

    if (opendir(LOCALES, '/usr/share/locale')) {
        while (defined($_ = readdir(LOCALES))) {
            chomp;
            trylocale($_);
        }
        # A directory handle must be released with closedir, not close.
        closedir(LOCALES);
    }
} else {

    # This is going to be slow.

    foreach my $locale (split(/\n/, $locales)) {
        my ($locale_name, $language_codes, $country_codes, $encodings) =
            split(/:/, $locale);

        # Trailing empty fields are dropped by split, so a row may leave
        # the later fields undefined.
        $language_codes = '' unless defined $language_codes;
        $country_codes  = '' unless defined $country_codes;
        $encodings      = '' unless defined $encodings;

        my @enc = decode_encodings($encodings);

        # Native/English language names, as written and lowercased.
        foreach my $loc (split(/ /, $locale_name)) {
            trylocale($loc);
            foreach my $enc (@enc) {
                trylocale("$loc.$enc");
            }
            $loc = lc $loc;
            foreach my $enc (@enc) {
                trylocale("$loc.$enc");
            }
        }

        # lang, lang_country and lang_COUNTRY, each with every encoding.
        foreach my $lang (split(/ /, $language_codes)) {
            trylocale($lang);
            foreach my $country (split(/ /, $country_codes)) {
                my $lc = "${lang}_${country}";
                trylocale($lc);
                foreach my $enc (@enc) {
                    trylocale("$lc.$enc");
                }
                my $lC = "${lang}_\U${country}";
                trylocale($lC);
                foreach my $enc (@enc) {
                    trylocale("$lC.$enc");
                }
            }
        }
    }
}
4599a1de | 453 | |
d43ce814 JH |
454 | setlocale(LC_ALL, "C"); |
455 | ||
86f50d7d | 456 | if ($^O eq 'darwin') { |
4373e181 | 457 | # Darwin 8/Mac OS X 10.4 and 10.5 have bad Basque locales: perl bug #35895, |
86f50d7d | 458 | # Apple bug ID# 4139653. It also has a problem in Byelorussian. |
4373e181 RGS |
459 | (my $v) = $Config{osvers} =~ /^(\d+)/; |
460 | if ($v >= 8 and $v < 10) { | |
86f50d7d | 461 | debug "# Skipping eu_ES, be_BY locales -- buggy in Darwin\n"; |
a5ec937f | 462 | @Locale = grep ! m/^(eu_ES(?:\..*)?|be_BY\.CP1131)$/, @Locale; |
a44d0896 NC |
463 | } elsif ($v < 11) { |
464 | debug "# Skipping be_BY locales -- buggy in Darwin\n"; | |
465 | @Locale = grep ! m/^be_BY\.CP1131$/, @Locale; | |
a5ec937f | 466 | } |
86f50d7d DD |
467 | } |
468 | ||
4599a1de JH |
469 | @Locale = sort @Locale; |
470 | ||
887ef7ed PP |
471 | debug "# Locales =\n"; |
472 | for ( @Locale ) { | |
473 | debug "# $_\n"; | |
474 | } | |
8ebc5c01 | 475 | |
284102e8 | 476 | my %Problem; |
2a680da6 JH |
477 | my %Okay; |
478 | my %Testing; | |
097ee67d | 479 | my @Neoalpha; |
d8093b23 | 480 | my %Neoalpha; |
284102e8 | 481 | |
2a680da6 JH |
# Record the outcome of test number $num for one locale.
#
# A true $passed appends the locale name to the list in $Okay{$num}; a
# false one marks $Problem{$num}{<locale>} and emits a debug line.
sub tryneoalpha {
    my ($name, $num, $passed) = @_;

    if ($passed) {
        push @{ $Okay{$num} }, $name;
    }
    else {
        $Problem{$num}{$name} = 1;
        debug "# failed $num with locale '$name'\n";
    }
}
491 | ||
284102e8 JH |
492 | foreach $Locale (@Locale) { |
493 | debug "# Locale = $Locale\n"; | |
494 | @Alnum_ = getalnum_(); | |
db4b7445 | 495 | debug "# w = ", join("",@Alnum_), "\n"; |
284102e8 JH |
496 | |
497 | unless (setlocale(LC_ALL, $Locale)) { | |
498 | foreach (99..103) { | |
499 | $Problem{$_}{$Locale} = -1; | |
8ebc5c01 | 500 | } |
284102e8 | 501 | next; |
8ebc5c01 | 502 | } |
8ebc5c01 | 503 | |
284102e8 JH |
504 | # Sieve the uppercase and the lowercase. |
505 | ||
097ee67d JH |
506 | my %UPPER = (); |
507 | my %lower = (); | |
508 | my %BoThCaSe = (); | |
284102e8 JH |
509 | for (@Alnum_) { |
510 | if (/[^\d_]/) { # skip digits and the _ | |
511 | if (uc($_) eq $_) { | |
512 | $UPPER{$_} = $_; | |
513 | } | |
514 | if (lc($_) eq $_) { | |
515 | $lower{$_} = $_; | |
516 | } | |
517 | } | |
518 | } | |
519 | foreach (keys %UPPER) { | |
097ee67d | 520 | $BoThCaSe{$_}++ if exists $lower{$_}; |
284102e8 JH |
521 | } |
522 | foreach (keys %lower) { | |
097ee67d | 523 | $BoThCaSe{$_}++ if exists $UPPER{$_}; |
284102e8 | 524 | } |
097ee67d | 525 | foreach (keys %BoThCaSe) { |
284102e8 JH |
526 | delete $UPPER{$_}; |
527 | delete $lower{$_}; | |
528 | } | |
529 | ||
db4b7445 A |
530 | debug "# UPPER = ", join("", sort keys %UPPER ), "\n"; |
531 | debug "# lower = ", join("", sort keys %lower ), "\n"; | |
532 | debug "# BoThCaSe = ", join("", sort keys %BoThCaSe), "\n"; | |
284102e8 JH |
533 | |
534 | # Find the alphabets that are not alphabets in the default locale. | |
8ebc5c01 | 535 | |
284102e8 JH |
536 | { |
537 | no locale; | |
8ebc5c01 | 538 | |
284102e8 JH |
539 | @Neoalpha = (); |
540 | for (keys %UPPER, keys %lower) { | |
541 | push(@Neoalpha, $_) if (/\W/); | |
d8093b23 | 542 | $Neoalpha{$_} = $_; |
284102e8 | 543 | } |
8ebc5c01 | 544 | } |
8ebc5c01 | 545 | |
284102e8 | 546 | @Neoalpha = sort @Neoalpha; |
8ebc5c01 | 547 | |
db4b7445 | 548 | debug "# Neoalpha = ", join("",@Neoalpha), "\n"; |
8ebc5c01 | 549 | |
284102e8 JH |
550 | if (@Neoalpha == 0) { |
551 | # If we have no Neoalphas the remaining tests are no-ops. | |
6be75cd7 | 552 | debug "# no Neoalpha, skipping tests 99..102 for locale '$Locale'\n"; |
a88c3d7c GS |
553 | foreach (99..102) { |
554 | push @{$Okay{$_}}, $Locale; | |
555 | } | |
6be75cd7 | 556 | } else { |
8ebc5c01 | 557 | |
6be75cd7 | 558 | # Test \w. |
284102e8 | 559 | |
ef4a39e5 | 560 | my $word = join('', @Neoalpha); |
8ebc5c01 | 561 | |
5fef21a3 JH |
562 | my $badutf8; |
563 | { | |
564 | local $SIG{__WARN__} = sub { | |
565 | $badutf8 = $_[0] =~ /Malformed UTF-8/; | |
566 | }; | |
567 | $Locale =~ /utf-?8/i; | |
568 | } | |
569 | ||
570 | if ($badutf8) { | |
571 | debug "# Locale name contains bad UTF-8, skipping test 99 for locale '$Locale'\n"; | |
572 | } elsif ($Locale =~ /utf-?8/i) { | |
ef4a39e5 JH |
573 | debug "# unknown whether locale and Unicode have the same \\w, skipping test 99 for locale '$Locale'\n"; |
574 | push @{$Okay{99}}, $Locale; | |
575 | } else { | |
576 | if ($word =~ /^(\w+)$/) { | |
577 | tryneoalpha($Locale, 99, 1); | |
578 | } else { | |
579 | tryneoalpha($Locale, 99, 0); | |
580 | } | |
8ac0d9e6 | 581 | } |
ef4a39e5 | 582 | |
2a680da6 | 583 | # Cross-check the whole 8-bit character set. |
8ebc5c01 | 584 | |
6be75cd7 | 585 | for (map { chr } 0..255) { |
2a680da6 JH |
586 | tryneoalpha($Locale, 100, |
587 | (/\w/ xor /\W/) || | |
588 | (/\d/ xor /\D/) || | |
589 | (/\s/ xor /\S/)); | |
284102e8 | 590 | } |
8ebc5c01 | 591 | |
6be75cd7 | 592 | # Test for read-only scalars' locale vs non-locale comparisons. |
284102e8 | 593 | |
284102e8 | 594 | { |
6be75cd7 JH |
595 | no locale; |
596 | $a = "qwerty"; | |
597 | { | |
598 | use locale; | |
2a680da6 | 599 | tryneoalpha($Locale, 101, ($a cmp "qwerty") == 0); |
8ebc5c01 | 600 | } |
601 | } | |
8ebc5c01 | 602 | |
6be75cd7 JH |
603 | { |
604 | my ($from, $to, $lesser, $greater, | |
605 | @test, %test, $test, $yes, $no, $sign); | |
606 | ||
607 | for (0..9) { | |
608 | # Select a slice. | |
609 | $from = int(($_*@Alnum_)/10); | |
610 | $to = $from + int(@Alnum_/10); | |
611 | $to = $#Alnum_ if ($to > $#Alnum_); | |
612 | $lesser = join('', @Alnum_[$from..$to]); | |
613 | # Select a slice one character on. | |
614 | $from++; $to++; | |
615 | $to = $#Alnum_ if ($to > $#Alnum_); | |
616 | $greater = join('', @Alnum_[$from..$to]); | |
617 | ($yes, $no, $sign) = ($lesser lt $greater | |
618 | ? (" ", "not ", 1) | |
619 | : ("not ", " ", -1)); | |
620 | # all these tests should FAIL (return 0). | |
621 | # Exact lt or gt cannot be tested because | |
622 | # in some locales, say, eacute and E may test equal. | |
623 | @test = | |
624 | ( | |
625 | $no.' ($lesser le $greater)', # 1 | |
626 | 'not ($lesser ne $greater)', # 2 | |
627 | ' ($lesser eq $greater)', # 3 | |
628 | $yes.' ($lesser ge $greater)', # 4 | |
629 | $yes.' ($lesser ge $greater)', # 5 | |
630 | $yes.' ($greater le $lesser )', # 7 | |
631 | 'not ($greater ne $lesser )', # 8 | |
632 | ' ($greater eq $lesser )', # 9 | |
633 | $no.' ($greater ge $lesser )', # 10 | |
0e053d1e | 634 | 'not (($lesser cmp $greater) == -($sign))' # 11 |
6be75cd7 JH |
635 | ); |
636 | @test{@test} = 0 x @test; | |
637 | $test = 0; | |
284102e8 | 638 | for my $ti (@test) { |
6be75cd7 JH |
639 | $test{$ti} = eval $ti; |
640 | $test ||= $test{$ti} | |
284102e8 | 641 | } |
2a680da6 | 642 | tryneoalpha($Locale, 102, $test == 0); |
6be75cd7 | 643 | if ($test) { |
6be75cd7 JH |
644 | debug "# lesser = '$lesser'\n"; |
645 | debug "# greater = '$greater'\n"; | |
646 | debug "# lesser cmp greater = ", | |
647 | $lesser cmp $greater, "\n"; | |
648 | debug "# greater cmp lesser = ", | |
649 | $greater cmp $lesser, "\n"; | |
650 | debug "# (greater) from = $from, to = $to\n"; | |
651 | for my $ti (@test) { | |
652 | debugf("# %-40s %-4s", $ti, | |
653 | $test{$ti} ? 'FAIL' : 'ok'); | |
654 | if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) { | |
655 | debugf("(%s == %4d)", $1, eval $1); | |
656 | } | |
657 | debug "\n#"; | |
658 | } | |
284102e8 | 659 | |
6be75cd7 JH |
660 | last; |
661 | } | |
284102e8 | 662 | } |
8ebc5c01 | 663 | } |
664 | } | |
6be75cd7 JH |
665 | |
666 | use locale; | |
667 | ||
668 | my ($x, $y) = (1.23, 1.23); | |
669 | ||
0e053d1e | 670 | $a = "$x"; |
6be75cd7 | 671 | printf ''; # printf used to reset locale to "C" |
0e053d1e | 672 | $b = "$y"; |
6be75cd7 | 673 | |
2a680da6 JH |
674 | debug "# 103..107: a = $a, b = $b, Locale = $Locale\n"; |
675 | ||
676 | tryneoalpha($Locale, 103, $a eq $b); | |
6be75cd7 JH |
677 | |
678 | my $c = "$x"; | |
679 | my $z = sprintf ''; # sprintf used to reset locale to "C" | |
680 | my $d = "$y"; | |
681 | ||
2a680da6 | 682 | debug "# 104..107: c = $c, d = $d, Locale = $Locale\n"; |
6be75cd7 | 683 | |
2a680da6 | 684 | tryneoalpha($Locale, 104, $c eq $d); |
6be75cd7 | 685 | |
2a680da6 | 686 | { |
9f1b1f2d | 687 | use warnings; |
2a680da6 | 688 | my $w = 0; |
0e053d1e JH |
689 | local $SIG{__WARN__} = |
690 | sub { | |
906f284f | 691 | print "# @_\n"; |
0e053d1e JH |
692 | $w++; |
693 | }; | |
6be75cd7 | 694 | |
0e053d1e JH |
695 | # The == (among other ops) used to warn for locales |
696 | # that had something else than "." as the radix character. | |
6be75cd7 | 697 | |
2a680da6 | 698 | tryneoalpha($Locale, 105, $c == 1.23); |
6be75cd7 | 699 | |
2a680da6 | 700 | tryneoalpha($Locale, 106, $c == $x); |
6be75cd7 | 701 | |
2a680da6 | 702 | tryneoalpha($Locale, 107, $c == $d); |
6be75cd7 | 703 | |
2a680da6 | 704 | { |
2de3dbcc | 705 | # no locale; # XXX did this ever work correctly? |
6be75cd7 | 706 | |
2a680da6 | 707 | my $e = "$x"; |
6be75cd7 | 708 | |
2a680da6 | 709 | debug "# 108..110: e = $e, Locale = $Locale\n"; |
6be75cd7 | 710 | |
2a680da6 | 711 | tryneoalpha($Locale, 108, $e == 1.23); |
6be75cd7 | 712 | |
2a680da6 JH |
713 | tryneoalpha($Locale, 109, $e == $x); |
714 | ||
715 | tryneoalpha($Locale, 110, $e == $c); | |
6be75cd7 | 716 | } |
2a680da6 | 717 | |
2a680da6 | 718 | my $f = "1.23"; |
906f284f | 719 | my $g = 2.34; |
2a680da6 | 720 | |
906f284f | 721 | debug "# 111..115: f = $f, g = $g, locale = $Locale\n"; |
2a680da6 | 722 | |
906f284f | 723 | tryneoalpha($Locale, 111, $f == 1.23); |
6be75cd7 | 724 | |
906f284f | 725 | tryneoalpha($Locale, 112, $f == $x); |
2a680da6 | 726 | |
906f284f NC |
727 | tryneoalpha($Locale, 113, $f == $c); |
728 | ||
729 | tryneoalpha($Locale, 114, abs(($f + $g) - 3.57) < 0.01); | |
730 | ||
731 | tryneoalpha($Locale, 115, $w == 0); | |
6be75cd7 JH |
732 | } |
733 | ||
26d80d95 LC |
734 | # Does taking lc separately differ from taking |
735 | # the lc "in-line"? (This was the bug 19990704.002, change #3568.) | |
736 | # The bug was in the caching of the 'o'-magic. | |
2a680da6 JH |
737 | { |
738 | use locale; | |
6be75cd7 | 739 | |
2a680da6 JH |
# Compare two strings case-insensitively, lowercasing each operand in its
# own statement first.  The separate temporaries are deliberate: this is
# the "taken separately" half of the lc comparison checked by test 116.
sub lcA {
    my $first  = lc $_[0];
    my $second = lc $_[1];
    return $first cmp $second;
}
6be75cd7 | 745 | |
2a680da6 JH |
# Compare two strings case-insensitively with both lc calls written
# in-line in the comparison itself.  This is the "in-line" half of the lc
# comparison checked by test 116; no temporaries are used on purpose.
sub lcB {
    return (lc($_[0]) cmp lc($_[1]));
}
6be75cd7 | 749 | |
2a680da6 JH |
750 | my $x = "ab"; |
751 | my $y = "aa"; | |
752 | my $z = "AB"; | |
6be75cd7 | 753 | |
906f284f | 754 | tryneoalpha($Locale, 116, |
2a680da6 JH |
755 | lcA($x, $y) == 1 && lcB($x, $y) == 1 || |
756 | lcA($x, $z) == 0 && lcB($x, $z) == 0); | |
6be75cd7 | 757 | } |
d8093b23 | 758 | |
26d80d95 LC |
759 | # Does lc of an UPPER (if different from the UPPER) match |
760 | # case-insensitively the UPPER, and does the UPPER match | |
761 | # case-insensitively the lc of the UPPER. And vice versa. | |
{
    use locale;
    no utf8;

    # Characters that are special inside a regular expression.
    my $re = qr/[\[\(\{\*\+\?\|\^\$\\]/;

    # Characters for which /$y/i and /$x/i disagree.
    my @f = ();

    # The UPPER pass and the lower pass are two sibling loops, and the
    # result for test 117 is recorded exactly once per locale after both
    # have run.  (With the lower loop nested inside the UPPER loop, the
    # lower pass and the result recording ran once per UPPER character
    # and not at all when there were no UPPER characters.)
    foreach my $x (keys %UPPER) {
        my $y = lc $x;
        next unless uc $y eq $x;
        print "# UPPER $x lc $y ",
            $x =~ /$y/i ? 1 : 0, " ",
            $y =~ /$x/i ? 1 : 0, "\n" if 0;
        #
        # If $x and $y contain regular expression characters
        # AND THEY lowercase (/i) to regular expression characters,
        # regcomp() will be mightily confused.  No, the \Q doesn't
        # help here (maybe regex engine internal lowercasing
        # is done after the \Q?)  An example of this happening is
        # the bg_BG (Bulgarian) locale under EBCDIC (OS/390 USS):
        # the chr(173) (the "[") is the lowercase of the chr(235).
        #
        # Similarly losing EBCDIC locales include cs_cz, cs_CZ,
        # el_gr, el_GR, en_us.IBM-037 (!), en_US.IBM-037 (!),
        # et_ee, et_EE, hr_hr, hr_HR, hu_hu, hu_HU, lt_LT,
        # mk_mk, mk_MK, nl_nl.IBM-037, nl_NL.IBM-037,
        # pl_pl, pl_PL, ro_ro, ro_RO, ru_ru, ru_RU,
        # sk_sk, sk_SK, sl_si, sl_SI, tr_tr, tr_TR.
        #
        # Similar things can happen even under (bastardised)
        # non-EBCDIC locales: in many European countries before the
        # advent of ISO 8859-x nationally customised versions of
        # ISO 646 were devised, reusing certain punctuation
        # characters for modified characters needed by the
        # country/language.  For example, the "|" might have
        # stood for U+00F6 or LATIN SMALL LETTER O WITH DIAERESIS.
        #
        if ($x =~ $re || $y =~ $re) {
            print "# Regex characters in '$x' or '$y', skipping test 117 for locale '$Locale'\n";
            next;
        }
        # With utf8 both will fail since the locale concept
        # of upper/lower does not work well in Unicode.
        push @f, $x unless $x =~ /$y/i == $y =~ /$x/i;
    }

    foreach my $x (keys %lower) {
        my $y = uc $x;
        next unless lc $y eq $x;
        print "# lower $x uc $y ",
            $x =~ /$y/i ? 1 : 0, " ",
            $y =~ /$x/i ? 1 : 0, "\n" if 0;
        if ($x =~ $re || $y =~ $re) { # See above.
            print "# Regex characters in '$x' or '$y', skipping test 117 for locale '$Locale'\n";
            next;
        }
        # With utf8 both will fail since the locale concept
        # of upper/lower does not work well in Unicode.
        push @f, $x unless $x =~ /$y/i == $y =~ /$x/i;
    }

    tryneoalpha($Locale, 117, @f == 0);
    if (@f) {
        print "# failed 117 locale '$Locale' characters @f\n"
    }
}
8ebc5c01 | 826 | } |
284102e8 | 827 | |
2a680da6 JH |
828 | # Recount the errors. |
829 | ||
# Emit the TAP line for every locale-dependent test.  A test is reported
# as failed when any locale recorded a problem for it, or when no locale
# at all recorded a success for it.
foreach my $num (&last_without_setlocale()+1..$last) {
    my $failed = $Problem{$num}
        || !defined $Okay{$num}
        || !@{$Okay{$num}};

    if ($failed) {
        if ($num == 102) {
            print "# The failure of test 102 is not necessarily fatal.\n";
            print "# It usually indicates a problem in the environment,\n";
            print "# not in Perl itself.\n";
        }
        print "not ";
    }
    print "ok $num\n";
}
fb73857a | 841 | |
2a680da6 JH |
842 | # Give final advice. |
843 | ||
284102e8 JH |
844 | my $didwarn = 0; |
845 | ||
# For every locale test that recorded a problem, print (as '#' comment
# lines) which locale definitions failed it, followed by the standard
# advice on reporting the failure.  Sets $didwarn when anything was
# printed so that the per-locale summary below is shown too.
foreach (99..$last) {
    if ($Problem{$_}) {
        my @f = sort keys %{ $Problem{$_} };
        my $f = join(" ", @f);
        # Wrap the list of names so each output line stays a comment.
        $f =~ s/(.{50,60}) /$1\n#\t/g;

        # The strings below must actually be printed; as a bare list in
        # void context they were silently discarded.
        print
            "#\n",
            "# The locale ", (@f == 1 ? "definition" : "definitions"), "\n#\n",
            "#\t", $f, "\n#\n",
            "# on your system may have errors because the locale test $_\n",
            "# failed in ", (@f == 1 ? "that locale" : "those locales"),
            ".\n";
        print <<EOW;
#
# If your users are not using these locales you are safe for the moment,
# but please report this failure first to perlbug\@perl.com using the
# perlbug script (as described in the INSTALL file) so that the exact
# details of the failures can be sorted out first and then your operating
# system supplier can be alerted about these anomalies.
#
EOW
        $didwarn = 1;
    }
}
774d564b | 870 | |
26d80d95 | 871 | # Tell which locales were okay and which were not. |
2a680da6 | 872 | |
# When any advice was printed above, summarise on STDERR which locales
# passed every locale test and which did not.
if ($didwarn) {
    my (@s, @F);

    # Consider every locale-dependent test, i.e. the same range that the
    # recount and advice loops use.  Starting later would list a locale
    # that failed one of the first locale tests as "tested okay".
    my $first_locale_test = last_without_setlocale() + 1;

    foreach my $l (@Locale) {
        my $p = 0;
        foreach my $t ($first_locale_test..$last) {
            # Check the outer key first so that merely looking does not
            # create empty %Problem entries.
            $p++ if $Problem{$t} && $Problem{$t}{$l};
        }
        push @s, $l if $p == 0;
        push @F, $l unless $p == 0;
    }

    if (@s) {
        my $s = join(" ", @s);
        $s =~ s/(.{50,60}) /$1\n#\t/g;

        warn
            "# The following locales\n#\n",
            "#\t", $s, "\n#\n",
            "# tested okay.\n#\n";
    } else {
        warn "# None of your locales were fully okay.\n";
    }

    if (@F) {
        my $F = join(" ", @F);
        $F =~ s/(.{50,60}) /$1\n#\t/g;

        warn
            "# The following locales\n#\n",
            "#\t", $F, "\n#\n",
            "# had problems.\n#\n";
    } else {
        warn "# None of your locales were broken.\n";
    }

    if (@utf8locale) {
        my $S = join(" ", @utf8locale);
        $S =~ s/(.{50,60}) /$1\n#\t/g;

        warn "#\n# The following locales\n#\n",
            "#\t", $S, "\n#\n",
            "# were skipped for the tests ",
            join(" ", sort {$a<=>$b} keys %utf8skip), "\n",
            "# because UTF-8 and locales do not work together in Perl.\n#\n";
    }
}
90248788 | 920 | |
906f284f NC |
# Number of the final test when setlocale() is available.  Because "last"
# is also a Perl keyword, this sub has to be called as &last.
sub last {
    return 117;
}
922 | ||
90248788 | 923 | # eof |