Commit | Line | Data |
---|---|---|
8ebc5c01 | 1 | #!./perl -wT |
2 | ||
66cbab2c KW |
3 | # This tests plain 'use locale' and adorned 'use locale ":not_characters"' |
4 | # Because these pragmas are compile time, and I (khw) am trying to test | |
5 | # without using 'eval' as much as possible, which might cloud the issue, the | |
6 | # crucial parts of the code are duplicated in a block for each pragma. | |
7 | ||
7fd05a1e KW |
8 | # Unfortunately, many systems have defective locale definitions. This test |
9 | # file looks for both perl bugs and bugs in the system's locale definitions. | |
10 | # It can be difficult to tease apart which is which. For the latter, there | |
11 | # are tests that are based on the POSIX standard. A character isn't supposed | |
12 | # to be both a space and graphic, for example. Another example is if a | |
13 | # character is the uppercase of another, that other should be the lowercase of | |
14 | # the first. Including tests for these allows you to test for defective | |
15 | # locales, as described in perllocale. The way this file distinguishes | |
16 | # between defective locales, and perl bugs is to see what percentage of | |
17 | # locales fail a given test. If it's a lot, then it's more likely to be a | |
18 | # perl bug; only a few, those particular locales are likely defective. In | |
19 | # that case the failing tests are marked TODO. (They should be reported to | |
20 | # the vendor, however; but it's not perl's problem.) In some cases, this | |
21 | # script has caused tickets to be filed against perl which turn out to be the | |
22 | # platform's bug, but a higher percentage of locales are failing than the | |
23 | # built-in cut-off point. For those platforms, code has been added to | |
24 | # increase the cut-off, so those platforms don't trigger failing test reports. | |
25 | # Ideally, the platforms would get fixed and that code would be changed to | |
26 | # only kick-in when run on versions that are earlier than the fixed one. But, | |
27 | # this rarely happens in practice. | |
28 | ||
6c2e653d KW |
29 | # To make a TODO test, add the string 'TODO' to its %test_names value |
30 | ||
9de25f67 | 31 | my $is_ebcdic = ord("A") == 193; |
2fb9a6f6 | 32 | my $os = lc $^O; |
9de25f67 | 33 | |
02d357f9 KW |
34 | no warnings 'locale'; # We test even weird locales; and do some scary things |
35 | # in ok locales | |
36 | ||
e3a2734b KW |
37 | binmode STDOUT, ':utf8'; |
38 | binmode STDERR, ':utf8'; | |
39 | ||
8ebc5c01 | 40 | BEGIN { |
41 | chdir 't' if -d 't'; | |
20822f61 | 42 | @INC = '../lib'; |
f9cbebe1 | 43 | unshift @INC, '.'; |
ef9d5242 KW |
44 | require './loc_tools.pl'; |
45 | unless (locales_enabled('LC_CTYPE')) { | |
b002077a CS |
46 | print "1..0\n"; |
47 | exit; | |
48 | } | |
2de3dbcc | 49 | $| = 1; |
ef9d5242 | 50 | require Config; import Config; |
8ebc5c01 | 51 | } |
52 | ||
53 | use strict; | |
6b88b15b | 54 | use feature 'fc'; |
8ebc5c01 | 55 | |
92891c66 | 56 | # =1 adds debugging output; =2 increases the verbosity somewhat |
d127e8bc | 57 | our $debug = $ENV{PERL_DEBUG_FULL_TEST} // 0; |
284102e8 | 58 | |
6d5d702a KW |
59 | # Certain tests have been shown to be problematical for a few locales. Don't |
60 | # fail them unless at least this percentage of the tested locales fail. | |
4874c8de | 61 | # On AIX machines, many locales call a no-break space a graphic. |
df33771d | 62 | # (There aren't 1000 locales currently in existence, so 99.9 works) |
4787c1a5 KW |
63 | # EBCDIC os390 has more locales fail than normal, because it has locales that |
64 | # move various critical characters like '['. | |
2fb9a6f6 | 65 | my $acceptable_failure_percentage = ($os =~ / ^ ( aix ) $ /x) |
31f658ea | 66 | ? 99.9 |
2fb9a6f6 | 67 | : ($os =~ / ^ ( os390 ) $ /x) |
4787c1a5 KW |
68 | ? 10 |
69 | : 5; | |
6d5d702a | 70 | |
7493b8f2 | 71 | # The list of test numbers of the problematic tests. |
5b1f18a7 | 72 | my %problematical_tests; |
7493b8f2 | 73 | |
852b6749 KW |
74 | # If any %problematical_tests fails in one of these locales, it is |
75 | # considered a TODO. | |
76 | my %known_bad_locales = ( | |
77 | irix => qr/ ^ (?: cs | hu | sk ) $/x, | |
78 | darwin => qr/ ^ lt_LT.ISO8859 /ix, | |
79 | os390 => qr/ ^ italian /ix, | |
13f60a09 | 80 | netbsd => qr/\bISO8859-2\b/i, |
852b6749 | 81 | ); |
7493b8f2 | 82 | |
dc51aebe KW |
83 | # cygwin isn't returning proper radix length in this locale, but supposedly to |
84 | # be fixed in later versions. | |
2fb9a6f6 | 85 | if ($os eq 'cygwin' && version->new(($Config{osvers} =~ /^(\d+(?:\.\d+)+)/)[0]) le v2.4.1) { |
dc51aebe KW |
86 | $known_bad_locales{'cygwin'} = qr/ ^ ps_AF /ix; |
87 | } | |
88 | ||
db4b7445 A |
89 | use Dumpvalue; |
90 | ||
91 | my $dumper = Dumpvalue->new( | |
92 | tick => qq{"}, | |
93 | quoteHighBit => 0, | |
94 | unctrl => "quote" | |
95 | ); | |
d127e8bc | 96 | |
6be75cd7 | 97 | sub debug { |
db4b7445 | 98 | return unless $debug; |
46973bb2 | 99 | my($mess) = join "", '# ', @_; |
d8f8a481 | 100 | chomp $mess; |
bd083ee5 | 101 | print STDERR $dumper->stringify($mess,1), "\n"; |
6be75cd7 JH |
102 | } |
103 | ||
d127e8bc KW |
104 | sub note { |
105 | local $debug = 1; | |
106 | debug @_; | |
107 | } | |
108 | ||
92891c66 KW |
109 | sub debug_more { |
110 | return unless $debug > 1; | |
111 | return debug(@_); | |
112 | } | |
113 | ||
6be75cd7 | 114 | sub debugf { |
bd083ee5 | 115 | printf STDERR @_ if $debug; |
6be75cd7 JH |
116 | } |
117 | ||
40205cab | 118 | $a = 'abc %9'; |
8ebc5c01 | 119 | |
c213d471 KW |
120 | my $test_num = 0; |
121 | ||
8ebc5c01 | 122 | sub ok { |
c213d471 | 123 | my ($result, $message) = @_; |
e3a2734b | 124 | $message = "" unless defined $message; |
8ebc5c01 | 125 | |
126 | print 'not ' unless ($result); | |
c213d471 | 127 | print "ok " . ++$test_num; |
e3a2734b KW |
128 | print " $message"; |
129 | print "\n"; | |
cf82bb84 | 130 | return ($result) ? 1 : 0; |
8ebc5c01 | 131 | } |
132 | ||
62e020bb KW |
133 | sub skip { |
134 | return ok 1, "skipped: " . shift; | |
135 | } | |
136 | ||
137 | sub fail { | |
138 | return ok 0, shift; | |
139 | } | |
140 | ||
8ebc5c01 | 141 | # First we'll do a lot of taint checking for locales. |
142 | # This is the easiest to test, actually, as any locale, | |
143 | # even the default locale will taint under 'use locale'. | |
144 | ||
145 | sub is_tainted { # hello, camel two. | |
9f1b1f2d | 146 | no warnings 'uninitialized' ; |
8ebc5c01 | 147 | my $dummy; |
ba74571d | 148 | local $@; |
8ebc5c01 | 149 | not eval { $dummy = join("", @_), kill 0; 1 } |
150 | } | |
151 | ||
a9b7c637 KW |
152 | sub check_taint ($;$) { |
153 | my $message_tail = $_[1] // ""; | |
2575405e KW |
154 | |
155 | # Extra blanks are so aligns with taint_not output | |
156 | $message_tail = ": $message_tail" if $message_tail; | |
a9b7c637 | 157 | ok is_tainted($_[0]), "verify that is tainted$message_tail"; |
8ebc5c01 | 158 | } |
159 | ||
a9b7c637 KW |
160 | sub check_taint_not ($;$) { |
161 | my $message_tail = $_[1] // ""; | |
2575405e | 162 | $message_tail = ": $message_tail" if $message_tail; |
a9b7c637 | 163 | ok((not is_tainted($_[0])), "verify that isn't tainted$message_tail"); |
8ebc5c01 | 164 | } |
165 | ||
308482c2 KW |
166 | foreach my $category (qw(ALL COLLATE CTYPE MESSAGES MONETARY NUMERIC TIME)) { |
167 | my $short_result = locales_enabled($category); | |
168 | ok ($short_result == 0 || $short_result == 1, | |
169 | "Verify locales_enabled('$category') returns 0 or 1"); | |
170 | debug("locales_enabled('$category') returned '$short_result'"); | |
171 | my $long_result = locales_enabled("LC_$category"); | |
172 | if (! ok ($long_result == $short_result, | |
173 | " and locales_enabled('LC_$category') returns " | |
174 | . "the same value") | |
175 | ) { | |
176 | debug("locales_enabled('LC_$category') returned $long_result"); | |
177 | } | |
178 | } | |
179 | ||
bf3cd0e6 KW |
180 | "\tb\t" =~ /^m?(\s)(.*)\1$/; |
181 | check_taint_not $&, "not tainted outside 'use locale'"; | |
182 | ; | |
183 | ||
8ebc5c01 | 184 | use locale; # engage locale and therefore locale taint. |
185 | ||
b6fb4de7 KW |
186 | # BE SURE TO COPY ANYTHING YOU ADD to these tests to the block below for |
187 | # ":notcharacters" | |
2575405e KW |
188 | |
189 | check_taint_not $a, '$a'; | |
190 | ||
191 | check_taint uc($a), 'uc($a)'; | |
192 | check_taint "\U$a", '"\U$a"'; | |
193 | check_taint ucfirst($a), 'ucfirst($a)'; | |
194 | check_taint "\u$a", '"\u$a"'; | |
195 | check_taint lc($a), 'lc($a)'; | |
196 | check_taint fc($a), 'fc($a)'; | |
197 | check_taint "\L$a", '"\L$a"'; | |
198 | check_taint "\F$a", '"\F$a"'; | |
199 | check_taint lcfirst($a), 'lcfirst($a)'; | |
200 | check_taint "\l$a", '"\l$a"'; | |
201 | ||
202 | check_taint_not sprintf('%e', 123.456), "sprintf('%e', 123.456)"; | |
203 | check_taint_not sprintf('%f', 123.456), "sprintf('%f', 123.456)"; | |
204 | check_taint_not sprintf('%g', 123.456), "sprintf('%g', 123.456)"; | |
205 | check_taint_not sprintf('%d', 123.456), "sprintf('%d', 123.456)"; | |
206 | check_taint_not sprintf('%x', 123.456), "sprintf('%x', 123.456)"; | |
8ebc5c01 | 207 | |
208 | $_ = $a; # untaint $_ | |
209 | ||
210 | $_ = uc($a); # taint $_ | |
211 | ||
2575405e | 212 | check_taint $_, '$_ = uc($a)'; |
8ebc5c01 | 213 | |
214 | /(\w)/; # taint $&, $`, $', $+, $1. | |
2575405e | 215 | check_taint $&, "\$& from /(\\w)/"; |
fdf73a7f KW |
216 | check_taint $`, "\t\$`"; |
217 | check_taint $', "\t\$'"; | |
218 | check_taint $+, "\t\$+"; | |
219 | check_taint $1, "\t\$1"; | |
220 | check_taint_not $2, "\t\$2"; | |
8ebc5c01 | 221 | |
222 | /(.)/; # untaint $&, $`, $', $+, $1. | |
2575405e | 223 | check_taint_not $&, "\$& from /(.)/"; |
fdf73a7f KW |
224 | check_taint_not $`, "\t\$`"; |
225 | check_taint_not $', "\t\$'"; | |
226 | check_taint_not $+, "\t\$+"; | |
227 | check_taint_not $1, "\t\$1"; | |
228 | check_taint_not $2, "\t\$2"; | |
8ebc5c01 | 229 | |
230 | /(\W)/; # taint $&, $`, $', $+, $1. | |
2575405e | 231 | check_taint $&, "\$& from /(\\W)/"; |
fdf73a7f KW |
232 | check_taint $`, "\t\$`"; |
233 | check_taint $', "\t\$'"; | |
234 | check_taint $+, "\t\$+"; | |
235 | check_taint $1, "\t\$1"; | |
236 | check_taint_not $2, "\t\$2"; | |
8ebc5c01 | 237 | |
9486279c | 238 | /(.)/; # untaint $&, $`, $', $+, $1. |
2575405e | 239 | check_taint_not $&, "\$& from /(.)/"; |
fdf73a7f KW |
240 | check_taint_not $`, "\t\$`"; |
241 | check_taint_not $', "\t\$'"; | |
242 | check_taint_not $+, "\t\$+"; | |
243 | check_taint_not $1, "\t\$1"; | |
244 | check_taint_not $2, "\t\$2"; | |
9486279c | 245 | |
8ebc5c01 | 246 | /(\s)/; # taint $&, $`, $', $+, $1. |
2575405e | 247 | check_taint $&, "\$& from /(\\s)/"; |
fdf73a7f KW |
248 | check_taint $`, "\t\$`"; |
249 | check_taint $', "\t\$'"; | |
250 | check_taint $+, "\t\$+"; | |
251 | check_taint $1, "\t\$1"; | |
252 | check_taint_not $2, "\t\$2"; | |
8ebc5c01 | 253 | |
9486279c | 254 | /(.)/; # untaint $&, $`, $', $+, $1. |
2575405e | 255 | check_taint_not $&, "\$& from /(.)/"; |
9486279c | 256 | |
8ebc5c01 | 257 | /(\S)/; # taint $&, $`, $', $+, $1. |
2575405e | 258 | check_taint $&, "\$& from /(\\S)/"; |
fdf73a7f KW |
259 | check_taint $`, "\t\$`"; |
260 | check_taint $', "\t\$'"; | |
261 | check_taint $+, "\t\$+"; | |
262 | check_taint $1, "\t\$1"; | |
263 | check_taint_not $2, "\t\$2"; | |
8ebc5c01 | 264 | |
63baef57 | 265 | /(.)/; # untaint $&, $`, $', $+, $1. |
2575405e | 266 | check_taint_not $&, "\$& from /(.)/"; |
63baef57 | 267 | |
40205cab KW |
268 | "0" =~ /(\d)/; # taint $&, $`, $', $+, $1. |
269 | check_taint $&, "\$& from /(\\d)/"; | |
270 | check_taint $`, "\t\$`"; | |
271 | check_taint $', "\t\$'"; | |
272 | check_taint $+, "\t\$+"; | |
273 | check_taint $1, "\t\$1"; | |
274 | check_taint_not $2, "\t\$2"; | |
275 | ||
276 | /(.)/; # untaint $&, $`, $', $+, $1. | |
277 | check_taint_not $&, "\$& from /(.)/"; | |
278 | ||
279 | /(\D)/; # taint $&, $`, $', $+, $1. | |
280 | check_taint $&, "\$& from /(\\D)/"; | |
281 | check_taint $`, "\t\$`"; | |
282 | check_taint $', "\t\$'"; | |
283 | check_taint $+, "\t\$+"; | |
284 | check_taint $1, "\t\$1"; | |
285 | check_taint_not $2, "\t\$2"; | |
286 | ||
287 | /(.)/; # untaint $&, $`, $', $+, $1. | |
288 | check_taint_not $&, "\$& from /(.)/"; | |
289 | ||
290 | /([[:alnum:]])/; # taint $&, $`, $', $+, $1. | |
291 | check_taint $&, "\$& from /([[:alnum:]])/"; | |
292 | check_taint $`, "\t\$`"; | |
293 | check_taint $', "\t\$'"; | |
294 | check_taint $+, "\t\$+"; | |
295 | check_taint $1, "\t\$1"; | |
296 | check_taint_not $2, "\t\$2"; | |
297 | ||
298 | /(.)/; # untaint $&, $`, $', $+, $1. | |
299 | check_taint_not $&, "\$& from /(.)/"; | |
300 | ||
301 | /([[:^alnum:]])/; # taint $&, $`, $', $+, $1. | |
302 | check_taint $&, "\$& from /([[:^alnum:]])/"; | |
303 | check_taint $`, "\t\$`"; | |
304 | check_taint $', "\t\$'"; | |
305 | check_taint $+, "\t\$+"; | |
306 | check_taint $1, "\t\$1"; | |
307 | check_taint_not $2, "\t\$2"; | |
308 | ||
63baef57 | 309 | "a" =~ /(a)|(\w)/; # taint $&, $`, $', $+, $1. |
2575405e | 310 | check_taint $&, "\$& from /(a)|(\\w)/"; |
63baef57 KW |
311 | check_taint $`, "\t\$`"; |
312 | check_taint $', "\t\$'"; | |
313 | check_taint $+, "\t\$+"; | |
314 | check_taint $1, "\t\$1"; | |
2575405e KW |
315 | ok($1 eq 'a', ("\t" x 5) . "\$1 is 'a'"); |
316 | ok(! defined $2, ("\t" x 5) . "\$2 is undefined"); | |
63baef57 KW |
317 | check_taint_not $2, "\t\$2"; |
318 | check_taint_not $3, "\t\$3"; | |
319 | ||
320 | /(.)/; # untaint $&, $`, $', $+, $1. | |
2575405e | 321 | check_taint_not $&, "\$& from /(.)/"; |
63baef57 KW |
322 | |
323 | "\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i; # no tainting because no locale dependence | |
2575405e | 324 | check_taint_not $&, "\$& from /(\\N{CYRILLIC CAPITAL LETTER A})/i"; |
63baef57 KW |
325 | check_taint_not $`, "\t\$`"; |
326 | check_taint_not $', "\t\$'"; | |
327 | check_taint_not $+, "\t\$+"; | |
328 | check_taint_not $1, "\t\$1"; | |
2575405e | 329 | ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\t\$1 is 'small cyrillic a'"); |
63baef57 KW |
330 | check_taint_not $2, "\t\$2"; |
331 | ||
332 | /(.)/; # untaint $&, $`, $', $+, $1. | |
2575405e | 333 | check_taint_not $&, "\$& from /./"; |
63baef57 | 334 | |
40205cab KW |
335 | "(\N{KELVIN SIGN})" =~ /(\N{KELVIN SIGN})/i; # taints because depends on locale |
336 | check_taint $&, "\$& from /(\\N{KELVIN SIGN})/i"; | |
337 | check_taint $`, "\t\$`"; | |
338 | check_taint $', "\t\$'"; | |
339 | check_taint $+, "\t\$+"; | |
340 | check_taint $1, "\t\$1"; | |
341 | check_taint_not $2, "\t\$2"; | |
342 | ||
63baef57 | 343 | /(.)/; # untaint $&, $`, $', $+, $1. |
2575405e | 344 | check_taint_not $&, "\$& from /(.)/"; |
63baef57 KW |
345 | |
346 | "a:" =~ /(.)\b(.)/; # taint $&, $`, $', $+, $1. | |
2575405e | 347 | check_taint $&, "\$& from /(.)\\b(.)/"; |
63baef57 KW |
348 | check_taint $`, "\t\$`"; |
349 | check_taint $', "\t\$'"; | |
350 | check_taint $+, "\t\$+"; | |
351 | check_taint $1, "\t\$1"; | |
352 | check_taint $2, "\t\$2"; | |
353 | check_taint_not $3, "\t\$3"; | |
354 | ||
355 | /(.)/; # untaint $&, $`, $', $+, $1. | |
2575405e | 356 | check_taint_not $&, "\$& from /./"; |
63baef57 KW |
357 | |
358 | "aa" =~ /(.)\B(.)/; # taint $&, $`, $', $+, $1. | |
2575405e | 359 | check_taint $&, "\$& from /(.)\\B(.)/"; |
63baef57 KW |
360 | check_taint $`, "\t\$`"; |
361 | check_taint $', "\t\$'"; | |
362 | check_taint $+, "\t\$+"; | |
363 | check_taint $1, "\t\$1"; | |
364 | check_taint $2, "\t\$2"; | |
365 | check_taint_not $3, "\t\$3"; | |
366 | ||
367 | /(.)/; # untaint $&, $`, $', $+, $1. | |
2575405e | 368 | check_taint_not $&, "\$& from /./"; |
63baef57 KW |
369 | |
370 | "aaa" =~ /(.).(\1)/i; # notaint because not locale dependent | |
2575405e | 371 | check_taint_not $&, "\$ & from /(.).(\\1)/"; |
63baef57 KW |
372 | check_taint_not $`, "\t\$`"; |
373 | check_taint_not $', "\t\$'"; | |
374 | check_taint_not $+, "\t\$+"; | |
375 | check_taint_not $1, "\t\$1"; | |
376 | check_taint_not $2, "\t\$2"; | |
2575405e | 377 | check_taint_not $3, "\t\$3"; |
63baef57 KW |
378 | |
379 | /(.)/; # untaint $&, $`, $', $+, $1. | |
2575405e | 380 | check_taint_not $&, "\$ & from /./"; |
63baef57 | 381 | |
8ebc5c01 | 382 | $_ = $a; # untaint $_ |
383 | ||
2575405e | 384 | check_taint_not $_, 'untainting $_ works'; |
8ebc5c01 | 385 | |
386 | /(b)/; # this must not taint | |
2575405e | 387 | check_taint_not $&, "\$ & from /(b)/"; |
fdf73a7f KW |
388 | check_taint_not $`, "\t\$`"; |
389 | check_taint_not $', "\t\$'"; | |
390 | check_taint_not $+, "\t\$+"; | |
391 | check_taint_not $1, "\t\$1"; | |
392 | check_taint_not $2, "\t\$2"; | |
8ebc5c01 | 393 | |
394 | $_ = $a; # untaint $_ | |
395 | ||
2575405e | 396 | check_taint_not $_, 'untainting $_ works'; |
8ebc5c01 | 397 | |
398 | $b = uc($a); # taint $b | |
399 | s/(.+)/$b/; # this must taint only the $_ | |
400 | ||
2575405e | 401 | check_taint $_, '$_ (wasn\'t tainted) from s/(.+)/$b/ where $b is tainted'; |
fdf73a7f KW |
402 | check_taint_not $&, "\t\$&"; |
403 | check_taint_not $`, "\t\$`"; | |
404 | check_taint_not $', "\t\$'"; | |
405 | check_taint_not $+, "\t\$+"; | |
406 | check_taint_not $1, "\t\$1"; | |
407 | check_taint_not $2, "\t\$2"; | |
8ebc5c01 | 408 | |
409 | $_ = $a; # untaint $_ | |
410 | ||
411 | s/(.+)/b/; # this must not taint | |
2575405e | 412 | check_taint_not $_, '$_ (wasn\'t tainted) from s/(.+)/b/'; |
fdf73a7f KW |
413 | check_taint_not $&, "\t\$&"; |
414 | check_taint_not $`, "\t\$`"; | |
415 | check_taint_not $', "\t\$'"; | |
416 | check_taint_not $+, "\t\$+"; | |
417 | check_taint_not $1, "\t\$1"; | |
418 | check_taint_not $2, "\t\$2"; | |
8ebc5c01 | 419 | |
420 | $b = $a; # untaint $b | |
421 | ||
422 | ($b = $a) =~ s/\w/$&/; | |
2575405e KW |
423 | check_taint $b, '$b from ($b = $a) =~ s/\w/$&/'; # $b should be tainted. |
424 | check_taint_not $a, '$a from ($b = $a) =~ s/\w/$&/'; # $a should be not. | |
8ebc5c01 | 425 | |
426 | $_ = $a; # untaint $_ | |
427 | ||
428 | s/(\w)/\l$1/; # this must taint | |
2575405e | 429 | check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\l$1/,'; # this must taint |
fdf73a7f KW |
430 | check_taint $&, "\t\$&"; |
431 | check_taint $`, "\t\$`"; | |
432 | check_taint $', "\t\$'"; | |
433 | check_taint $+, "\t\$+"; | |
434 | check_taint $1, "\t\$1"; | |
435 | check_taint_not $2, "\t\$2"; | |
8ebc5c01 | 436 | |
437 | $_ = $a; # untaint $_ | |
438 | ||
439 | s/(\w)/\L$1/; # this must taint | |
2575405e | 440 | check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\L$1/,'; |
fdf73a7f KW |
441 | check_taint $&, "\t\$&"; |
442 | check_taint $`, "\t\$`"; | |
443 | check_taint $', "\t\$'"; | |
444 | check_taint $+, "\t\$+"; | |
445 | check_taint $1, "\t\$1"; | |
446 | check_taint_not $2, "\t\$2"; | |
8ebc5c01 | 447 | |
448 | $_ = $a; # untaint $_ | |
449 | ||
450 | s/(\w)/\u$1/; # this must taint | |
2575405e | 451 | check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\u$1/'; |
fdf73a7f KW |
452 | check_taint $&, "\t\$&"; |
453 | check_taint $`, "\t\$`"; | |
454 | check_taint $', "\t\$'"; | |
455 | check_taint $+, "\t\$+"; | |
456 | check_taint $1, "\t\$1"; | |
457 | check_taint_not $2, "\t\$2"; | |
8ebc5c01 | 458 | |
459 | $_ = $a; # untaint $_ | |
460 | ||
461 | s/(\w)/\U$1/; # this must taint | |
2575405e | 462 | check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\U$1/'; |
fdf73a7f KW |
463 | check_taint $&, "\t\$&"; |
464 | check_taint $`, "\t\$`"; | |
465 | check_taint $', "\t\$'"; | |
466 | check_taint $+, "\t\$+"; | |
467 | check_taint $1, "\t\$1"; | |
468 | check_taint_not $2, "\t\$2"; | |
8ebc5c01 | 469 | |
faab3281 KW |
470 | # After all this tainting $a should be cool. |
471 | ||
2575405e | 472 | check_taint_not $a, '$a still not tainted'; |
faab3281 | 473 | |
b99851e1 KW |
474 | "a" =~ /([a-z])/; |
475 | check_taint_not $1, '"a" =~ /([a-z])/'; | |
476 | "foo.bar_baz" =~ /^(.*)[._](.*?)$/; # Bug 120675 | |
477 | check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/'; | |
478 | ||
7a9d1c02 | 479 | # BE SURE TO COPY ANYTHING YOU ADD to the block below |
8ebc5c01 | 480 | |
66cbab2c KW |
481 | { # This is just the previous tests copied here with a different |
482 | # compile-time pragma. | |
483 | ||
484 | use locale ':not_characters'; # engage restricted locale with different | |
485 | # tainting rules | |
b6fb4de7 KW |
486 | check_taint_not $a, '$a'; |
487 | ||
488 | check_taint_not uc($a), 'uc($a)'; | |
489 | check_taint_not "\U$a", '"\U$a"'; | |
490 | check_taint_not ucfirst($a), 'ucfirst($a)'; | |
491 | check_taint_not "\u$a", '"\u$a"'; | |
492 | check_taint_not lc($a), 'lc($a)'; | |
493 | check_taint_not fc($a), 'fc($a)'; | |
494 | check_taint_not "\L$a", '"\L$a"'; | |
495 | check_taint_not "\F$a", '"\F$a"'; | |
496 | check_taint_not lcfirst($a), 'lcfirst($a)'; | |
497 | check_taint_not "\l$a", '"\l$a"'; | |
498 | ||
499 | check_taint_not sprintf('%e', 123.456), "sprintf('%e', 123.456)"; | |
500 | check_taint_not sprintf('%f', 123.456), "sprintf('%f', 123.456)"; | |
501 | check_taint_not sprintf('%g', 123.456), "sprintf('%g', 123.456)"; | |
502 | check_taint_not sprintf('%d', 123.456), "sprintf('%d', 123.456)"; | |
503 | check_taint_not sprintf('%x', 123.456), "sprintf('%x', 123.456)"; | |
66cbab2c KW |
504 | |
505 | $_ = $a; # untaint $_ | |
506 | ||
b6fb4de7 | 507 | $_ = uc($a); |
66cbab2c | 508 | |
b6fb4de7 | 509 | check_taint_not $_, '$_ = uc($a)'; |
66cbab2c | 510 | |
b6fb4de7 KW |
511 | /(\w)/; |
512 | check_taint_not $&, "\$& from /(\\w)/"; | |
513 | check_taint_not $`, "\t\$`"; | |
514 | check_taint_not $', "\t\$'"; | |
515 | check_taint_not $+, "\t\$+"; | |
516 | check_taint_not $1, "\t\$1"; | |
517 | check_taint_not $2, "\t\$2"; | |
66cbab2c KW |
518 | |
519 | /(.)/; # untaint $&, $`, $', $+, $1. | |
b6fb4de7 KW |
520 | check_taint_not $&, "\$& from /(.)/"; |
521 | check_taint_not $`, "\t\$`"; | |
522 | check_taint_not $', "\t\$'"; | |
523 | check_taint_not $+, "\t\$+"; | |
524 | check_taint_not $1, "\t\$1"; | |
525 | check_taint_not $2, "\t\$2"; | |
526 | ||
527 | /(\W)/; | |
528 | check_taint_not $&, "\$& from /(\\W)/"; | |
529 | check_taint_not $`, "\t\$`"; | |
530 | check_taint_not $', "\t\$'"; | |
531 | check_taint_not $+, "\t\$+"; | |
532 | check_taint_not $1, "\t\$1"; | |
533 | check_taint_not $2, "\t\$2"; | |
66cbab2c | 534 | |
b6fb4de7 KW |
535 | /(.)/; # untaint $&, $`, $', $+, $1. |
536 | check_taint_not $&, "\$& from /(.)/"; | |
537 | check_taint_not $`, "\t\$`"; | |
538 | check_taint_not $', "\t\$'"; | |
539 | check_taint_not $+, "\t\$+"; | |
540 | check_taint_not $1, "\t\$1"; | |
541 | check_taint_not $2, "\t\$2"; | |
542 | ||
543 | /(\s)/; | |
544 | check_taint_not $&, "\$& from /(\\s)/"; | |
545 | check_taint_not $`, "\t\$`"; | |
546 | check_taint_not $', "\t\$'"; | |
547 | check_taint_not $+, "\t\$+"; | |
548 | check_taint_not $1, "\t\$1"; | |
549 | check_taint_not $2, "\t\$2"; | |
66cbab2c | 550 | |
b6fb4de7 KW |
551 | /(.)/; # untaint $&, $`, $', $+, $1. |
552 | check_taint_not $&, "\$& from /(.)/"; | |
66cbab2c | 553 | |
b6fb4de7 KW |
554 | /(\S)/; |
555 | check_taint_not $&, "\$& from /(\\S)/"; | |
556 | check_taint_not $`, "\t\$`"; | |
557 | check_taint_not $', "\t\$'"; | |
558 | check_taint_not $+, "\t\$+"; | |
559 | check_taint_not $1, "\t\$1"; | |
560 | check_taint_not $2, "\t\$2"; | |
66cbab2c | 561 | |
b6fb4de7 KW |
562 | /(.)/; # untaint $&, $`, $', $+, $1. |
563 | check_taint_not $&, "\$& from /(.)/"; | |
564 | ||
565 | "0" =~ /(\d)/; | |
566 | check_taint_not $&, "\$& from /(\\d)/"; | |
567 | check_taint_not $`, "\t\$`"; | |
568 | check_taint_not $', "\t\$'"; | |
569 | check_taint_not $+, "\t\$+"; | |
570 | check_taint_not $1, "\t\$1"; | |
571 | check_taint_not $2, "\t\$2"; | |
572 | ||
573 | /(.)/; # untaint $&, $`, $', $+, $1. | |
574 | check_taint_not $&, "\$& from /(.)/"; | |
66cbab2c | 575 | |
b6fb4de7 KW |
576 | /(\D)/; |
577 | check_taint_not $&, "\$& from /(\\D)/"; | |
578 | check_taint_not $`, "\t\$`"; | |
579 | check_taint_not $', "\t\$'"; | |
580 | check_taint_not $+, "\t\$+"; | |
581 | check_taint_not $1, "\t\$1"; | |
582 | check_taint_not $2, "\t\$2"; | |
66cbab2c | 583 | |
b6fb4de7 KW |
584 | /(.)/; # untaint $&, $`, $', $+, $1. |
585 | check_taint_not $&, "\$& from /(.)/"; | |
586 | ||
587 | /([[:alnum:]])/; | |
588 | check_taint_not $&, "\$& from /([[:alnum:]])/"; | |
589 | check_taint_not $`, "\t\$`"; | |
590 | check_taint_not $', "\t\$'"; | |
591 | check_taint_not $+, "\t\$+"; | |
592 | check_taint_not $1, "\t\$1"; | |
593 | check_taint_not $2, "\t\$2"; | |
594 | ||
595 | /(.)/; # untaint $&, $`, $', $+, $1. | |
596 | check_taint_not $&, "\$& from /(.)/"; | |
597 | ||
598 | /([[:^alnum:]])/; | |
599 | check_taint_not $&, "\$& from /([[:^alnum:]])/"; | |
600 | check_taint_not $`, "\t\$`"; | |
601 | check_taint_not $', "\t\$'"; | |
602 | check_taint_not $+, "\t\$+"; | |
603 | check_taint_not $1, "\t\$1"; | |
604 | check_taint_not $2, "\t\$2"; | |
605 | ||
606 | "a" =~ /(a)|(\w)/; | |
607 | check_taint_not $&, "\$& from /(a)|(\\w)/"; | |
608 | check_taint_not $`, "\t\$`"; | |
609 | check_taint_not $', "\t\$'"; | |
610 | check_taint_not $+, "\t\$+"; | |
611 | check_taint_not $1, "\t\$1"; | |
612 | ok($1 eq 'a', ("\t" x 5) . "\$1 is 'a'"); | |
613 | ok(! defined $2, ("\t" x 5) . "\$2 is undefined"); | |
614 | check_taint_not $2, "\t\$2"; | |
615 | check_taint_not $3, "\t\$3"; | |
616 | ||
617 | /(.)/; # untaint $&, $`, $', $+, $1. | |
618 | check_taint_not $&, "\$& from /(.)/"; | |
619 | ||
620 | "\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i; | |
621 | check_taint_not $&, "\$& from /(\\N{CYRILLIC CAPITAL LETTER A})/i"; | |
622 | check_taint_not $`, "\t\$`"; | |
623 | check_taint_not $', "\t\$'"; | |
624 | check_taint_not $+, "\t\$+"; | |
625 | check_taint_not $1, "\t\$1"; | |
626 | ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\t\$1 is 'small cyrillic a'"); | |
627 | check_taint_not $2, "\t\$2"; | |
628 | ||
629 | /(.)/; # untaint $&, $`, $', $+, $1. | |
630 | check_taint_not $&, "\$& from /./"; | |
66cbab2c | 631 | |
b6fb4de7 KW |
632 | "(\N{KELVIN SIGN})" =~ /(\N{KELVIN SIGN})/i; |
633 | check_taint_not $&, "\$& from /(\\N{KELVIN SIGN})/i"; | |
634 | check_taint_not $`, "\t\$`"; | |
635 | check_taint_not $', "\t\$'"; | |
636 | check_taint_not $+, "\t\$+"; | |
637 | check_taint_not $1, "\t\$1"; | |
638 | check_taint_not $2, "\t\$2"; | |
639 | ||
640 | /(.)/; # untaint $&, $`, $', $+, $1. | |
641 | check_taint_not $&, "\$& from /(.)/"; | |
642 | ||
643 | "a:" =~ /(.)\b(.)/; | |
644 | check_taint_not $&, "\$& from /(.)\\b(.)/"; | |
645 | check_taint_not $`, "\t\$`"; | |
646 | check_taint_not $', "\t\$'"; | |
647 | check_taint_not $+, "\t\$+"; | |
648 | check_taint_not $1, "\t\$1"; | |
649 | check_taint_not $2, "\t\$2"; | |
650 | check_taint_not $3, "\t\$3"; | |
651 | ||
652 | /(.)/; # untaint $&, $`, $', $+, $1. | |
653 | check_taint_not $&, "\$& from /./"; | |
654 | ||
655 | "aa" =~ /(.)\B(.)/; | |
656 | check_taint_not $&, "\$& from /(.)\\B(.)/"; | |
657 | check_taint_not $`, "\t\$`"; | |
658 | check_taint_not $', "\t\$'"; | |
659 | check_taint_not $+, "\t\$+"; | |
660 | check_taint_not $1, "\t\$1"; | |
661 | check_taint_not $2, "\t\$2"; | |
662 | check_taint_not $3, "\t\$3"; | |
663 | ||
664 | /(.)/; # untaint $&, $`, $', $+, $1. | |
665 | check_taint_not $&, "\$& from /./"; | |
666 | ||
667 | "aaa" =~ /(.).(\1)/i; # notaint because not locale dependent | |
668 | check_taint_not $&, "\$ & from /(.).(\\1)/"; | |
669 | check_taint_not $`, "\t\$`"; | |
670 | check_taint_not $', "\t\$'"; | |
671 | check_taint_not $+, "\t\$+"; | |
672 | check_taint_not $1, "\t\$1"; | |
673 | check_taint_not $2, "\t\$2"; | |
674 | check_taint_not $3, "\t\$3"; | |
675 | ||
676 | /(.)/; # untaint $&, $`, $', $+, $1. | |
677 | check_taint_not $&, "\$ & from /./"; | |
678 | ||
679 | $_ = $a; # untaint $_ | |
680 | ||
681 | check_taint_not $_, 'untainting $_ works'; | |
682 | ||
683 | /(b)/; | |
684 | check_taint_not $&, "\$ & from /(b)/"; | |
685 | check_taint_not $`, "\t\$`"; | |
686 | check_taint_not $', "\t\$'"; | |
687 | check_taint_not $+, "\t\$+"; | |
688 | check_taint_not $1, "\t\$1"; | |
689 | check_taint_not $2, "\t\$2"; | |
66cbab2c KW |
690 | |
691 | $_ = $a; # untaint $_ | |
692 | ||
b6fb4de7 KW |
693 | check_taint_not $_, 'untainting $_ works'; |
694 | ||
695 | s/(.+)/b/; | |
696 | check_taint_not $_, '$_ (wasn\'t tainted) from s/(.+)/b/'; | |
697 | check_taint_not $&, "\t\$&"; | |
698 | check_taint_not $`, "\t\$`"; | |
699 | check_taint_not $', "\t\$'"; | |
700 | check_taint_not $+, "\t\$+"; | |
701 | check_taint_not $1, "\t\$1"; | |
702 | check_taint_not $2, "\t\$2"; | |
66cbab2c KW |
703 | |
704 | $b = $a; # untaint $b | |
705 | ||
706 | ($b = $a) =~ s/\w/$&/; | |
b6fb4de7 KW |
707 | check_taint_not $b, '$b from ($b = $a) =~ s/\w/$&/'; |
708 | check_taint_not $a, '$a from ($b = $a) =~ s/\w/$&/'; | |
66cbab2c KW |
709 | |
710 | $_ = $a; # untaint $_ | |
711 | ||
b6fb4de7 KW |
712 | s/(\w)/\l$1/; |
713 | check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\l$1/,'; # this must taint | |
714 | check_taint_not $&, "\t\$&"; | |
715 | check_taint_not $`, "\t\$`"; | |
716 | check_taint_not $', "\t\$'"; | |
717 | check_taint_not $+, "\t\$+"; | |
718 | check_taint_not $1, "\t\$1"; | |
719 | check_taint_not $2, "\t\$2"; | |
66cbab2c KW |
720 | |
721 | $_ = $a; # untaint $_ | |
722 | ||
b6fb4de7 KW |
723 | s/(\w)/\L$1/; |
724 | check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\L$1/,'; | |
725 | check_taint_not $&, "\t\$&"; | |
726 | check_taint_not $`, "\t\$`"; | |
727 | check_taint_not $', "\t\$'"; | |
728 | check_taint_not $+, "\t\$+"; | |
729 | check_taint_not $1, "\t\$1"; | |
730 | check_taint_not $2, "\t\$2"; | |
66cbab2c KW |
731 | |
732 | $_ = $a; # untaint $_ | |
733 | ||
b6fb4de7 KW |
734 | s/(\w)/\u$1/; |
735 | check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\u$1/'; | |
736 | check_taint_not $&, "\t\$&"; | |
737 | check_taint_not $`, "\t\$`"; | |
738 | check_taint_not $', "\t\$'"; | |
739 | check_taint_not $+, "\t\$+"; | |
740 | check_taint_not $1, "\t\$1"; | |
741 | check_taint_not $2, "\t\$2"; | |
66cbab2c KW |
742 | |
743 | $_ = $a; # untaint $_ | |
744 | ||
b6fb4de7 KW |
745 | s/(\w)/\U$1/; |
746 | check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\U$1/'; | |
747 | check_taint_not $&, "\t\$&"; | |
748 | check_taint_not $`, "\t\$`"; | |
749 | check_taint_not $', "\t\$'"; | |
750 | check_taint_not $+, "\t\$+"; | |
751 | check_taint_not $1, "\t\$1"; | |
752 | check_taint_not $2, "\t\$2"; | |
66cbab2c KW |
753 | |
754 | # After all this tainting $a should be cool. | |
755 | ||
b6fb4de7 | 756 | check_taint_not $a, '$a still not tainted'; |
7a9d1c02 KW |
757 | |
758 | "a" =~ /([a-z])/; | |
759 | check_taint_not $1, '"a" =~ /([a-z])/'; | |
760 | "foo.bar_baz" =~ /^(.*)[._](.*?)$/; # Bug 120675 | |
761 | check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/'; | |
b6fb4de7 | 762 | |
66cbab2c KW |
763 | } |
764 | ||
765 | # Here are in scope of 'use locale' | |
766 | ||
8ebc5c01 | 767 | # I think we've seen quite enough of taint. |
768 | # Let us do some *real* locale work now, | |
284102e8 | 769 | # unless setlocale() is missing (i.e. minitest). |
8ebc5c01 | 770 | |
6cf0b567 | 771 | # The test number before our first setlocale() |
66330f13 | 772 | my $final_without_setlocale = $test_num; |
6cf0b567 | 773 | |
284102e8 JH |
774 | # Find locales. |
775 | ||
46973bb2 | 776 | debug "Scanning for locales...\n"; |
6be75cd7 | 777 | |
9b0711ee KW |
778 | require POSIX; import POSIX ':locale_h'; |
779 | ||
83cf8693 | 780 | my $categories = [ 'LC_CTYPE', 'LC_NUMERIC', 'LC_ALL' ]; |
9c8a6dc2 | 781 | debug "Scanning for just compatible"; |
83cf8693 | 782 | my @Locale = find_locales($categories); |
9c8a6dc2 | 783 | debug "Scanning for even incompatible"; |
83cf8693 | 784 | my @include_incompatible_locales = find_locales($categories, |
0d941362 KW |
785 | 'even incompatible locales'); |
786 | ||
787 | # The locales included in the incompatible list that aren't in the compatible | |
788 | # one. | |
789 | my @incompatible_locales; | |
790 | ||
791 | if (@Locale < @include_incompatible_locales) { | |
792 | my %seen; | |
793 | @seen{@Locale} = (); | |
794 | ||
795 | foreach my $item (@include_incompatible_locales) { | |
796 | push @incompatible_locales, $item unless exists $seen{$item}; | |
797 | } | |
798 | ||
799 | # For each bad locale, switch into it to find out why it's incompatible | |
800 | for my $bad_locale (@incompatible_locales) { | |
801 | my @warnings; | |
802 | ||
803 | use warnings 'locale'; | |
804 | ||
805 | local $SIG{__WARN__} = sub { | |
806 | my $warning = $_[0]; | |
807 | chomp $warning; | |
808 | push @warnings, ($warning =~ s/\n/\n# /sgr); | |
809 | }; | |
810 | ||
9c8a6dc2 | 811 | debug "Trying incompatible $bad_locale"; |
83cf8693 | 812 | my $ret = setlocale(&POSIX::LC_CTYPE, $bad_locale); |
0d941362 KW |
813 | |
814 | my $message = "testing of locale '$bad_locale' is skipped"; | |
815 | if (@warnings) { | |
816 | skip $message . ":\n# " . join "\n# ", @warnings; | |
817 | } | |
83cf8693 KW |
818 | elsif (! $ret) { |
819 | skip("$message:\n#" | |
820 | . " setlocale(&POSIX::LC_CTYPE, '$bad_locale') failed"); | |
821 | } | |
0d941362 KW |
822 | else { |
823 | fail $message . ", because it is was found to be incompatible with" | |
824 | . " Perl, but could not discern reason"; | |
825 | } | |
826 | } | |
827 | } | |
4599a1de | 828 | |
46973bb2 | 829 | debug "Locales =\n"; |
887ef7ed | 830 | for ( @Locale ) { |
46973bb2 | 831 | debug "$_\n"; |
887ef7ed | 832 | } |
8ebc5c01 | 833 | |
d369fd5b KW |
834 | unless (@Locale) { |
835 | print "1..$test_num\n"; | |
836 | exit; | |
837 | } | |
838 | ||
d369fd5b KW |
839 | |
840 | setlocale(&POSIX::LC_ALL, "C"); | |
841 | ||
73fc293b KW |
842 | my %posixes; |
843 | ||
284102e8 | 844 | my %Problem; |
2a680da6 | 845 | my %Okay; |
852b6749 | 846 | my %Known_bad_locale; # Failed test for a locale known to be bad |
2a680da6 | 847 | my %Testing; |
30032ef4 | 848 | my @Added_alpha; # Alphas that aren't in the C locale. |
c08acc4c | 849 | my %test_names; |
284102e8 | 850 | |
705af3af | 851 | sub disp_chars { |
019bf7dd KW |
852 | # This returns a display string denoting the input parameter @_, each |
853 | # entry of which is a single character in the range 0-255. The first part | |
854 | # of the output is a string of the characters in @_ that are ASCII | |
855 | # graphics, and hence unambiguously displayable. They are given by code | |
856 | # point order. The second part is the remaining code points, the ordinals | |
857 | # of which are each displayed as 2-digit hex. Blanks are inserted so as | |
858 | # to keep anything from the first part looking like a 2-digit hex number. | |
859 | ||
860 | no locale; | |
861 | my @chars = sort { ord $a <=> ord $b } @_; | |
862 | my $output = ""; | |
019bf7dd KW |
863 | my $range_start; |
864 | my $start_class; | |
865 | push @chars, chr(258); # This sentinel simplifies the loop termination | |
866 | # logic | |
867 | foreach my $i (0 .. @chars - 1) { | |
868 | my $char = $chars[$i]; | |
869 | my $range_end; | |
870 | my $class; | |
871 | ||
872 | # We avoid using [:posix:] classes, as these are being tested in this | |
873 | # file. Each equivalence class below is for things that can appear in | |
874 | # a range; those that can't be in a range have class -1. 0 for those | |
875 | # which should be output in hex; and >0 for the other ranges | |
876 | if ($char =~ /[A-Z]/) { | |
877 | $class = 2; | |
878 | } | |
879 | elsif ($char =~ /[a-z]/) { | |
880 | $class = 3; | |
881 | } | |
882 | elsif ($char =~ /[0-9]/) { | |
883 | $class = 4; | |
884 | } | |
78fbfaf5 KW |
885 | # Uncomment to get literal punctuation displayed instead of hex |
886 | #elsif ($char =~ /[[\]!"#\$\%&\'()*+,.\/:\\;<=>?\@\^_`{|}~-]/) { | |
887 | # $class = -1; # Punct never appears in a range | |
888 | #} | |
019bf7dd KW |
889 | else { |
890 | $class = 0; # Output in hex | |
891 | } | |
892 | ||
893 | if (! defined $range_start) { | |
894 | if ($class < 0) { | |
78fbfaf5 | 895 | $output .= " " . $char; |
019bf7dd KW |
896 | } |
897 | else { | |
898 | $range_start = ord $char; | |
899 | $start_class = $class; | |
900 | } | |
901 | } # A range ends if not consecutive, or the class-type changes | |
902 | elsif (ord $char != ($range_end = ord($chars[$i-1])) + 1 | |
903 | || $class != $start_class) | |
904 | { | |
905 | ||
906 | # Here, the current character is not in the range. This means the | |
907 | # previous character must have been. Output the range up through | |
908 | # that one. | |
909 | my $range_length = $range_end - $range_start + 1; | |
910 | if ($start_class > 0) { | |
911 | $output .= " " . chr($range_start); | |
912 | $output .= "-" . chr($range_end) if $range_length > 1; | |
913 | } | |
914 | else { | |
78fbfaf5 KW |
915 | $output .= sprintf(" %02X", $range_start); |
916 | $output .= sprintf("-%02X", $range_end) if $range_length > 1; | |
019bf7dd KW |
917 | } |
918 | ||
919 | # Handle the new current character, as potentially beginning a new | |
920 | # range | |
921 | undef $range_start; | |
922 | redo; | |
923 | } | |
924 | } | |
925 | ||
926 | $output =~ s/^ //; | |
78fbfaf5 | 927 | return $output; |
019bf7dd KW |
928 | } |
929 | ||
010b53a6 KW |
930 | sub disp_str ($) { |
931 | my $string = shift; | |
932 | ||
933 | # Displays the string unambiguously. ASCII printables are always output | |
934 | # as-is, though perhaps separated by blanks from other characters. If | |
935 | # entirely printable ASCII, just returns the string. Otherwise if valid | |
936 | # UTF-8 it uses the character names for non-printable-ASCII. Otherwise it | |
937 | # outputs hex for each non-ASCII-printable byte. | |
938 | ||
939 | return $string if $string =~ / ^ [[:print:]]* $/xa; | |
940 | ||
941 | my $result = ""; | |
942 | my $prev_was_punct = 1; # Beginning is considered punct | |
943 | if (utf8::valid($string) && utf8::is_utf8($string)) { | |
944 | use charnames (); | |
945 | foreach my $char (split "", $string) { | |
946 | ||
947 | # Keep punctuation adjacent to other characters; otherwise | |
948 | # separate them with a blank | |
949 | if ($char =~ /[[:punct:]]/a) { | |
950 | $result .= $char; | |
951 | $prev_was_punct = 1; | |
952 | } | |
953 | elsif ($char =~ /[[:print:]]/a) { | |
954 | $result .= " " unless $prev_was_punct; | |
955 | $result .= $char; | |
956 | $prev_was_punct = 0; | |
957 | } | |
958 | else { | |
959 | $result .= " " unless $prev_was_punct; | |
e82d2a11 KW |
960 | my $name = charnames::viacode(ord $char); |
961 | $result .= (defined $name) ? $name : ':unknown:'; | |
010b53a6 KW |
962 | $prev_was_punct = 0; |
963 | } | |
964 | } | |
965 | } | |
966 | else { | |
967 | use bytes; | |
968 | foreach my $char (split "", $string) { | |
969 | if ($char =~ /[[:punct:]]/a) { | |
970 | $result .= $char; | |
971 | $prev_was_punct = 1; | |
972 | } | |
973 | elsif ($char =~ /[[:print:]]/a) { | |
974 | $result .= " " unless $prev_was_punct; | |
975 | $result .= $char; | |
976 | $prev_was_punct = 0; | |
977 | } | |
978 | else { | |
979 | $result .= " " unless $prev_was_punct; | |
980 | $result .= sprintf("%02X", ord $char); | |
981 | $prev_was_punct = 0; | |
982 | } | |
983 | } | |
984 | } | |
985 | ||
986 | return $result; | |
987 | } | |
988 | ||
30032ef4 KW |
989 | sub report_result { |
990 | my ($Locale, $i, $pass_fail, $message) = @_; | |
852b6749 KW |
991 | if ($pass_fail) { |
992 | push @{$Okay{$i}}, $Locale; | |
993 | } | |
994 | else { | |
f17bc913 KW |
995 | $message //= ""; |
996 | $message = " ($message)" if $message; | |
2fb9a6f6 KW |
997 | $Known_bad_locale{$i}{$Locale} = 1 if exists $known_bad_locales{$os} |
998 | && $Locale =~ $known_bad_locales{$os}; | |
2a680da6 | 999 | $Problem{$i}{$Locale} = 1; |
46973bb2 | 1000 | debug "failed $i ($test_names{$i}) with locale '$Locale'$message\n"; |
2a680da6 JH |
1001 | } |
1002 | } | |
1003 | ||
7c844d17 KW |
1004 | sub report_multi_result { |
1005 | my ($Locale, $i, $results_ref) = @_; | |
1006 | ||
1007 | # $results_ref points to an array, each element of which is a character that was | |
1008 | # in error for this test numbered '$i'. If empty, the test passed | |
1009 | ||
1010 | my $message = ""; | |
1011 | if (@$results_ref) { | |
705af3af | 1012 | $message = join " ", "for", disp_chars(@$results_ref); |
7c844d17 KW |
1013 | } |
1014 | report_result($Locale, $i, @$results_ref == 0, $message); | |
1015 | } | |
1016 | ||
0d941362 KW |
1017 | my $first_locales_test_number = $final_without_setlocale |
1018 | + 1 + @incompatible_locales; | |
c4093d7d KW |
1019 | my $locales_test_number; |
1020 | my $not_necessarily_a_problem_test_number; | |
6d5d702a | 1021 | my $first_casing_test_number; |
c4093d7d KW |
1022 | my %setlocale_failed; # List of locales that setlocale() didn't work on |
1023 | ||
73fc293b | 1024 | foreach my $Locale (@Locale) { |
c4093d7d | 1025 | $locales_test_number = $first_locales_test_number - 1; |
46973bb2 KW |
1026 | debug "\n"; |
1027 | debug "Locale = $Locale\n"; | |
284102e8 | 1028 | |
a810e350 | 1029 | unless (setlocale(&POSIX::LC_ALL, $Locale)) { |
c4093d7d | 1030 | $setlocale_failed{$Locale} = $Locale; |
284102e8 | 1031 | next; |
8ebc5c01 | 1032 | } |
8ebc5c01 | 1033 | |
31f05a37 KW |
1034 | # We test UTF-8 locales only under ':not_characters'; It is easier to |
1035 | # test them in other test files than here. Non- UTF-8 locales are tested | |
1036 | # only under plain 'use locale', as otherwise we would have to convert | |
1037 | # everything in them to Unicode. | |
66cbab2c | 1038 | |
95eaa1bf KW |
1039 | my %UPPER = (); # All alpha X for which uc(X) == X and lc(X) != X |
1040 | my %lower = (); # All alpha X for which lc(X) == X and uc(X) != X | |
1041 | my %BoThCaSe = (); # All alpha X for which uc(X) == lc(X) == X | |
66cbab2c | 1042 | |
ab8b8bcc | 1043 | my $is_utf8_locale = is_locale_utf8($Locale); |
b9df08e4 | 1044 | |
46973bb2 | 1045 | debug "is utf8 locale? = $is_utf8_locale\n"; |
b9df08e4 | 1046 | |
68886934 | 1047 | debug "radix = " . disp_str(localeconv()->{decimal_point}) . "\n"; |
b9df08e4 | 1048 | |
66cbab2c KW |
1049 | if (! $is_utf8_locale) { |
1050 | use locale; | |
fb844330 KW |
1051 | @{$posixes{'word'}} = grep /\w/, map { chr } 0..255; |
1052 | @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255; | |
1053 | @{$posixes{'space'}} = grep /\s/, map { chr } 0..255; | |
1054 | @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255; | |
1055 | @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255; | |
1056 | @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255; | |
1057 | @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255; | |
1058 | @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255; | |
1059 | @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255; | |
1060 | @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255; | |
1061 | @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255; | |
98ed56e7 | 1062 | @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255; |
fb844330 KW |
1063 | @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255; |
1064 | @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255; | |
389cb6df | 1065 | @{$posixes{'cased'}} = grep /[[:upper:][:lower:]]/i, map {chr } 0..255; |
e5272a46 | 1066 | |
71e5cbb3 KW |
1067 | # Sieve the uppercase and the lowercase. |
1068 | ||
fb844330 | 1069 | for (@{$posixes{'word'}}) { |
71e5cbb3 KW |
1070 | if (/[^\d_]/) { # skip digits and the _ |
1071 | if (uc($_) eq $_) { | |
1072 | $UPPER{$_} = $_; | |
1073 | } | |
1074 | if (lc($_) eq $_) { | |
1075 | $lower{$_} = $_; | |
1076 | } | |
1077 | } | |
1078 | } | |
66cbab2c KW |
1079 | } |
1080 | else { | |
1081 | use locale ':not_characters'; | |
fb844330 KW |
1082 | @{$posixes{'word'}} = grep /\w/, map { chr } 0..255; |
1083 | @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255; | |
1084 | @{$posixes{'space'}} = grep /\s/, map { chr } 0..255; | |
1085 | @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255; | |
1086 | @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255; | |
1087 | @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255; | |
1088 | @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255; | |
1089 | @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255; | |
1090 | @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255; | |
1091 | @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255; | |
1092 | @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255; | |
98ed56e7 | 1093 | @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255; |
fb844330 KW |
1094 | @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255; |
1095 | @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255; | |
389cb6df | 1096 | @{$posixes{'cased'}} = grep /[[:upper:][:lower:]]/i, map {chr } 0..255; |
fb844330 | 1097 | for (@{$posixes{'word'}}) { |
66cbab2c KW |
1098 | if (/[^\d_]/) { # skip digits and the _ |
1099 | if (uc($_) eq $_) { | |
1100 | $UPPER{$_} = $_; | |
1101 | } | |
1102 | if (lc($_) eq $_) { | |
1103 | $lower{$_} = $_; | |
1104 | } | |
1105 | } | |
1106 | } | |
1107 | } | |
a160ac48 | 1108 | |
fb844330 KW |
1109 | # Ordered, where possible, in groups of "this is a subset of the next |
1110 | # one" | |
46973bb2 KW |
1111 | debug ":upper: = ", disp_chars(@{$posixes{'upper'}}), "\n"; |
1112 | debug ":lower: = ", disp_chars(@{$posixes{'lower'}}), "\n"; | |
1113 | debug ":cased: = ", disp_chars(@{$posixes{'cased'}}), "\n"; | |
1114 | debug ":alpha: = ", disp_chars(@{$posixes{'alpha'}}), "\n"; | |
1115 | debug ":alnum: = ", disp_chars(@{$posixes{'alnum'}}), "\n"; | |
8eb6215e | 1116 | debug ' \w = ', disp_chars(@{$posixes{'word'}}), "\n"; |
46973bb2 KW |
1117 | debug ":graph: = ", disp_chars(@{$posixes{'graph'}}), "\n"; |
1118 | debug ":print: = ", disp_chars(@{$posixes{'print'}}), "\n"; | |
8eb6215e | 1119 | debug ' \d = ', disp_chars(@{$posixes{'digit'}}), "\n"; |
46973bb2 KW |
1120 | debug ":xdigit: = ", disp_chars(@{$posixes{'xdigit'}}), "\n"; |
1121 | debug ":blank: = ", disp_chars(@{$posixes{'blank'}}), "\n"; | |
8eb6215e | 1122 | debug ' \s = ', disp_chars(@{$posixes{'space'}}), "\n"; |
46973bb2 KW |
1123 | debug ":punct: = ", disp_chars(@{$posixes{'punct'}}), "\n"; |
1124 | debug ":cntrl: = ", disp_chars(@{$posixes{'cntrl'}}), "\n"; | |
1125 | debug ":ascii: = ", disp_chars(@{$posixes{'ascii'}}), "\n"; | |
a160ac48 | 1126 | |
284102e8 | 1127 | foreach (keys %UPPER) { |
a160ac48 | 1128 | |
097ee67d | 1129 | $BoThCaSe{$_}++ if exists $lower{$_}; |
284102e8 JH |
1130 | } |
1131 | foreach (keys %lower) { | |
097ee67d | 1132 | $BoThCaSe{$_}++ if exists $UPPER{$_}; |
284102e8 | 1133 | } |
097ee67d | 1134 | foreach (keys %BoThCaSe) { |
284102e8 JH |
1135 | delete $UPPER{$_}; |
1136 | delete $lower{$_}; | |
1137 | } | |
1138 | ||
e5cc0528 KW |
1139 | my %Unassigned; |
1140 | foreach my $ord ( 0 .. 255 ) { | |
1141 | $Unassigned{chr $ord} = 1; | |
1142 | } | |
1143 | foreach my $class (keys %posixes) { | |
1144 | foreach my $char (@{$posixes{$class}}) { | |
1145 | delete $Unassigned{$char}; | |
1146 | } | |
1147 | } | |
1148 | ||
46973bb2 KW |
1149 | debug "UPPER = ", disp_chars(sort { ord $a <=> ord $b } keys %UPPER), "\n"; |
1150 | debug "lower = ", disp_chars(sort { ord $a <=> ord $b } keys %lower), "\n"; | |
1151 | debug "BoThCaSe = ", disp_chars(sort { ord $a <=> ord $b } keys %BoThCaSe), "\n"; | |
1152 | debug "Unassigned = ", disp_chars(sort { ord $a <=> ord $b } keys %Unassigned), "\n"; | |
284102e8 | 1153 | |
baa71cfd | 1154 | my @failures; |
3da38613 | 1155 | my @fold_failures; |
b9df08e4 | 1156 | foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) { |
baa71cfd | 1157 | my $ok; |
3da38613 | 1158 | my $fold_ok; |
baa71cfd KW |
1159 | if ($is_utf8_locale) { |
1160 | use locale ':not_characters'; | |
1161 | $ok = $x =~ /[[:upper:]]/; | |
3da38613 | 1162 | $fold_ok = $x =~ /[[:lower:]]/i; |
baa71cfd KW |
1163 | } |
1164 | else { | |
1165 | use locale; | |
1166 | $ok = $x =~ /[[:upper:]]/; | |
3da38613 | 1167 | $fold_ok = $x =~ /[[:lower:]]/i; |
baa71cfd KW |
1168 | } |
1169 | push @failures, $x unless $ok; | |
3da38613 | 1170 | push @fold_failures, $x unless $fold_ok; |
baa71cfd | 1171 | } |
baa71cfd | 1172 | $locales_test_number++; |
6d5d702a | 1173 | $first_casing_test_number = $locales_test_number; |
95eaa1bf | 1174 | $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/ matches all alpha X for which uc(X) == X and lc(X) != X'; |
7c844d17 | 1175 | report_multi_result($Locale, $locales_test_number, \@failures); |
6d5d702a | 1176 | |
3da38613 | 1177 | $locales_test_number++; |
6d5d702a | 1178 | |
95eaa1bf | 1179 | $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i matches all alpha X for which uc(X) == X and lc(X) != X'; |
7c844d17 | 1180 | report_multi_result($Locale, $locales_test_number, \@fold_failures); |
baa71cfd | 1181 | |
baa71cfd | 1182 | undef @failures; |
3da38613 | 1183 | undef @fold_failures; |
baa71cfd | 1184 | |
b9df08e4 | 1185 | foreach my $x (sort { ord $a <=> ord $b } keys %lower) { |
baa71cfd | 1186 | my $ok; |
3da38613 | 1187 | my $fold_ok; |
baa71cfd KW |
1188 | if ($is_utf8_locale) { |
1189 | use locale ':not_characters'; | |
1190 | $ok = $x =~ /[[:lower:]]/; | |
3da38613 | 1191 | $fold_ok = $x =~ /[[:upper:]]/i; |
baa71cfd KW |
1192 | } |
1193 | else { | |
1194 | use locale; | |
1195 | $ok = $x =~ /[[:lower:]]/; | |
3da38613 | 1196 | $fold_ok = $x =~ /[[:upper:]]/i; |
baa71cfd KW |
1197 | } |
1198 | push @failures, $x unless $ok; | |
3da38613 | 1199 | push @fold_failures, $x unless $fold_ok; |
baa71cfd KW |
1200 | } |
1201 | ||
1202 | $locales_test_number++; | |
95eaa1bf | 1203 | $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/ matches all alpha X for which lc(X) == X and uc(X) != X'; |
7c844d17 KW |
1204 | report_multi_result($Locale, $locales_test_number, \@failures); |
1205 | ||
3da38613 | 1206 | $locales_test_number++; |
95eaa1bf | 1207 | $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/i matches all alpha X for which lc(X) == X and uc(X) != X'; |
7c844d17 | 1208 | report_multi_result($Locale, $locales_test_number, \@fold_failures); |
baa71cfd | 1209 | |
9445c837 KW |
1210 | { # Find the alphabetic characters that are not considered alphabetics |
1211 | # in the default (C) locale. | |
8ebc5c01 | 1212 | |
284102e8 | 1213 | no locale; |
71e5cbb3 | 1214 | |
30032ef4 | 1215 | @Added_alpha = (); |
5e7a1028 | 1216 | for (keys %UPPER, keys %lower, keys %BoThCaSe) { |
30032ef4 | 1217 | push(@Added_alpha, $_) if (/\W/); |
284102e8 | 1218 | } |
8ebc5c01 | 1219 | } |
8ebc5c01 | 1220 | |
b9df08e4 | 1221 | @Added_alpha = sort { ord $a <=> ord $b } @Added_alpha; |
8ebc5c01 | 1222 | |
46973bb2 | 1223 | debug "Added_alpha = ", disp_chars(@Added_alpha), "\n"; |
8ebc5c01 | 1224 | |
db31898d | 1225 | # Cross-check the whole 8-bit character set. |
8ebc5c01 | 1226 | |
db31898d KW |
1227 | ++$locales_test_number; |
1228 | my @f; | |
1229 | $test_names{$locales_test_number} = 'Verify that \w and [:word:] are identical'; | |
1230 | for (map { chr } 0..255) { | |
1231 | if ($is_utf8_locale) { | |
1232 | use locale ':not_characters'; | |
1233 | push @f, $_ unless /[[:word:]]/ == /\w/; | |
1234 | } | |
1235 | else { | |
1236 | push @f, $_ unless /[[:word:]]/ == /\w/; | |
1237 | } | |
1238 | } | |
1239 | report_multi_result($Locale, $locales_test_number, \@f); | |
8ebc5c01 | 1240 | |
db31898d KW |
1241 | ++$locales_test_number; |
1242 | undef @f; | |
1243 | $test_names{$locales_test_number} = 'Verify that \d and [:digit:] are identical'; | |
1244 | for (map { chr } 0..255) { | |
1245 | if ($is_utf8_locale) { | |
1246 | use locale ':not_characters'; | |
1247 | push @f, $_ unless /[[:digit:]]/ == /\d/; | |
1248 | } | |
1249 | else { | |
1250 | push @f, $_ unless /[[:digit:]]/ == /\d/; | |
1251 | } | |
1252 | } | |
1253 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1254 | |
db31898d KW |
1255 | ++$locales_test_number; |
1256 | undef @f; | |
1257 | $test_names{$locales_test_number} = 'Verify that \s and [:space:] are identical'; | |
1258 | for (map { chr } 0..255) { | |
1259 | if ($is_utf8_locale) { | |
1260 | use locale ':not_characters'; | |
1261 | push @f, $_ unless /[[:space:]]/ == /\s/; | |
1262 | } | |
1263 | else { | |
1264 | push @f, $_ unless /[[:space:]]/ == /\s/; | |
1265 | } | |
1266 | } | |
1267 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1268 | |
db31898d KW |
1269 | ++$locales_test_number; |
1270 | undef @f; | |
1271 | $test_names{$locales_test_number} = 'Verify that [:posix:] and [:^posix:] are mutually exclusive'; | |
1272 | for (map { chr } 0..255) { | |
1273 | if ($is_utf8_locale) { | |
1274 | use locale ':not_characters'; | |
1275 | push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) || | |
1276 | (/[[:alnum:]]/ xor /[[:^alnum:]]/) || | |
1277 | (/[[:ascii:]]/ xor /[[:^ascii:]]/) || | |
1278 | (/[[:blank:]]/ xor /[[:^blank:]]/) || | |
1279 | (/[[:cntrl:]]/ xor /[[:^cntrl:]]/) || | |
1280 | (/[[:digit:]]/ xor /[[:^digit:]]/) || | |
1281 | (/[[:graph:]]/ xor /[[:^graph:]]/) || | |
1282 | (/[[:lower:]]/ xor /[[:^lower:]]/) || | |
1283 | (/[[:print:]]/ xor /[[:^print:]]/) || | |
1284 | (/[[:space:]]/ xor /[[:^space:]]/) || | |
1285 | (/[[:upper:]]/ xor /[[:^upper:]]/) || | |
1286 | (/[[:word:]]/ xor /[[:^word:]]/) || | |
1287 | (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) || | |
1288 | ||
1289 | # effectively is what [:cased:] would be if it existed. | |
389cb6df | 1290 | (/[[:upper:][:lower:]]/i xor /[^[:upper:][:lower:]]/i); |
db31898d KW |
1291 | } |
1292 | else { | |
1293 | push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) || | |
1294 | (/[[:alnum:]]/ xor /[[:^alnum:]]/) || | |
1295 | (/[[:ascii:]]/ xor /[[:^ascii:]]/) || | |
1296 | (/[[:blank:]]/ xor /[[:^blank:]]/) || | |
1297 | (/[[:cntrl:]]/ xor /[[:^cntrl:]]/) || | |
1298 | (/[[:digit:]]/ xor /[[:^digit:]]/) || | |
1299 | (/[[:graph:]]/ xor /[[:^graph:]]/) || | |
1300 | (/[[:lower:]]/ xor /[[:^lower:]]/) || | |
1301 | (/[[:print:]]/ xor /[[:^print:]]/) || | |
1302 | (/[[:space:]]/ xor /[[:^space:]]/) || | |
1303 | (/[[:upper:]]/ xor /[[:^upper:]]/) || | |
1304 | (/[[:word:]]/ xor /[[:^word:]]/) || | |
1305 | (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) || | |
389cb6df | 1306 | (/[[:upper:][:lower:]]/i xor /[^[:upper:][:lower:]]/i); |
db31898d KW |
1307 | } |
1308 | } | |
1309 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1310 | |
db31898d KW |
1311 | # The rules for the relationships are given in: |
1312 | # http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html | |
32e8aa3f | 1313 | |
660c7bcc KW |
1314 | |
1315 | ++$locales_test_number; | |
1316 | undef @f; | |
1317 | $test_names{$locales_test_number} = 'Verify that [:lower:] contains at least a-z'; | |
1318 | for ('a' .. 'z') { | |
1319 | if ($is_utf8_locale) { | |
1320 | use locale ':not_characters'; | |
1321 | push @f, $_ unless /[[:lower:]]/; | |
1322 | } | |
1323 | else { | |
1324 | push @f, $_ unless /[[:lower:]]/; | |
1325 | } | |
1326 | } | |
1327 | report_multi_result($Locale, $locales_test_number, \@f); | |
1328 | ||
db31898d KW |
1329 | ++$locales_test_number; |
1330 | undef @f; | |
1331 | $test_names{$locales_test_number} = 'Verify that [:lower:] is a subset of [:alpha:]'; | |
1332 | for (map { chr } 0..255) { | |
1333 | if ($is_utf8_locale) { | |
1334 | use locale ':not_characters'; | |
1335 | push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/; | |
1336 | } | |
1337 | else { | |
1338 | push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/; | |
1339 | } | |
1340 | } | |
1341 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1342 | |
db31898d KW |
1343 | ++$locales_test_number; |
1344 | undef @f; | |
660c7bcc KW |
1345 | $test_names{$locales_test_number} = 'Verify that [:upper:] contains at least A-Z'; |
1346 | for ('A' .. 'Z') { | |
1347 | if ($is_utf8_locale) { | |
1348 | use locale ':not_characters'; | |
1349 | push @f, $_ unless /[[:upper:]]/; | |
1350 | } | |
1351 | else { | |
1352 | push @f, $_ unless /[[:upper:]]/; | |
1353 | } | |
1354 | } | |
1355 | report_multi_result($Locale, $locales_test_number, \@f); | |
1356 | ||
1357 | ++$locales_test_number; | |
1358 | undef @f; | |
db31898d KW |
1359 | $test_names{$locales_test_number} = 'Verify that [:upper:] is a subset of [:alpha:]'; |
1360 | for (map { chr } 0..255) { | |
1361 | if ($is_utf8_locale) { | |
1362 | use locale ':not_characters'; | |
1363 | push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/; | |
1364 | } | |
1365 | else { | |
1366 | push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/; | |
1367 | } | |
1368 | } | |
1369 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1370 | |
db31898d KW |
1371 | ++$locales_test_number; |
1372 | undef @f; | |
1373 | $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i is a subset of [:alpha:]'; | |
1374 | for (map { chr } 0..255) { | |
1375 | if ($is_utf8_locale) { | |
1376 | use locale ':not_characters'; | |
1377 | push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/; | |
1378 | } | |
1379 | else { | |
1380 | push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/; | |
1381 | } | |
1382 | } | |
1383 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1384 | |
db31898d KW |
1385 | ++$locales_test_number; |
1386 | undef @f; | |
1387 | $test_names{$locales_test_number} = 'Verify that [:alpha:] is a subset of [:alnum:]'; | |
1388 | for (map { chr } 0..255) { | |
1389 | if ($is_utf8_locale) { | |
1390 | use locale ':not_characters'; | |
1391 | push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/; | |
1392 | } | |
1393 | else { | |
1394 | push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/; | |
1395 | } | |
1396 | } | |
1397 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1398 | |
db31898d KW |
1399 | ++$locales_test_number; |
1400 | undef @f; | |
660c7bcc KW |
1401 | $test_names{$locales_test_number} = 'Verify that [:digit:] contains at least 0-9'; |
1402 | for ('0' .. '9') { | |
1403 | if ($is_utf8_locale) { | |
1404 | use locale ':not_characters'; | |
1405 | push @f, $_ unless /[[:digit:]]/; | |
1406 | } | |
1407 | else { | |
1408 | push @f, $_ unless /[[:digit:]]/; | |
1409 | } | |
1410 | } | |
1411 | report_multi_result($Locale, $locales_test_number, \@f); | |
1412 | ||
1413 | ++$locales_test_number; | |
1414 | undef @f; | |
db31898d KW |
1415 | $test_names{$locales_test_number} = 'Verify that [:digit:] is a subset of [:alnum:]'; |
1416 | for (map { chr } 0..255) { | |
1417 | if ($is_utf8_locale) { | |
1418 | use locale ':not_characters'; | |
1419 | push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/; | |
1420 | } | |
1421 | else { | |
1422 | push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/; | |
1423 | } | |
1424 | } | |
1425 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1426 | |
db31898d KW |
1427 | ++$locales_test_number; |
1428 | undef @f; | |
aac995d2 | 1429 | $test_names{$locales_test_number} = 'Verify that [:digit:] matches either 10 or 20 code points'; |
fb844330 | 1430 | report_result($Locale, $locales_test_number, @{$posixes{'digit'}} == 10 || @{$posixes{'digit'}} == 20); |
aac995d2 KW |
1431 | |
1432 | ++$locales_test_number; | |
1433 | undef @f; | |
660c7bcc KW |
1434 | $test_names{$locales_test_number} = 'Verify that if there is a second set of digits in [:digit:], they are consecutive'; |
1435 | if (@{$posixes{'digit'}} == 20) { | |
1436 | my $previous_ord; | |
fc81f5f2 | 1437 | for (map { chr } 0..255) { |
660c7bcc KW |
1438 | next unless /[[:digit:]]/; |
1439 | next if /[0-9]/; | |
1440 | if (defined $previous_ord) { | |
1441 | if ($is_utf8_locale) { | |
1442 | use locale ':not_characters'; | |
1443 | push @f, $_ if ord $_ != $previous_ord + 1; | |
1444 | } | |
1445 | else { | |
1446 | push @f, $_ if ord $_ != $previous_ord + 1; | |
1447 | } | |
fc81f5f2 | 1448 | } |
660c7bcc | 1449 | $previous_ord = ord $_; |
db31898d KW |
1450 | } |
1451 | } | |
1452 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1453 | |
db31898d KW |
1454 | ++$locales_test_number; |
1455 | undef @f; | |
d458c02c KW |
1456 | my @xdigit_digits; # :digit: & :xdigit: |
1457 | $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains one or two blocks of 10 consecutive [:digit:] chars'; | |
db31898d KW |
1458 | for (map { chr } 0..255) { |
1459 | if ($is_utf8_locale) { | |
1460 | use locale ':not_characters'; | |
d458c02c KW |
1461 | # For utf8 locales, we actually use a stricter test: that :digit: |
1462 | # is a subset of :xdigit:, as we know that only 0-9 should match | |
1463 | push @f, $_ if /[[:digit:]]/ and ! /[[:xdigit:]]/; | |
db31898d KW |
1464 | } |
1465 | else { | |
d458c02c | 1466 | push @xdigit_digits, $_ if /[[:digit:]]/ and /[[:xdigit:]]/; |
db31898d KW |
1467 | } |
1468 | } | |
d458c02c KW |
1469 | if (! $is_utf8_locale) { |
1470 | ||
1471 | # For non-utf8 locales, @xdigit_digits is a list of the characters | |
1472 | # that are both :xdigit: and :digit:. Because :digit: is stored in | |
1473 | # increasing code point order (unless the tests above failed), | |
1474 | # @xdigit_digits is as well. There should be exactly 10 or | |
1475 | # 20 of these. | |
1476 | if (@xdigit_digits != 10 && @xdigit_digits != 20) { | |
1477 | @f = @xdigit_digits; | |
1478 | } | |
1479 | else { | |
1480 | ||
1481 | # Look for contiguity in the series, adding any wrong ones to @f | |
1482 | my @temp = @xdigit_digits; | |
1483 | while (@temp > 1) { | |
1484 | push @f, $temp[1] if ($temp[0] != $temp[1] - 1) | |
1485 | ||
1486 | # Skip this test for the 0th character of | |
1487 | # the second block of 10, as it won't be | |
1488 | # contiguous with the previous block | |
1489 | && (! defined $xdigit_digits[10] | |
1490 | || $temp[1] != $xdigit_digits[10]); | |
1491 | shift @temp; | |
1492 | } | |
1493 | } | |
1494 | } | |
1495 | ||
db31898d | 1496 | report_multi_result($Locale, $locales_test_number, \@f); |
32e8aa3f | 1497 | |
660c7bcc KW |
1498 | ++$locales_test_number; |
1499 | undef @f; | |
1500 | $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains at least A-F, a-f'; | |
1501 | for ('A' .. 'F', 'a' .. 'f') { | |
1502 | if ($is_utf8_locale) { | |
1503 | use locale ':not_characters'; | |
1504 | push @f, $_ unless /[[:xdigit:]]/; | |
1505 | } | |
1506 | else { | |
1507 | push @f, $_ unless /[[:xdigit:]]/; | |
1508 | } | |
1509 | } | |
1510 | report_multi_result($Locale, $locales_test_number, \@f); | |
1511 | ||
1512 | ++$locales_test_number; | |
1513 | undef @f; | |
1514 | $test_names{$locales_test_number} = 'Verify that any additional members of [:xdigit:], are in groups of 6 consecutive code points'; | |
1515 | my $previous_ord; | |
1516 | my $count = 0; | |
d458c02c KW |
1517 | for my $chr (map { chr } 0..255) { |
1518 | next unless $chr =~ /[[:xdigit:]]/; | |
1519 | if ($is_utf8_locale) { | |
1520 | next if $chr =~ /[[:digit:]]/; | |
1521 | } | |
1522 | else { | |
1523 | next if grep { $chr eq $_ } @xdigit_digits; | |
1524 | } | |
1525 | next if $chr =~ /[A-Fa-f]/; | |
660c7bcc KW |
1526 | if (defined $previous_ord) { |
1527 | if ($is_utf8_locale) { | |
1528 | use locale ':not_characters'; | |
d458c02c | 1529 | push @f, $chr if ord $chr != $previous_ord + 1; |
660c7bcc KW |
1530 | } |
1531 | else { | |
d458c02c | 1532 | push @f, $chr if ord $chr != $previous_ord + 1; |
660c7bcc KW |
1533 | } |
1534 | } | |
1535 | $count++; | |
1536 | if ($count == 6) { | |
1537 | undef $previous_ord; | |
1538 | } | |
1539 | else { | |
d458c02c | 1540 | $previous_ord = ord $chr; |
660c7bcc KW |
1541 | } |
1542 | } | |
1543 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1544 | |
db31898d KW |
1545 | ++$locales_test_number; |
1546 | undef @f; | |
1547 | $test_names{$locales_test_number} = 'Verify that [:xdigit:] is a subset of [:graph:]'; | |
1548 | for (map { chr } 0..255) { | |
1549 | if ($is_utf8_locale) { | |
1550 | use locale ':not_characters'; | |
1551 | push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/; | |
1552 | } | |
1553 | else { | |
1554 | push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/; | |
1555 | } | |
1556 | } | |
1557 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1558 | |
660c7bcc KW |
1559 | # Note that xdigit doesn't have to be a subset of alnum |
1560 | ||
db31898d KW |
1561 | ++$locales_test_number; |
1562 | undef @f; | |
1563 | $test_names{$locales_test_number} = 'Verify that [:punct:] is a subset of [:graph:]'; | |
1564 | for (map { chr } 0..255) { | |
1565 | if ($is_utf8_locale) { | |
1566 | use locale ':not_characters'; | |
1567 | push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/; | |
1568 | } | |
1569 | else { | |
1570 | push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/; | |
1571 | } | |
1572 | } | |
1573 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1574 | |
db31898d KW |
1575 | ++$locales_test_number; |
1576 | undef @f; | |
660c7bcc KW |
1577 | $test_names{$locales_test_number} = 'Verify that the space character is not in [:graph:]'; |
1578 | if ($is_utf8_locale) { | |
1579 | use locale ':not_characters'; | |
1580 | push @f, " " if " " =~ /[[:graph:]]/; | |
1581 | } | |
1582 | else { | |
1583 | push @f, " " if " " =~ /[[:graph:]]/; | |
1584 | } | |
1585 | report_multi_result($Locale, $locales_test_number, \@f); | |
1586 | ||
1587 | ++$locales_test_number; | |
1588 | undef @f; | |
1589 | $test_names{$locales_test_number} = 'Verify that [:space:] contains at least [\f\n\r\t\cK ]'; | |
1590 | for (' ', "\f", "\n", "\r", "\t", "\cK") { | |
1591 | if ($is_utf8_locale) { | |
1592 | use locale ':not_characters'; | |
1593 | push @f, $_ unless /[[:space:]]/; | |
1594 | } | |
1595 | else { | |
1596 | push @f, $_ unless /[[:space:]]/; | |
1597 | } | |
1598 | } | |
1599 | report_multi_result($Locale, $locales_test_number, \@f); | |
1600 | ||
1601 | ++$locales_test_number; | |
1602 | undef @f; | |
1603 | $test_names{$locales_test_number} = 'Verify that [:blank:] contains at least [\t ]'; | |
1604 | for (' ', "\t") { | |
1605 | if ($is_utf8_locale) { | |
1606 | use locale ':not_characters'; | |
1607 | push @f, $_ unless /[[:blank:]]/; | |
1608 | } | |
1609 | else { | |
1610 | push @f, $_ unless /[[:blank:]]/; | |
1611 | } | |
1612 | } | |
1613 | report_multi_result($Locale, $locales_test_number, \@f); | |
1614 | ||
1615 | ++$locales_test_number; | |
1616 | undef @f; | |
db31898d KW |
1617 | $test_names{$locales_test_number} = 'Verify that [:blank:] is a subset of [:space:]'; |
1618 | for (map { chr } 0..255) { | |
1619 | if ($is_utf8_locale) { | |
1620 | use locale ':not_characters'; | |
1621 | push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/; | |
1622 | } | |
1623 | else { | |
1624 | push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/; | |
1625 | } | |
1626 | } | |
1627 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1628 | |
db31898d KW |
1629 | ++$locales_test_number; |
1630 | undef @f; | |
1631 | $test_names{$locales_test_number} = 'Verify that [:graph:] is a subset of [:print:]'; | |
1632 | for (map { chr } 0..255) { | |
1633 | if ($is_utf8_locale) { | |
1634 | use locale ':not_characters'; | |
1635 | push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/; | |
1636 | } | |
1637 | else { | |
1638 | push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/; | |
1639 | } | |
1640 | } | |
1641 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1642 | |
db31898d KW |
1643 | ++$locales_test_number; |
1644 | undef @f; | |
660c7bcc KW |
1645 | $test_names{$locales_test_number} = 'Verify that the space character is in [:print:]'; |
1646 | if ($is_utf8_locale) { | |
1647 | use locale ':not_characters'; | |
1648 | push @f, " " if " " !~ /[[:print:]]/; | |
1649 | } | |
1650 | else { | |
1651 | push @f, " " if " " !~ /[[:print:]]/; | |
1652 | } | |
1653 | report_multi_result($Locale, $locales_test_number, \@f); | |
1654 | ||
1655 | ++$locales_test_number; | |
1656 | undef @f; | |
db31898d KW |
1657 | $test_names{$locales_test_number} = 'Verify that isn\'t both [:cntrl:] and [:print:]'; |
1658 | for (map { chr } 0..255) { | |
1659 | if ($is_utf8_locale) { | |
1660 | use locale ':not_characters'; | |
1661 | push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/); | |
1662 | } | |
1663 | else { | |
1664 | push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/); | |
1665 | } | |
1666 | } | |
1667 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1668 | |
db31898d KW |
1669 | ++$locales_test_number; |
1670 | undef @f; | |
660c7bcc KW |
1671 | $test_names{$locales_test_number} = 'Verify that isn\'t both [:alpha:] and [:digit:]'; |
1672 | for (map { chr } 0..255) { | |
1673 | if ($is_utf8_locale) { | |
1674 | use locale ':not_characters'; | |
1675 | push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/; | |
1676 | } | |
1677 | else { | |
1678 | push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/; | |
1679 | } | |
1680 | } | |
1681 | report_multi_result($Locale, $locales_test_number, \@f); | |
1682 | ||
1683 | ++$locales_test_number; | |
1684 | undef @f; | |
db31898d KW |
1685 | $test_names{$locales_test_number} = 'Verify that isn\'t both [:alnum:] and [:punct:]'; |
1686 | for (map { chr } 0..255) { | |
1687 | if ($is_utf8_locale) { | |
1688 | use locale ':not_characters'; | |
1689 | push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/; | |
1690 | } | |
1691 | else { | |
1692 | push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/; | |
1693 | } | |
1694 | } | |
1695 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1696 | |
db31898d KW |
1697 | ++$locales_test_number; |
1698 | undef @f; | |
1699 | $test_names{$locales_test_number} = 'Verify that isn\'t both [:xdigit:] and [:punct:]'; | |
1700 | for (map { chr } 0..255) { | |
1701 | if ($is_utf8_locale) { | |
1702 | use locale ':not_characters'; | |
1703 | push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/); | |
1704 | } | |
1705 | else { | |
1706 | push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/); | |
1707 | } | |
1708 | } | |
1709 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1710 | |
db31898d KW |
1711 | ++$locales_test_number; |
1712 | undef @f; | |
1713 | $test_names{$locales_test_number} = 'Verify that isn\'t both [:graph:] and [:space:]'; | |
1714 | for (map { chr } 0..255) { | |
1715 | if ($is_utf8_locale) { | |
1716 | use locale ':not_characters'; | |
1717 | push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/); | |
1718 | } | |
1719 | else { | |
1720 | push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/); | |
1721 | } | |
1722 | } | |
1723 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1724 | |
7493b8f2 | 1725 | foreach ($first_casing_test_number..$locales_test_number) { |
5b1f18a7 | 1726 | $problematical_tests{$_} = 1; |
7493b8f2 KW |
1727 | } |
1728 | ||
32e8aa3f | 1729 | |
db31898d KW |
1730 | # Test for read-only scalars' locale vs non-locale comparisons. |
1731 | ||
1732 | { | |
1733 | no locale; | |
1734 | my $ok; | |
1735 | $a = "qwerty"; | |
1736 | if ($is_utf8_locale) { | |
1737 | use locale ':not_characters'; | |
1738 | $ok = ($a cmp "qwerty") == 0; | |
1739 | } | |
1740 | else { | |
1741 | use locale; | |
1742 | $ok = ($a cmp "qwerty") == 0; | |
1743 | } | |
1744 | report_result($Locale, ++$locales_test_number, $ok); | |
1745 | $test_names{$locales_test_number} = 'Verify that cmp works with a read-only scalar; no- vs locale'; | |
1746 | } | |
8ebc5c01 | 1747 | |
db31898d KW |
1748 | { |
1749 | my ($from, $to, $lesser, $greater, | |
1750 | @test, %test, $test, $yes, $no, $sign); | |
284102e8 | 1751 | |
db31898d KW |
1752 | ++$locales_test_number; |
1753 | $test_names{$locales_test_number} = 'Verify that "le", "ne", etc work'; | |
1754 | $not_necessarily_a_problem_test_number = $locales_test_number; | |
1755 | for (0..9) { | |
1756 | # Select a slice. | |
fb844330 KW |
1757 | $from = int(($_*@{$posixes{'word'}})/10); |
1758 | $to = $from + int(@{$posixes{'word'}}/10); | |
1759 | $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}}); | |
1760 | $lesser = join('', @{$posixes{'word'}}[$from..$to]); | |
db31898d KW |
1761 | # Select a slice one character on. |
1762 | $from++; $to++; | |
fb844330 KW |
1763 | $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}}); |
1764 | $greater = join('', @{$posixes{'word'}}[$from..$to]); | |
66cbab2c KW |
1765 | if ($is_utf8_locale) { |
1766 | use locale ':not_characters'; | |
db31898d KW |
1767 | ($yes, $no, $sign) = ($lesser lt $greater |
1768 | ? (" ", "not ", 1) | |
1769 | : ("not ", " ", -1)); | |
66cbab2c KW |
1770 | } |
1771 | else { | |
1772 | use locale; | |
db31898d KW |
1773 | ($yes, $no, $sign) = ($lesser lt $greater |
1774 | ? (" ", "not ", 1) | |
1775 | : ("not ", " ", -1)); | |
66cbab2c | 1776 | } |
db31898d KW |
1777 | # all these tests should FAIL (return 0). Exact lt or gt cannot |
1778 | # be tested because in some locales, say, eacute and E may test | |
1779 | # equal. | |
1780 | @test = | |
1781 | ( | |
1782 | $no.' ($lesser le $greater)', # 1 | |
1783 | 'not ($lesser ne $greater)', # 2 | |
1784 | ' ($lesser eq $greater)', # 3 | |
1785 | $yes.' ($lesser ge $greater)', # 4 | |
1786 | $yes.' ($lesser ge $greater)', # 5 | |
1787 | $yes.' ($greater le $lesser )', # 7 | |
1788 | 'not ($greater ne $lesser )', # 8 | |
1789 | ' ($greater eq $lesser )', # 9 | |
1790 | $no.' ($greater ge $lesser )', # 10 | |
1791 | 'not (($lesser cmp $greater) == -($sign))' # 11 | |
1792 | ); | |
1793 | @test{@test} = 0 x @test; | |
1794 | $test = 0; | |
1795 | for my $ti (@test) { | |
66cbab2c KW |
1796 | if ($is_utf8_locale) { |
1797 | use locale ':not_characters'; | |
db31898d | 1798 | $test{$ti} = eval $ti; |
66cbab2c KW |
1799 | } |
1800 | else { | |
db31898d KW |
1801 | # Already in 'use locale'; |
1802 | $test{$ti} = eval $ti; | |
66cbab2c | 1803 | } |
db31898d KW |
1804 | $test ||= $test{$ti} |
1805 | } | |
1806 | report_result($Locale, $locales_test_number, $test == 0); | |
1807 | if ($test) { | |
46973bb2 KW |
1808 | debug "lesser = '$lesser'\n"; |
1809 | debug "greater = '$greater'\n"; | |
1810 | debug "lesser cmp greater = ", | |
db31898d | 1811 | $lesser cmp $greater, "\n"; |
46973bb2 | 1812 | debug "greater cmp lesser = ", |
db31898d | 1813 | $greater cmp $lesser, "\n"; |
46973bb2 | 1814 | debug "(greater) from = $from, to = $to\n"; |
db31898d KW |
1815 | for my $ti (@test) { |
1816 | debugf("# %-40s %-4s", $ti, | |
1817 | $test{$ti} ? 'FAIL' : 'ok'); | |
1818 | if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) { | |
1819 | debugf("(%s == %4d)", $1, eval $1); | |
66cbab2c | 1820 | } |
46973bb2 | 1821 | debugf("\n#"); |
db31898d KW |
1822 | } |
1823 | ||
1824 | last; | |
1825 | } | |
1826 | } | |
4e615abd KW |
1827 | |
1828 | use locale; | |
1829 | ||
77ab507a | 1830 | my @sorted_controls; |
620f73f4 KW |
1831 | |
1832 | ++$locales_test_number; | |
1833 | $test_names{$locales_test_number} | |
77ab507a KW |
1834 | = 'Skip in locales where there are no controls;' |
1835 | . ' otherwise verify that \0 sorts before any (other) control'; | |
1836 | if (! $posixes{'cntrl'}) { | |
535a3fb3 | 1837 | report_result($Locale, $locales_test_number, 1); |
77ab507a KW |
1838 | |
1839 | # We use all code points for the tests below since there aren't | |
1840 | # any controls | |
1841 | push @sorted_controls, chr $_ for 1..255; | |
1842 | @sorted_controls = sort @sorted_controls; | |
535a3fb3 KW |
1843 | } |
1844 | else { | |
77ab507a KW |
1845 | @sorted_controls = @{$posixes{'cntrl'}}; |
1846 | push @sorted_controls, "\0", | |
1847 | unless grep { $_ eq "\0" } @sorted_controls; | |
1848 | @sorted_controls = sort @sorted_controls; | |
1849 | my $output = ""; | |
1850 | for my $control (@sorted_controls) { | |
1851 | $output .= " " . disp_chars($control); | |
1852 | } | |
1853 | debug "sorted :cntrl: (plus NUL) = $output\n"; | |
535a3fb3 KW |
1854 | my $ok = $sorted_controls[0] eq "\0"; |
1855 | report_result($Locale, $locales_test_number, $ok); | |
77ab507a KW |
1856 | |
1857 | shift @sorted_controls if $ok; | |
535a3fb3 | 1858 | } |
77ab507a | 1859 | |
620f73f4 KW |
1860 | my $lowest_control = $sorted_controls[0]; |
1861 | ||
4e615abd KW |
1862 | ++$locales_test_number; |
1863 | $test_names{$locales_test_number} | |
620f73f4 | 1864 | = 'Skip in locales where all controls have primary sorting weight; ' |
4e615abd | 1865 | . 'otherwise verify that \0 doesn\'t have primary sorting weight'; |
620f73f4 | 1866 | if ("a${lowest_control}c" lt "ab") { |
4e615abd KW |
1867 | report_result($Locale, $locales_test_number, 1); |
1868 | } | |
1869 | else { | |
1870 | my $ok = "ab" lt "a\0c"; | |
1871 | report_result($Locale, $locales_test_number, $ok); | |
1872 | } | |
6696cfa7 KW |
1873 | |
1874 | ++$locales_test_number; | |
1875 | $test_names{$locales_test_number} | |
a4a439fb | 1876 | = 'Verify that strings with embedded NUL collate'; |
535a3fb3 | 1877 | my $ok = "a\0a\0a" lt "a${lowest_control}a${lowest_control}a"; |
6696cfa7 KW |
1878 | report_result($Locale, $locales_test_number, $ok); |
1879 | ||
1880 | ++$locales_test_number; | |
1881 | $test_names{$locales_test_number} | |
a4a439fb | 1882 | = 'Verify that strings with embedded NUL and ' |
6696cfa7 | 1883 | . 'extra trailing NUL collate'; |
620f73f4 | 1884 | $ok = "a\0a\0" lt "a${lowest_control}a${lowest_control}"; |
6696cfa7 | 1885 | report_result($Locale, $locales_test_number, $ok); |
a4a439fb KW |
1886 | |
1887 | ++$locales_test_number; | |
1888 | $test_names{$locales_test_number} | |
e1c30f0c KW |
1889 | = 'Verify that empty strings collate'; |
1890 | $ok = "" le ""; | |
1891 | report_result($Locale, $locales_test_number, $ok); | |
1892 | ||
1893 | ++$locales_test_number; | |
1894 | $test_names{$locales_test_number} | |
a4a439fb KW |
1895 | = "Skip in non-UTF-8 locales; otherwise verify that UTF8ness " |
1896 | . "doesn't matter with collation"; | |
1897 | if (! $is_utf8_locale) { | |
1898 | report_result($Locale, $locales_test_number, 1); | |
1899 | } | |
1900 | else { | |
1901 | ||
1902 | # khw can't think of anything better. Start with a string that is | |
1903 | # higher than its UTF-8 representation in both EBCDIC and ASCII | |
1904 | my $string = chr utf8::unicode_to_native(0xff); | |
1905 | my $utf8_string = $string; | |
1906 | utf8::upgrade($utf8_string); | |
1907 | ||
1908 | # 8 should be lt 9 in all locales (except ones that aren't | |
1909 | # ASCII-based, which might fail this) | |
1910 | $ok = ("a${string}8") lt ("a${utf8_string}9"); | |
1911 | report_result($Locale, $locales_test_number, $ok); | |
1912 | } | |
1913 | ||
1914 | ++$locales_test_number; | |
1915 | $test_names{$locales_test_number} | |
1916 | = "Skip in UTF-8 locales; otherwise verify that single byte " | |
1917 | . "collates before 0x100 and above"; | |
1918 | if ($is_utf8_locale) { | |
1919 | report_result($Locale, $locales_test_number, 1); | |
1920 | } | |
1921 | else { | |
1922 | my $max_collating = chr 0; # Find byte that collates highest | |
1923 | for my $i (0 .. 255) { | |
1924 | my $char = chr $i; | |
1925 | $max_collating = $char if $char gt $max_collating; | |
1926 | } | |
1927 | $ok = $max_collating lt chr 0x100; | |
1928 | report_result($Locale, $locales_test_number, $ok); | |
1929 | } | |
1930 | ||
1931 | ++$locales_test_number; | |
1932 | $test_names{$locales_test_number} | |
1933 | = "Skip in UTF-8 locales; otherwise verify that 0x100 and " | |
1934 | . "above collate in code point order"; | |
1935 | if ($is_utf8_locale) { | |
1936 | report_result($Locale, $locales_test_number, 1); | |
1937 | } | |
1938 | else { | |
1939 | $ok = chr 0x100 lt chr 0x101; | |
1940 | report_result($Locale, $locales_test_number, $ok); | |
1941 | } | |
db31898d | 1942 | } |
c4093d7d | 1943 | |
66cbab2c KW |
1944 | my $ok1; |
1945 | my $ok2; | |
1946 | my $ok3; | |
1947 | my $ok4; | |
1948 | my $ok5; | |
1949 | my $ok6; | |
1950 | my $ok7; | |
1951 | my $ok8; | |
1952 | my $ok9; | |
1953 | my $ok10; | |
1954 | my $ok11; | |
1955 | my $ok12; | |
1956 | my $ok13; | |
1500bd91 | 1957 | my $ok14; |
2c6ee1a7 | 1958 | my $ok14_5; |
28acfe03 KW |
1959 | my $ok15; |
1960 | my $ok16; | |
e46375fa KW |
1961 | my $ok17; |
1962 | my $ok18; | |
f406a445 | 1963 | my $ok19; |
9717af6d | 1964 | my $ok20; |
8ee4b769 | 1965 | my $ok21; |
66cbab2c KW |
1966 | |
1967 | my $c; | |
1968 | my $d; | |
1969 | my $e; | |
1970 | my $f; | |
1971 | my $g; | |
e46375fa KW |
1972 | my $h; |
1973 | my $i; | |
1974 | my $j; | |
66cbab2c KW |
1975 | |
1976 | if (! $is_utf8_locale) { | |
71e5cbb3 | 1977 | use locale; |
6be75cd7 | 1978 | |
71e5cbb3 | 1979 | my ($x, $y) = (1.23, 1.23); |
6be75cd7 | 1980 | |
71e5cbb3 KW |
1981 | $a = "$x"; |
1982 | printf ''; # printf used to reset locale to "C" | |
1983 | $b = "$y"; | |
1984 | $ok1 = $a eq $b; | |
6be75cd7 | 1985 | |
71e5cbb3 KW |
1986 | $c = "$x"; |
1987 | my $z = sprintf ''; # sprintf used to reset locale to "C" | |
1988 | $d = "$y"; | |
1989 | $ok2 = $c eq $d; | |
1990 | { | |
66cbab2c | 1991 | |
71e5cbb3 KW |
1992 | use warnings; |
1993 | my $w = 0; | |
1994 | local $SIG{__WARN__} = | |
1995 | sub { | |
1996 | print "# @_\n"; | |
1997 | $w++; | |
1998 | }; | |
6be75cd7 | 1999 | |
71e5cbb3 KW |
2000 | # The == (among other ops) used to warn for locales |
2001 | # that had something else than "." as the radix character. | |
6be75cd7 | 2002 | |
71e5cbb3 KW |
2003 | $ok3 = $c == 1.23; |
2004 | $ok4 = $c == $x; | |
2005 | $ok5 = $c == $d; | |
2006 | { | |
2007 | no locale; | |
66cbab2c | 2008 | |
b79536ea | 2009 | $e = "$x"; |
71e5cbb3 KW |
2010 | |
2011 | $ok6 = $e == 1.23; | |
2012 | $ok7 = $e == $x; | |
2013 | $ok8 = $e == $c; | |
2014 | } | |
66cbab2c | 2015 | |
71e5cbb3 KW |
2016 | $f = "1.23"; |
2017 | $g = 2.34; | |
e46375fa KW |
2018 | $h = 1.5; |
2019 | $i = 1.25; | |
2020 | $j = "$h:$i"; | |
66cbab2c | 2021 | |
71e5cbb3 KW |
2022 | $ok9 = $f == 1.23; |
2023 | $ok10 = $f == $x; | |
2024 | $ok11 = $f == $c; | |
2025 | $ok12 = abs(($f + $g) - 3.57) < 0.01; | |
2026 | $ok13 = $w == 0; | |
2c6ee1a7 | 2027 | $ok14 = $ok14_5 = $ok15 = $ok16 = 1; # Skip for non-utf8 locales |
71e5cbb3 | 2028 | } |
e46375fa KW |
2029 | { |
2030 | no locale; | |
2031 | $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i); | |
2032 | } | |
2033 | $ok18 = $j eq sprintf("%g:%g", $h, $i); | |
66cbab2c KW |
2034 | } |
2035 | else { | |
2036 | use locale ':not_characters'; | |
2037 | ||
2038 | my ($x, $y) = (1.23, 1.23); | |
2039 | $a = "$x"; | |
2040 | printf ''; # printf used to reset locale to "C" | |
2041 | $b = "$y"; | |
2042 | $ok1 = $a eq $b; | |
2043 | ||
2044 | $c = "$x"; | |
2045 | my $z = sprintf ''; # sprintf used to reset locale to "C" | |
2046 | $d = "$y"; | |
2047 | $ok2 = $c eq $d; | |
2048 | { | |
2049 | use warnings; | |
2050 | my $w = 0; | |
2051 | local $SIG{__WARN__} = | |
2052 | sub { | |
2053 | print "# @_\n"; | |
2054 | $w++; | |
2055 | }; | |
2056 | $ok3 = $c == 1.23; | |
2057 | $ok4 = $c == $x; | |
2058 | $ok5 = $c == $d; | |
2059 | { | |
2060 | no locale; | |
b79536ea | 2061 | $e = "$x"; |
66cbab2c KW |
2062 | |
2063 | $ok6 = $e == 1.23; | |
2064 | $ok7 = $e == $x; | |
2065 | $ok8 = $e == $c; | |
2066 | } | |
2067 | ||
2068 | $f = "1.23"; | |
2069 | $g = 2.34; | |
e46375fa KW |
2070 | $h = 1.5; |
2071 | $i = 1.25; | |
2072 | $j = "$h:$i"; | |
66cbab2c KW |
2073 | |
2074 | $ok9 = $f == 1.23; | |
2075 | $ok10 = $f == $x; | |
2076 | $ok11 = $f == $c; | |
2077 | $ok12 = abs(($f + $g) - 3.57) < 0.01; | |
2078 | $ok13 = $w == 0; | |
1500bd91 KW |
2079 | |
2080 | # Look for non-ASCII error messages, and verify that the first | |
5320b60d | 2081 | # such is in UTF-8 (the others almost certainly will be like the |
5af2752a | 2082 | # first). This is only done if the current locale has LC_MESSAGES |
1500bd91 | 2083 | $ok14 = 1; |
2c6ee1a7 | 2084 | $ok14_5 = 1; |
ef9d5242 KW |
2085 | if ( locales_enabled('LC_MESSAGES') |
2086 | && setlocale(&POSIX::LC_MESSAGES, $Locale)) | |
2087 | { | |
9bc33472 KW |
2088 | foreach my $err (keys %!) { |
2089 | use Errno; | |
2090 | $! = eval "&Errno::$err"; # Convert to strerror() output | |
594feacd | 2091 | my $errnum = 0+$!; |
9bc33472 KW |
2092 | my $strerror = "$!"; |
2093 | if ("$strerror" =~ /\P{ASCII}/) { | |
2094 | $ok14 = utf8::is_utf8($strerror); | |
2095 | no locale; | |
2096 | $ok14_5 = "$!" !~ /\P{ASCII}/; | |
594feacd KW |
2097 | debug( disp_str( |
2098 | "non-ASCII \$! for error $errnum='$strerror'")) | |
2099 | if ! $ok14_5; | |
9bc33472 KW |
2100 | last; |
2101 | } | |
1500bd91 KW |
2102 | } |
2103 | } | |
28acfe03 KW |
2104 | |
2105 | # Similarly, we verify that a non-ASCII radix is in UTF-8. This | |
2106 | # also catches if there is a disparity between sprintf and | |
2107 | # stringification. | |
2108 | ||
2109 | my $string_g = "$g"; | |
2ef12107 | 2110 | my $sprintf_g = sprintf("%g", $g); |
28acfe03 | 2111 | |
2ef12107 KW |
2112 | $ok15 = $string_g =~ / ^ \p{ASCII}+ $ /x || utf8::is_utf8($string_g); |
2113 | $ok16 = $sprintf_g eq $string_g; | |
66cbab2c | 2114 | } |
e46375fa KW |
2115 | { |
2116 | no locale; | |
2117 | $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i); | |
2118 | } | |
2119 | $ok18 = $j eq sprintf("%g:%g", $h, $i); | |
57ba90dd KW |
2120 | } |
2121 | ||
d6bfff6d KW |
2122 | $ok19 = $ok20 = 1; |
2123 | if (setlocale(&POSIX::LC_TIME, $Locale)) { # These tests aren't affected by | |
2124 | # :not_characters | |
57ba90dd KW |
2125 | my @times = CORE::localtime(); |
2126 | ||
2127 | use locale; | |
2128 | $ok19 = POSIX::strftime("%p", @times) ne "%p"; # [perl #119425] | |
dc5a2383 KW |
2129 | my $date = POSIX::strftime("'%A' '%B' '%Z' '%p'", @times); |
2130 | debug("'Day' 'Month' 'TZ' 'am/pm' = ", disp_str($date)); | |
57ba90dd KW |
2131 | |
2132 | # If there is any non-ascii, it better be UTF-8 in a UTF-8 locale, and | |
2133 | # not UTF-8 if the locale isn't UTF-8. | |
2134 | $ok20 = $date =~ / ^ \p{ASCII}+ $ /x | |
2135 | || $is_utf8_locale == utf8::is_utf8($date); | |
66cbab2c KW |
2136 | } |
2137 | ||
8ee4b769 | 2138 | $ok21 = 1; |
ef9d5242 | 2139 | if (locales_enabled('LC_MESSAGES')) { |
a2917a7e KW |
2140 | foreach my $err (keys %!) { |
2141 | no locale; | |
2142 | use Errno; | |
2143 | $! = eval "&Errno::$err"; # Convert to strerror() output | |
2144 | my $strerror = "$!"; | |
c9dda6da | 2145 | if ($strerror =~ /\P{ASCII}/) { |
a2917a7e | 2146 | $ok21 = 0; |
c9dda6da | 2147 | debug(disp_str("non-ASCII strerror=$strerror")); |
a2917a7e | 2148 | last; |
ef9d5242 | 2149 | } |
8ee4b769 KW |
2150 | } |
2151 | } | |
2152 | ||
30032ef4 | 2153 | report_result($Locale, ++$locales_test_number, $ok1); |
66cbab2c KW |
2154 | $test_names{$locales_test_number} = 'Verify that an intervening printf doesn\'t change assignment results'; |
2155 | my $first_a_test = $locales_test_number; | |
2156 | ||
46973bb2 | 2157 | debug "$first_a_test..$locales_test_number: \$a = $a, \$b = $b, Locale = $Locale\n"; |
66cbab2c | 2158 | |
30032ef4 | 2159 | report_result($Locale, ++$locales_test_number, $ok2); |
66cbab2c KW |
2160 | $test_names{$locales_test_number} = 'Verify that an intervening sprintf doesn\'t change assignment results'; |
2161 | ||
2162 | my $first_c_test = $locales_test_number; | |
2163 | ||
435847ca JH |
2164 | $test_names{++$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a constant'; |
2165 | if ($Config{usequadmath}) { | |
2166 | print "# Skip: no locale radix with usequadmath ($test_names{$locales_test_number})\n"; | |
2167 | report_result($Locale, $locales_test_number, 1); | |
2168 | } else { | |
2169 | report_result($Locale, $locales_test_number, $ok3); | |
2170 | $problematical_tests{$locales_test_number} = 1; | |
2171 | } | |
6be75cd7 | 2172 | |
435847ca JH |
2173 | $test_names{++$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar'; |
2174 | if ($Config{usequadmath}) { | |
2175 | print "# Skip: no locale radix with usequadmath ($test_names{$locales_test_number})\n"; | |
2176 | report_result($Locale, $locales_test_number, 1); | |
2177 | } else { | |
2178 | report_result($Locale, $locales_test_number, $ok4); | |
2179 | $problematical_tests{$locales_test_number} = 1; | |
2180 | } | |
66cbab2c | 2181 | |
30032ef4 | 2182 | report_result($Locale, ++$locales_test_number, $ok5); |
71e5cbb3 | 2183 | $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar and an intervening sprintf'; |
b057411d | 2184 | $problematical_tests{$locales_test_number} = 1; |
66cbab2c | 2185 | |
46973bb2 | 2186 | debug "$first_c_test..$locales_test_number: \$c = $c, \$d = $d, Locale = $Locale\n"; |
66cbab2c | 2187 | |
30032ef4 | 2188 | report_result($Locale, ++$locales_test_number, $ok6); |
b79536ea | 2189 | $test_names{$locales_test_number} = 'Verify that can assign stringified under inner no-locale block'; |
71e5cbb3 | 2190 | my $first_e_test = $locales_test_number; |
6be75cd7 | 2191 | |
30032ef4 | 2192 | report_result($Locale, ++$locales_test_number, $ok7); |
71e5cbb3 | 2193 | $test_names{$locales_test_number} = 'Verify that "==" with a scalar still works in inner no locale'; |
66cbab2c | 2194 | |
435847ca JH |
2195 | $test_names{++$locales_test_number} = 'Verify that "==" with a scalar and an intervening sprintf still works in inner no locale'; |
2196 | if ($Config{usequadmath}) { | |
2197 | print "# Skip: no locale radix with usequadmath ($test_names{$locales_test_number})\n"; | |
2198 | report_result($Locale, $locales_test_number, 1); | |
2199 | } else { | |
2200 | report_result($Locale, $locales_test_number, $ok8); | |
2201 | $problematical_tests{$locales_test_number} = 1; | |
2202 | } | |
c4093d7d | 2203 | |
46973bb2 | 2204 | debug "$first_e_test..$locales_test_number: \$e = $e, no locale\n"; |
2a680da6 | 2205 | |
30032ef4 | 2206 | report_result($Locale, ++$locales_test_number, $ok9); |
71e5cbb3 | 2207 | $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a constant'; |
b057411d | 2208 | $problematical_tests{$locales_test_number} = 1; |
71e5cbb3 | 2209 | my $first_f_test = $locales_test_number; |
6be75cd7 | 2210 | |
30032ef4 | 2211 | report_result($Locale, ++$locales_test_number, $ok10); |
71e5cbb3 | 2212 | $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar'; |
b057411d | 2213 | $problematical_tests{$locales_test_number} = 1; |
66cbab2c | 2214 | |
435847ca JH |
2215 | $test_names{++$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar and an intervening sprintf'; |
2216 | if ($Config{usequadmath}) { | |
2217 | print "# Skip: no locale radix with usequadmath ($test_names{$locales_test_number})\n"; | |
2218 | report_result($Locale, $locales_test_number, 1); | |
2219 | } else { | |
2220 | report_result($Locale, $locales_test_number, $ok11); | |
2221 | $problematical_tests{$locales_test_number} = 1; | |
2222 | } | |
906f284f | 2223 | |
30032ef4 | 2224 | report_result($Locale, ++$locales_test_number, $ok12); |
71e5cbb3 | 2225 | $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix can participate in an addition and function call as numeric'; |
b057411d | 2226 | $problematical_tests{$locales_test_number} = 1; |
c4093d7d | 2227 | |
30032ef4 | 2228 | report_result($Locale, ++$locales_test_number, $ok13); |
71e5cbb3 | 2229 | $test_names{$locales_test_number} = 'Verify that don\'t get warning under "==" even if radix is not a dot'; |
b057411d | 2230 | $problematical_tests{$locales_test_number} = 1; |
c4093d7d | 2231 | |
30032ef4 | 2232 | report_result($Locale, ++$locales_test_number, $ok14); |
5320b60d | 2233 | $test_names{$locales_test_number} = 'Verify that non-ASCII UTF-8 error messages are in UTF-8'; |
1500bd91 | 2234 | |
2c6ee1a7 KW |
2235 | report_result($Locale, ++$locales_test_number, $ok14_5); |
2236 | $test_names{$locales_test_number} = '... and are ASCII outside "use locale"'; | |
2237 | ||
30032ef4 | 2238 | report_result($Locale, ++$locales_test_number, $ok15); |
28acfe03 KW |
2239 | $test_names{$locales_test_number} = 'Verify that a number with a UTF-8 radix has a UTF-8 stringification'; |
2240 | ||
30032ef4 | 2241 | report_result($Locale, ++$locales_test_number, $ok16); |
28acfe03 | 2242 | $test_names{$locales_test_number} = 'Verify that a sprintf of a number with a UTF-8 radix yields UTF-8'; |
dc51aebe | 2243 | $problematical_tests{$locales_test_number} = 1; |
28acfe03 | 2244 | |
e46375fa KW |
2245 | report_result($Locale, ++$locales_test_number, $ok17); |
2246 | $test_names{$locales_test_number} = 'Verify that a sprintf of a number outside locale scope uses a dot radix'; | |
2247 | ||
2248 | report_result($Locale, ++$locales_test_number, $ok18); | |
2249 | $test_names{$locales_test_number} = 'Verify that a sprintf of a number back within locale scope uses locale radix'; | |
dc51aebe | 2250 | $problematical_tests{$locales_test_number} = 1; |
e46375fa | 2251 | |
f406a445 KW |
2252 | report_result($Locale, ++$locales_test_number, $ok19); |
2253 | $test_names{$locales_test_number} = 'Verify that strftime doesn\'t return "%p" in locales where %p is empty'; | |
2254 | ||
9717af6d KW |
2255 | report_result($Locale, ++$locales_test_number, $ok20); |
2256 | $test_names{$locales_test_number} = 'Verify that strftime returns date with UTF-8 flag appropriately set'; | |
6a78954a KW |
2257 | $problematical_tests{$locales_test_number} = 1; # This is broken in |
2258 | # OS X 10.9.3 | |
9717af6d | 2259 | |
8ee4b769 KW |
2260 | report_result($Locale, ++$locales_test_number, $ok21); |
2261 | $test_names{$locales_test_number} = '"$!" is ASCII only outside of locale scope'; | |
2262 | ||
46973bb2 | 2263 | debug "$first_f_test..$locales_test_number: \$f = $f, \$g = $g, back to locale = $Locale\n"; |
906f284f | 2264 | |
26d80d95 | 2265 | # Does taking lc separately differ from taking |
126fe723 | 2266 | # the lc "in-line"? (This was the bug 19990704.002 (#965), change #3568.) |
26d80d95 | 2267 | # The bug was in the caching of the 'o'-magic. |
66cbab2c | 2268 | if (! $is_utf8_locale) { |
2a680da6 | 2269 | use locale; |
6be75cd7 | 2270 | |
2a680da6 JH |
2271 | sub lcA { |
2272 | my $lc0 = lc $_[0]; | |
2273 | my $lc1 = lc $_[1]; | |
2274 | return $lc0 cmp $lc1; | |
2275 | } | |
6be75cd7 | 2276 | |
2a680da6 JH |
2277 | sub lcB { |
2278 | return lc($_[0]) cmp lc($_[1]); | |
2279 | } | |
6be75cd7 | 2280 | |
2a680da6 JH |
2281 | my $x = "ab"; |
2282 | my $y = "aa"; | |
2283 | my $z = "AB"; | |
6be75cd7 | 2284 | |
30032ef4 | 2285 | report_result($Locale, ++$locales_test_number, |
2a680da6 JH |
2286 | lcA($x, $y) == 1 && lcB($x, $y) == 1 || |
2287 | lcA($x, $z) == 0 && lcB($x, $z) == 0); | |
6be75cd7 | 2288 | } |
66cbab2c KW |
2289 | else { |
2290 | use locale ':not_characters'; | |
2291 | ||
2292 | sub lcC { | |
2293 | my $lc0 = lc $_[0]; | |
2294 | my $lc1 = lc $_[1]; | |
2295 | return $lc0 cmp $lc1; | |
2296 | } | |
2297 | ||
2298 | sub lcD { | |
2299 | return lc($_[0]) cmp lc($_[1]); | |
2300 | } | |
2301 | ||
2302 | my $x = "ab"; | |
2303 | my $y = "aa"; | |
2304 | my $z = "AB"; | |
2305 | ||
30032ef4 | 2306 | report_result($Locale, ++$locales_test_number, |
66cbab2c KW |
2307 | lcC($x, $y) == 1 && lcD($x, $y) == 1 || |
2308 | lcC($x, $z) == 0 && lcD($x, $z) == 0); | |
2309 | } | |
2310 | $test_names{$locales_test_number} = 'Verify "lc(foo) cmp lc(bar)" is the same as using intermediaries for the cmp'; | |
d8093b23 | 2311 | |
26d80d95 LC |
2312 | # Does lc of an UPPER (if different from the UPPER) match |
2313 | # case-insensitively the UPPER, and does the UPPER match | |
2314 | # case-insensitively the lc of the UPPER. And vice versa. | |
3ba0e062 | 2315 | { |
ef4a39e5 JH |
2316 | use locale; |
2317 | no utf8; | |
2318 | my $re = qr/[\[\(\{\*\+\?\|\^\$\\]/; | |
2319 | ||
2320 | my @f = (); | |
c4093d7d | 2321 | ++$locales_test_number; |
c08acc4c | 2322 | $test_names{$locales_test_number} = 'Verify case insensitive matching works'; |
b9df08e4 | 2323 | foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) { |
66cbab2c | 2324 | if (! $is_utf8_locale) { |
71e5cbb3 KW |
2325 | my $y = lc $x; |
2326 | next unless uc $y eq $x; | |
46973bb2 | 2327 | debug_more( "UPPER=", disp_chars(($x)), |
92891c66 | 2328 | "; lc=", disp_chars(($y)), "; ", |
4eac893c | 2329 | "; fc=", disp_chars((fc $x)), "; ", |
92891c66 | 2330 | disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", |
4d2de105 | 2331 | $x =~ /\Q$y/i ? 1 : 0, |
92891c66 KW |
2332 | "; ", |
2333 | disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", | |
4d2de105 | 2334 | $y =~ /\Q$x/i ? 1 : 0, |
92891c66 | 2335 | "\n"); |
71e5cbb3 KW |
2336 | # |
2337 | # If $x and $y contain regular expression characters | |
2338 | # AND THEY lowercase (/i) to regular expression characters, | |
2339 | # regcomp() will be mightily confused. No, the \Q doesn't | |
2340 | # help here (maybe regex engine internal lowercasing | |
2341 | # is done after the \Q?) An example of this happening is | |
2342 | # the bg_BG (Bulgarian) locale under EBCDIC (OS/390 USS): | |
2343 | # the chr(173) (the "[") is the lowercase of the chr(235). | |
2344 | # | |
2345 | # Similarly losing EBCDIC locales include cs_cz, cs_CZ, | |
2346 | # el_gr, el_GR, en_us.IBM-037 (!), en_US.IBM-037 (!), | |
2347 | # et_ee, et_EE, hr_hr, hr_HR, hu_hu, hu_HU, lt_LT, | |
2348 | # mk_mk, mk_MK, nl_nl.IBM-037, nl_NL.IBM-037, | |
2349 | # pl_pl, pl_PL, ro_ro, ro_RO, ru_ru, ru_RU, | |
2350 | # sk_sk, sk_SK, sl_si, sl_SI, tr_tr, tr_TR. | |
2351 | # | |
2352 | # Similar things can happen even under (bastardised) | |
2353 | # non-EBCDIC locales: in many European countries before the | |
2354 | # advent of ISO 8859-x nationally customised versions of | |
2355 | # ISO 646 were devised, reusing certain punctuation | |
2356 | # characters for modified characters needed by the | |
2357 | # country/language. For example, the "|" might have | |
2358 | # stood for U+00F6 or LATIN SMALL LETTER O WITH DIAERESIS. | |
2359 | # | |
2360 | if ($x =~ $re || $y =~ $re) { | |
2361 | print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n"; | |
2362 | next; | |
2363 | } | |
4d2de105 | 2364 | push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i; |
26c1569f KW |
2365 | |
2366 | # fc is not a locale concept, so Perl uses lc for it. | |
2367 | push @f, $x unless lc $x eq fc $x; | |
66cbab2c KW |
2368 | } |
2369 | else { | |
2370 | use locale ':not_characters'; | |
2371 | my $y = lc $x; | |
2372 | next unless uc $y eq $x; | |
46973bb2 | 2373 | debug_more( "UPPER=", disp_chars(($x)), |
92891c66 | 2374 | "; lc=", disp_chars(($y)), "; ", |
4eac893c | 2375 | "; fc=", disp_chars((fc $x)), "; ", |
92891c66 | 2376 | disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", |
4d2de105 | 2377 | $x =~ /\Q$y/i ? 1 : 0, |
92891c66 KW |
2378 | "; ", |
2379 | disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", | |
4d2de105 | 2380 | $y =~ /\Q$x/i ? 1 : 0, |
92891c66 | 2381 | "\n"); |
66cbab2c | 2382 | |
4d2de105 | 2383 | push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i; |
26c1569f KW |
2384 | |
2385 | # The places where Unicode's lc is different from fc are | |
2386 | # skipped here by virtue of the 'next unless uc...' line above | |
2387 | push @f, $x unless lc $x eq fc $x; | |
66cbab2c | 2388 | } |
c00ff1c7 | 2389 | } |
ef4a39e5 | 2390 | |
b9df08e4 | 2391 | foreach my $x (sort { ord $a <=> ord $b } keys %lower) { |
66cbab2c | 2392 | if (! $is_utf8_locale) { |
71e5cbb3 KW |
2393 | my $y = uc $x; |
2394 | next unless lc $y eq $x; | |
46973bb2 | 2395 | debug_more( "lower=", disp_chars(($x)), |
92891c66 | 2396 | "; uc=", disp_chars(($y)), "; ", |
4eac893c | 2397 | "; fc=", disp_chars((fc $x)), "; ", |
92891c66 | 2398 | disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", |
4d2de105 | 2399 | $x =~ /\Q$y/i ? 1 : 0, |
92891c66 KW |
2400 | "; ", |
2401 | disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", | |
4d2de105 | 2402 | $y =~ /\Q$x/i ? 1 : 0, |
92891c66 | 2403 | "\n"); |
71e5cbb3 KW |
2404 | if ($x =~ $re || $y =~ $re) { # See above. |
2405 | print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n"; | |
2406 | next; | |
2407 | } | |
4d2de105 | 2408 | push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i; |
26c1569f KW |
2409 | |
2410 | push @f, $x unless lc $x eq fc $x; | |
66cbab2c KW |
2411 | } |
2412 | else { | |
2413 | use locale ':not_characters'; | |
2414 | my $y = uc $x; | |
2415 | next unless lc $y eq $x; | |
46973bb2 | 2416 | debug_more( "lower=", disp_chars(($x)), |
92891c66 | 2417 | "; uc=", disp_chars(($y)), "; ", |
4eac893c | 2418 | "; fc=", disp_chars((fc $x)), "; ", |
92891c66 | 2419 | disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", |
4d2de105 | 2420 | $x =~ /\Q$y/i ? 1 : 0, |
92891c66 KW |
2421 | "; ", |
2422 | disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", | |
4d2de105 | 2423 | $y =~ /\Q$x/i ? 1 : 0, |
92891c66 | 2424 | "\n"); |
4d2de105 | 2425 | push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i; |
26c1569f KW |
2426 | |
2427 | push @f, $x unless lc $x eq fc $x; | |
66cbab2c | 2428 | } |
c00ff1c7 | 2429 | } |
7c844d17 | 2430 | report_multi_result($Locale, $locales_test_number, \@f); |
5b1f18a7 | 2431 | $problematical_tests{$locales_test_number} = 1; |
d8093b23 | 2432 | } |
78787052 JL |
2433 | |
2434 | # [perl #109318] | |
2435 | { | |
2436 | my @f = (); | |
2437 | ++$locales_test_number; | |
2438 | $test_names{$locales_test_number} = 'Verify atof with locale radix and negative exponent'; | |
b057411d | 2439 | $problematical_tests{$locales_test_number} = 1; |
78787052 JL |
2440 | |
2441 | my $radix = POSIX::localeconv()->{decimal_point}; | |
2442 | my @nums = ( | |
2443 | "3.14e+9", "3${radix}14e+9", "3.14e-9", "3${radix}14e-9", | |
2444 | "-3.14e+9", "-3${radix}14e+9", "-3.14e-9", "-3${radix}14e-9", | |
2445 | ); | |
2446 | ||
2447 | if (! $is_utf8_locale) { | |
2448 | use locale; | |
2449 | for my $num (@nums) { | |
2450 | push @f, $num | |
2451 | unless sprintf("%g", $num) =~ /3.+14/; | |
2452 | } | |
2453 | } | |
2454 | else { | |
2455 | use locale ':not_characters'; | |
2456 | for my $num (@nums) { | |
2457 | push @f, $num | |
2458 | unless sprintf("%g", $num) =~ /3.+14/; | |
2459 | } | |
2460 | } | |
2461 | ||
435847ca JH |
2462 | if ($Config{usequadmath}) { |
2463 | print "# Skip: no locale radix with usequadmath ($Locale)\n"; | |
2464 | report_result($Locale, $locales_test_number, 1); | |
2465 | } else { | |
2466 | report_result($Locale, $locales_test_number, @f == 0); | |
2467 | if (@f) { | |
2468 | print "# failed $locales_test_number locale '$Locale' numbers @f\n" | |
2469 | } | |
78787052 JL |
2470 | } |
2471 | } | |
8ebc5c01 | 2472 | } |
284102e8 | 2473 | |
c4093d7d | 2474 | my $final_locales_test_number = $locales_test_number; |
6cf0b567 | 2475 | |
2a680da6 JH |
2476 | # Recount the errors. |
2477 | ||
852b6749 | 2478 | TEST_NUM: |
0d3899b1 | 2479 | foreach $test_num ($first_locales_test_number..$final_locales_test_number) { |
6b88b15b KW |
2480 | my $has_non_global_failure = $Problem{$test_num} |
2481 | || ! defined $Okay{$test_num} | |
2482 | || ! @{$Okay{$test_num}}; | |
2483 | print "not " if %setlocale_failed || $has_non_global_failure; | |
2484 | print "ok $test_num"; | |
2485 | $test_names{$test_num} = "" unless defined $test_names{$test_num}; | |
2486 | ||
2487 | # If TODO is in the test name, make it thus | |
2488 | my $todo = $test_names{$test_num} =~ s/\s*TODO\s*//; | |
2489 | print " $test_names{$test_num}"; | |
2490 | if ($todo) { | |
2491 | print " # TODO\n"; | |
c4093d7d | 2492 | } |
6b88b15b KW |
2493 | elsif (%setlocale_failed || ! $has_non_global_failure) { |
2494 | print "\n"; | |
2495 | } | |
2496 | elsif ($has_non_global_failure) { | |
852b6749 KW |
2497 | |
2498 | # If there are any locales that pass this test, or are known-bad, it | |
6b88b15b KW |
2499 | # may be that there are enough passes that we TODO the failure, but |
2500 | # only for tests that we have decided can be problematical. | |
2501 | if ( ($Okay{$test_num} || $Known_bad_locale{$test_num}) | |
852b6749 KW |
2502 | && grep { $_ == $test_num } keys %problematical_tests) |
2503 | { | |
852b6749 KW |
2504 | # Don't count the known-bad failures when calculating the |
2505 | # percentage that fail. | |
2506 | my $known_failures = (exists $Known_bad_locale{$test_num}) | |
26230909 | 2507 | ? scalar(keys $Known_bad_locale{$test_num}->%*) |
852b6749 | 2508 | : 0; |
26230909 | 2509 | my $adjusted_failures = scalar(keys $Problem{$test_num}->%*) |
852b6749 KW |
2510 | - $known_failures; |
2511 | ||
2512 | # Specially handle failures where only known-bad locales fail. | |
2513 | # This makes the diagnositics clearer. | |
2514 | if ($adjusted_failures <= 0) { | |
6b88b15b | 2515 | print " # TODO fails only on known bad locales: ", |
26230909 | 2516 | join " ", keys $Known_bad_locale{$test_num}->%*, "\n"; |
852b6749 KW |
2517 | next TEST_NUM; |
2518 | } | |
2519 | ||
0a974e2d | 2520 | # Round to nearest .1% |
852b6749 | 2521 | my $percent_fail = (int(.5 + (1000 * $adjusted_failures |
0a974e2d KW |
2522 | / scalar(@Locale)))) |
2523 | / 10; | |
6b88b15b KW |
2524 | $todo = $percent_fail < $acceptable_failure_percentage; |
2525 | print " # TODO" if $todo; | |
2526 | print "\n"; | |
2527 | ||
30962f68 | 2528 | if ($debug) { |
a20d09fc | 2529 | print "# $percent_fail% of locales (", |
26230909 | 2530 | scalar(keys $Problem{$test_num}->%*), |
a20d09fc KW |
2531 | " of ", |
2532 | scalar(@Locale), | |
30962f68 KW |
2533 | ") fail the above test (TODO cut-off is ", |
2534 | $acceptable_failure_percentage, | |
2535 | "%)\n"; | |
6d5d702a | 2536 | } |
6b88b15b KW |
2537 | elsif ($todo) { |
2538 | print "# ", 100 - $percent_fail, "% of locales not known to be problematic on this platform\n"; | |
2539 | print "# pass the above test, so it is likely that the failures\n"; | |
2540 | print "# are errors in the locale definitions. The test is marked TODO, as the\n"; | |
2541 | print "# problem is not likely to be Perl's\n"; | |
2542 | } | |
6d5d702a | 2543 | } |
6b88b15b | 2544 | |
f5627fc1 KW |
2545 | if ($debug) { |
2546 | print "# The code points that had this failure are given above. Look for lines\n"; | |
0d3899b1 | 2547 | print "# that match 'failed $test_num'\n"; |
f5627fc1 KW |
2548 | } |
2549 | else { | |
2550 | print "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n"; | |
0d3899b1 | 2551 | print "# Then look at that output for lines that match 'failed $test_num'\n"; |
108a305e | 2552 | } |
6b88b15b KW |
2553 | if (defined $not_necessarily_a_problem_test_number |
2554 | && $test_num == $not_necessarily_a_problem_test_number) | |
2555 | { | |
2556 | print "# The failure of test $not_necessarily_a_problem_test_number is not necessarily fatal.\n"; | |
2557 | print "# It usually indicates a problem in the environment,\n"; | |
2558 | print "# not in Perl itself.\n"; | |
2559 | } | |
6c2e653d | 2560 | } |
8ebc5c01 | 2561 | } |
fb73857a | 2562 | |
c4093d7d | 2563 | $test_num = $final_locales_test_number; |
c213d471 | 2564 | |
2fb9a6f6 | 2565 | unless ( $os =~ m!^(dragonfly|openbsd|bitrig|mirbsd)$! ) { |
083ace8f | 2566 | # perl #115808 |
fbd840df KW |
2567 | use warnings; |
2568 | my $warned = 0; | |
2569 | local $SIG{__WARN__} = sub { | |
2570 | $warned = $_[0] =~ /uninitialized/; | |
2571 | }; | |
2572 | my $z = "y" . setlocale(&POSIX::LC_ALL, "xyzzy"); | |
2573 | ok($warned, "variable set to setlocale(BAD LOCALE) is considered uninitialized"); | |
2574 | } | |
2575 | ||
094a2f8c | 2576 | # Test that tainting and case changing works on utf8 strings. These tests are |
1f5852c9 KW |
2577 | # placed last to avoid disturbing the hard-coded test numbers that existed at |
2578 | # the time these were added above this in this file. | |
0099bb8d KW |
2579 | # This also tests that locale overrides unicode_strings in the same scope for |
2580 | # non-utf8 strings. | |
a810e350 | 2581 | setlocale(&POSIX::LC_ALL, "C"); |
094a2f8c KW |
2582 | { |
2583 | use locale; | |
0099bb8d | 2584 | use feature 'unicode_strings'; |
094a2f8c | 2585 | |
26c1569f | 2586 | foreach my $function ("uc", "ucfirst", "lc", "lcfirst", "fc") { |
094a2f8c KW |
2587 | my @list; # List of code points to test for $function |
2588 | ||
2589 | # Used to calculate the changed case for ASCII characters by using the | |
2590 | # ord, instead of using one of the functions under test. | |
2591 | my $ascii_case_change_delta; | |
2592 | my $above_latin1_case_change_delta; # Same for the specific ords > 255 | |
2593 | # that we use | |
2594 | ||
445bf929 | 2595 | # We test an ASCII character, which should change case; |
094a2f8c | 2596 | # a Latin1 character, which shouldn't change case under this C locale, |
094a2f8c | 2597 | # an above-Latin1 character that when the case is changed would cross |
445bf929 KW |
2598 | # the 255/256 boundary, so doesn't change case |
2599 | # (the \x{149} is one of these, but changes into 2 characters, the | |
094a2f8c KW |
2600 | # first one of which doesn't cross the boundary. |
2601 | # the final one in each list is an above-Latin1 character whose case | |
445bf929 KW |
2602 | # does change. The code below uses its position in its list as a |
2603 | # marker to indicate that it, unlike the other code points above | |
2604 | # ASCII, has a successful case change | |
2605 | # | |
2606 | # All casing operations under locale (but not :not_characters) should | |
2607 | # taint | |
094a2f8c | 2608 | if ($function =~ /^u/) { |
9de25f67 KW |
2609 | @list = ("", "a", |
2610 | chr(utf8::unicode_to_native(0xe0)), | |
2611 | chr(utf8::unicode_to_native(0xff)), | |
2612 | "\x{fb00}", "\x{149}", "\x{101}"); | |
2613 | $ascii_case_change_delta = ($is_ebcdic) ? +64 : -32; | |
094a2f8c KW |
2614 | $above_latin1_case_change_delta = -1; |
2615 | } | |
2616 | else { | |
9de25f67 KW |
2617 | @list = ("", "A", |
2618 | chr(utf8::unicode_to_native(0xC0)), | |
2619 | "\x{17F}", "\x{100}"); | |
2620 | $ascii_case_change_delta = ($is_ebcdic) ? -64 : +32; | |
094a2f8c KW |
2621 | $above_latin1_case_change_delta = +1; |
2622 | } | |
66cbab2c | 2623 | foreach my $is_utf8_locale (0 .. 1) { |
71e5cbb3 KW |
2624 | foreach my $j (0 .. $#list) { |
2625 | my $char = $list[$j]; | |
0099bb8d KW |
2626 | |
2627 | for my $encoded_in_utf8 (0 .. 1) { | |
faf0c248 KW |
2628 | my $should_be; |
2629 | my $changed; | |
2630 | if (! $is_utf8_locale) { | |
ab0b796c | 2631 | no warnings 'locale'; |
faf0c248 KW |
2632 | $should_be = ($j == $#list) |
2633 | ? chr(ord($char) + $above_latin1_case_change_delta) | |
9de25f67 KW |
2634 | : (length $char == 0 || utf8::native_to_unicode(ord($char)) > 127) |
2635 | ? $char | |
2636 | : chr(ord($char) + $ascii_case_change_delta); | |
faf0c248 KW |
2637 | |
2638 | # This monstrosity is in order to avoid using an eval, | |
2639 | # which might perturb the results | |
2640 | $changed = ($function eq "uc") | |
2641 | ? uc($char) | |
2642 | : ($function eq "ucfirst") | |
2643 | ? ucfirst($char) | |
2644 | : ($function eq "lc") | |
2645 | ? lc($char) | |
2646 | : ($function eq "lcfirst") | |
2647 | ? lcfirst($char) | |
26c1569f KW |
2648 | : ($function eq "fc") |
2649 | ? fc($char) | |
faf0c248 KW |
2650 | : die("Unexpected function \"$function\""); |
2651 | } | |
2652 | else { | |
2653 | { | |
2654 | no locale; | |
71e5cbb3 | 2655 | |
faf0c248 KW |
2656 | # For utf8-locales the case changing functions |
2657 | # should work just like they do outside of locale. | |
2658 | # Can use eval here because not testing it when | |
2659 | # not in locale. | |
2660 | $should_be = eval "$function('$char')"; | |
2661 | die "Unexpected eval error $@ from 'eval \"$function('$char')\"'" if $@; | |
71e5cbb3 | 2662 | |
faf0c248 KW |
2663 | } |
2664 | use locale ':not_characters'; | |
2665 | $changed = ($function eq "uc") | |
2666 | ? uc($char) | |
2667 | : ($function eq "ucfirst") | |
2668 | ? ucfirst($char) | |
2669 | : ($function eq "lc") | |
2670 | ? lc($char) | |
2671 | : ($function eq "lcfirst") | |
2672 | ? lcfirst($char) | |
26c1569f KW |
2673 | : ($function eq "fc") |
2674 | ? fc($char) | |
faf0c248 | 2675 | : die("Unexpected function \"$function\""); |
71e5cbb3 | 2676 | } |
faf0c248 KW |
2677 | ok($changed eq $should_be, |
2678 | "$function(\"$char\") in C locale " | |
2679 | . (($is_utf8_locale) | |
2680 | ? "(use locale ':not_characters'" | |
2681 | : "(use locale") | |
2682 | . (($encoded_in_utf8) | |
2683 | ? "; encoded in utf8)" | |
2684 | : "; not encoded in utf8)") | |
2685 | . " should be \"$should_be\", got \"$changed\""); | |
2686 | ||
445bf929 KW |
2687 | # Tainting shouldn't happen for use locale :not_character |
2688 | # (a utf8 locale) | |
2689 | (! $is_utf8_locale) | |
faf0c248 KW |
2690 | ? check_taint($changed) |
2691 | : check_taint_not($changed); | |
2692 | ||
2693 | # Use UTF-8 next time through the loop | |
2694 | utf8::upgrade($char); | |
0099bb8d | 2695 | } |
66cbab2c | 2696 | } |
094a2f8c KW |
2697 | } |
2698 | } | |
2699 | } | |
2700 | ||
1bfe8fea KW |
2701 | # Give final advice. |
2702 | ||
2703 | my $didwarn = 0; | |
2704 | ||
2705 | foreach ($first_locales_test_number..$final_locales_test_number) { | |
2706 | if ($Problem{$_}) { | |
2707 | my @f = sort keys %{ $Problem{$_} }; | |
852b6749 KW |
2708 | |
2709 | # Don't list the failures caused by known-bad locales. | |
2fb9a6f6 KW |
2710 | if (exists $known_bad_locales{$os}) { |
2711 | @f = grep { $_ !~ $known_bad_locales{$os} } @f; | |
852b6749 KW |
2712 | next unless @f; |
2713 | } | |
1bfe8fea KW |
2714 | my $f = join(" ", @f); |
2715 | $f =~ s/(.{50,60}) /$1\n#\t/g; | |
2716 | ||
2717 | "#\n", | |
2718 | "# The locale ", (@f == 1 ? "definition" : "definitions"), "\n#\n", | |
2719 | "#\t", $f, "\n#\n", | |
2720 | "# on your system may have errors because the locale test $_\n", | |
2721 | "# \"$test_names{$_}\"\n", | |
2722 | "# failed in ", (@f == 1 ? "that locale" : "those locales"), | |
2723 | ".\n"; | |
2724 | print <<EOW; | |
2725 | # | |
2726 | # If your users are not using these locales you are safe for the moment, | |
98393d94 | 2727 | # but please report this failure first to perlbug\@perl.org using the |
1bfe8fea KW |
2728 | # perlbug script (as described in the INSTALL file) so that the exact |
2729 | # details of the failures can be sorted out first and then your operating | |
2730 | # system supplier can be alerted about these anomalies. | |
2731 | # | |
2732 | EOW | |
2733 | $didwarn = 1; | |
2734 | } | |
2735 | } | |
2736 | ||
2737 | # Tell which locales were okay and which were not. | |
2738 | ||
2739 | if ($didwarn) { | |
2740 | my (@s, @F); | |
2741 | ||
2742 | foreach my $l (@Locale) { | |
2743 | my $p = 0; | |
2744 | if ($setlocale_failed{$l}) { | |
2745 | $p++; | |
2746 | } | |
2747 | else { | |
2748 | foreach my $t | |
2749 | ($first_locales_test_number..$final_locales_test_number) | |
2750 | { | |
2751 | $p++ if $Problem{$t}{$l}; | |
2752 | } | |
2753 | } | |
2754 | push @s, $l if $p == 0; | |
2755 | push @F, $l unless $p == 0; | |
2756 | } | |
2757 | ||
2758 | if (@s) { | |
2759 | my $s = join(" ", @s); | |
2760 | $s =~ s/(.{50,60}) /$1\n#\t/g; | |
2761 | ||
a5cf558a | 2762 | |
1bfe8fea KW |
2763 | "# The following locales\n#\n", |
2764 | "#\t", $s, "\n#\n", | |
2765 | "# tested okay.\n#\n", | |
2766 | } else { | |
a5cf558a | 2767 | print "# None of your locales were fully okay.\n"; |
1bfe8fea KW |
2768 | } |
2769 | ||
2770 | if (@F) { | |
2771 | my $F = join(" ", @F); | |
2772 | $F =~ s/(.{50,60}) /$1\n#\t/g; | |
2773 | ||
c7e7f6b7 KW |
2774 | my $details = ""; |
2775 | unless ($debug) { | |
2776 | $details = "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n"; | |
2777 | } | |
2778 | elsif ($debug == 1) { | |
2779 | $details = "# For even more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=2.\n"; | |
2780 | } | |
2781 | ||
a5cf558a | 2782 | |
1bfe8fea KW |
2783 | "# The following locales\n#\n", |
2784 | "#\t", $F, "\n#\n", | |
2785 | "# had problems.\n#\n", | |
c7e7f6b7 | 2786 | $details; |
1bfe8fea | 2787 | } else { |
a5cf558a | 2788 | print "# None of your locales were broken.\n"; |
1bfe8fea KW |
2789 | } |
2790 | } | |
2791 | ||
2fb9a6f6 | 2792 | if (exists $known_bad_locales{$os} && ! %Known_bad_locale) { |
852b6749 KW |
2793 | $test_num++; |
2794 | print "ok $test_num $^O no longer has known bad locales # TODO\n"; | |
2795 | } | |
2796 | ||
fdf053ee | 2797 | print "1..$test_num\n"; |
906f284f | 2798 | |
90248788 | 2799 | # eof |