Commit | Line | Data |
---|---|---|
8ebc5c01 | 1 | #!./perl -wT |
2 | ||
66cbab2c KW |
3 | # This tests plain 'use locale' and adorned 'use locale ":not_characters"' |
4 | # Because these pragmas are compile time, and I (khw) am trying to test | |
5 | # without using 'eval' as much as possible, which might cloud the issue, the | |
6 | # crucial parts of the code are duplicated in a block for each pragma. | |
7 | ||
6c2e653d KW |
8 | # To make a TODO test, add the string 'TODO' to its %test_names value |
9 | ||
e3a2734b KW |
10 | binmode STDOUT, ':utf8'; |
11 | binmode STDERR, ':utf8'; | |
12 | ||
8ebc5c01 | 13 | BEGIN { |
14 | chdir 't' if -d 't'; | |
20822f61 | 15 | @INC = '../lib'; |
f9cbebe1 | 16 | unshift @INC, '.'; |
b002077a | 17 | require Config; import Config; |
97a0514d | 18 | if (!$Config{d_setlocale} || $Config{ccflags} =~ /\bD?NO_LOCALE\b/) { |
b002077a CS |
19 | print "1..0\n"; |
20 | exit; | |
21 | } | |
73fc293b | 22 | require './loc_tools.pl'; |
2de3dbcc | 23 | $| = 1; |
8ebc5c01 | 24 | } |
25 | ||
26 | use strict; | |
26c1569f | 27 | use feature 'fc'; |
8ebc5c01 | 28 | |
92891c66 | 29 | # =1 adds debugging output; =2 increases the verbosity somewhat |
108a305e | 30 | my $debug = $ENV{PERL_DEBUG_FULL_TEST} // 0; |
284102e8 | 31 | |
6d5d702a KW |
32 | # Certain tests have been shown to be problematical for a few locales. Don't |
33 | # fail them unless at least this percentage of the tested locales fail. | |
4874c8de | 34 | # On AIX machines, many locales call a no-break space a graphic. |
df33771d | 35 | # (There aren't 1000 locales currently in existence, so 99.9 works) |
4874c8de | 36 | my $acceptable_failure_percentage = ($^O =~ / ^ ( AIX ) $ /ix) |
31f658ea KW |
37 | ? 99.9 |
38 | : 5; | |
6d5d702a | 39 | |
7493b8f2 | 40 | # The list of test numbers of the problematic tests. |
5b1f18a7 | 41 | my %problematical_tests; |
7493b8f2 KW |
42 | |
43 | ||
db4b7445 A |
44 | use Dumpvalue; |
45 | ||
46 | my $dumper = Dumpvalue->new( | |
47 | tick => qq{"}, | |
48 | quoteHighBit => 0, | |
49 | unctrl => "quote" | |
50 | ); | |
6be75cd7 | 51 | sub debug { |
db4b7445 A |
52 | return unless $debug; |
53 | my($mess) = join "", @_; | |
54 | chop $mess; | |
55 | print $dumper->stringify($mess,1), "\n"; | |
6be75cd7 JH |
56 | } |
57 | ||
92891c66 KW |
58 | sub debug_more { |
59 | return unless $debug > 1; | |
60 | return debug(@_); | |
61 | } | |
62 | ||
6be75cd7 JH |
63 | sub debugf { |
64 | printf @_ if $debug; | |
65 | } | |
66 | ||
40205cab | 67 | $a = 'abc %9'; |
8ebc5c01 | 68 | |
c213d471 KW |
69 | my $test_num = 0; |
70 | ||
8ebc5c01 | 71 | sub ok { |
c213d471 | 72 | my ($result, $message) = @_; |
e3a2734b | 73 | $message = "" unless defined $message; |
8ebc5c01 | 74 | |
75 | print 'not ' unless ($result); | |
c213d471 | 76 | print "ok " . ++$test_num; |
e3a2734b KW |
77 | print " $message"; |
78 | print "\n"; | |
8ebc5c01 | 79 | } |
80 | ||
81 | # First we'll do a lot of taint checking for locales. | |
82 | # This is the easiest to test, actually, as any locale, | |
83 | # even the default locale will taint under 'use locale'. | |
84 | ||
85 | sub is_tainted { # hello, camel two. | |
9f1b1f2d | 86 | no warnings 'uninitialized' ; |
8ebc5c01 | 87 | my $dummy; |
ba74571d | 88 | local $@; |
8ebc5c01 | 89 | not eval { $dummy = join("", @_), kill 0; 1 } |
90 | } | |
91 | ||
a9b7c637 KW |
92 | sub check_taint ($;$) { |
93 | my $message_tail = $_[1] // ""; | |
2575405e KW |
94 | |
95 | # Extra blanks are so aligns with taint_not output | |
96 | $message_tail = ": $message_tail" if $message_tail; | |
a9b7c637 | 97 | ok is_tainted($_[0]), "verify that is tainted$message_tail"; |
8ebc5c01 | 98 | } |
99 | ||
a9b7c637 KW |
100 | sub check_taint_not ($;$) { |
101 | my $message_tail = $_[1] // ""; | |
2575405e | 102 | $message_tail = ": $message_tail" if $message_tail; |
a9b7c637 | 103 | ok((not is_tainted($_[0])), "verify that isn't tainted$message_tail"); |
8ebc5c01 | 104 | } |
105 | ||
bf3cd0e6 KW |
106 | "\tb\t" =~ /^m?(\s)(.*)\1$/; |
107 | check_taint_not $&, "not tainted outside 'use locale'"; | |
108 | ; | |
109 | ||
8ebc5c01 | 110 | use locale; # engage locale and therefore locale taint. |
111 | ||
b6fb4de7 KW |
112 | # BE SURE TO COPY ANYTHING YOU ADD to these tests to the block below for |
113 | # ":notcharacters" | |
2575405e KW |
114 | |
115 | check_taint_not $a, '$a'; | |
116 | ||
117 | check_taint uc($a), 'uc($a)'; | |
118 | check_taint "\U$a", '"\U$a"'; | |
119 | check_taint ucfirst($a), 'ucfirst($a)'; | |
120 | check_taint "\u$a", '"\u$a"'; | |
121 | check_taint lc($a), 'lc($a)'; | |
122 | check_taint fc($a), 'fc($a)'; | |
123 | check_taint "\L$a", '"\L$a"'; | |
124 | check_taint "\F$a", '"\F$a"'; | |
125 | check_taint lcfirst($a), 'lcfirst($a)'; | |
126 | check_taint "\l$a", '"\l$a"'; | |
127 | ||
128 | check_taint_not sprintf('%e', 123.456), "sprintf('%e', 123.456)"; | |
129 | check_taint_not sprintf('%f', 123.456), "sprintf('%f', 123.456)"; | |
130 | check_taint_not sprintf('%g', 123.456), "sprintf('%g', 123.456)"; | |
131 | check_taint_not sprintf('%d', 123.456), "sprintf('%d', 123.456)"; | |
132 | check_taint_not sprintf('%x', 123.456), "sprintf('%x', 123.456)"; | |
8ebc5c01 | 133 | |
134 | $_ = $a; # untaint $_ | |
135 | ||
136 | $_ = uc($a); # taint $_ | |
137 | ||
2575405e | 138 | check_taint $_, '$_ = uc($a)'; |
8ebc5c01 | 139 | |
140 | /(\w)/; # taint $&, $`, $', $+, $1. | |
2575405e | 141 | check_taint $&, "\$& from /(\\w)/"; |
fdf73a7f KW |
142 | check_taint $`, "\t\$`"; |
143 | check_taint $', "\t\$'"; | |
144 | check_taint $+, "\t\$+"; | |
145 | check_taint $1, "\t\$1"; | |
146 | check_taint_not $2, "\t\$2"; | |
8ebc5c01 | 147 | |
148 | /(.)/; # untaint $&, $`, $', $+, $1. | |
2575405e | 149 | check_taint_not $&, "\$& from /(.)/"; |
fdf73a7f KW |
150 | check_taint_not $`, "\t\$`"; |
151 | check_taint_not $', "\t\$'"; | |
152 | check_taint_not $+, "\t\$+"; | |
153 | check_taint_not $1, "\t\$1"; | |
154 | check_taint_not $2, "\t\$2"; | |
8ebc5c01 | 155 | |
156 | /(\W)/; # taint $&, $`, $', $+, $1. | |
2575405e | 157 | check_taint $&, "\$& from /(\\W)/"; |
fdf73a7f KW |
158 | check_taint $`, "\t\$`"; |
159 | check_taint $', "\t\$'"; | |
160 | check_taint $+, "\t\$+"; | |
161 | check_taint $1, "\t\$1"; | |
162 | check_taint_not $2, "\t\$2"; | |
8ebc5c01 | 163 | |
9486279c | 164 | /(.)/; # untaint $&, $`, $', $+, $1. |
2575405e | 165 | check_taint_not $&, "\$& from /(.)/"; |
fdf73a7f KW |
166 | check_taint_not $`, "\t\$`"; |
167 | check_taint_not $', "\t\$'"; | |
168 | check_taint_not $+, "\t\$+"; | |
169 | check_taint_not $1, "\t\$1"; | |
170 | check_taint_not $2, "\t\$2"; | |
9486279c | 171 | |
8ebc5c01 | 172 | /(\s)/; # taint $&, $`, $', $+, $1. |
2575405e | 173 | check_taint $&, "\$& from /(\\s)/"; |
fdf73a7f KW |
174 | check_taint $`, "\t\$`"; |
175 | check_taint $', "\t\$'"; | |
176 | check_taint $+, "\t\$+"; | |
177 | check_taint $1, "\t\$1"; | |
178 | check_taint_not $2, "\t\$2"; | |
8ebc5c01 | 179 | |
9486279c | 180 | /(.)/; # untaint $&, $`, $', $+, $1. |
2575405e | 181 | check_taint_not $&, "\$& from /(.)/"; |
9486279c | 182 | |
8ebc5c01 | 183 | /(\S)/; # taint $&, $`, $', $+, $1. |
2575405e | 184 | check_taint $&, "\$& from /(\\S)/"; |
fdf73a7f KW |
185 | check_taint $`, "\t\$`"; |
186 | check_taint $', "\t\$'"; | |
187 | check_taint $+, "\t\$+"; | |
188 | check_taint $1, "\t\$1"; | |
189 | check_taint_not $2, "\t\$2"; | |
8ebc5c01 | 190 | |
63baef57 | 191 | /(.)/; # untaint $&, $`, $', $+, $1. |
2575405e | 192 | check_taint_not $&, "\$& from /(.)/"; |
63baef57 | 193 | |
40205cab KW |
194 | "0" =~ /(\d)/; # taint $&, $`, $', $+, $1. |
195 | check_taint $&, "\$& from /(\\d)/"; | |
196 | check_taint $`, "\t\$`"; | |
197 | check_taint $', "\t\$'"; | |
198 | check_taint $+, "\t\$+"; | |
199 | check_taint $1, "\t\$1"; | |
200 | check_taint_not $2, "\t\$2"; | |
201 | ||
202 | /(.)/; # untaint $&, $`, $', $+, $1. | |
203 | check_taint_not $&, "\$& from /(.)/"; | |
204 | ||
205 | /(\D)/; # taint $&, $`, $', $+, $1. | |
206 | check_taint $&, "\$& from /(\\D)/"; | |
207 | check_taint $`, "\t\$`"; | |
208 | check_taint $', "\t\$'"; | |
209 | check_taint $+, "\t\$+"; | |
210 | check_taint $1, "\t\$1"; | |
211 | check_taint_not $2, "\t\$2"; | |
212 | ||
213 | /(.)/; # untaint $&, $`, $', $+, $1. | |
214 | check_taint_not $&, "\$& from /(.)/"; | |
215 | ||
216 | /([[:alnum:]])/; # taint $&, $`, $', $+, $1. | |
217 | check_taint $&, "\$& from /([[:alnum:]])/"; | |
218 | check_taint $`, "\t\$`"; | |
219 | check_taint $', "\t\$'"; | |
220 | check_taint $+, "\t\$+"; | |
221 | check_taint $1, "\t\$1"; | |
222 | check_taint_not $2, "\t\$2"; | |
223 | ||
224 | /(.)/; # untaint $&, $`, $', $+, $1. | |
225 | check_taint_not $&, "\$& from /(.)/"; | |
226 | ||
227 | /([[:^alnum:]])/; # taint $&, $`, $', $+, $1. | |
228 | check_taint $&, "\$& from /([[:^alnum:]])/"; | |
229 | check_taint $`, "\t\$`"; | |
230 | check_taint $', "\t\$'"; | |
231 | check_taint $+, "\t\$+"; | |
232 | check_taint $1, "\t\$1"; | |
233 | check_taint_not $2, "\t\$2"; | |
234 | ||
63baef57 | 235 | "a" =~ /(a)|(\w)/; # taint $&, $`, $', $+, $1. |
2575405e | 236 | check_taint $&, "\$& from /(a)|(\\w)/"; |
63baef57 KW |
237 | check_taint $`, "\t\$`"; |
238 | check_taint $', "\t\$'"; | |
239 | check_taint $+, "\t\$+"; | |
240 | check_taint $1, "\t\$1"; | |
2575405e KW |
241 | ok($1 eq 'a', ("\t" x 5) . "\$1 is 'a'"); |
242 | ok(! defined $2, ("\t" x 5) . "\$2 is undefined"); | |
63baef57 KW |
243 | check_taint_not $2, "\t\$2"; |
244 | check_taint_not $3, "\t\$3"; | |
245 | ||
246 | /(.)/; # untaint $&, $`, $', $+, $1. | |
2575405e | 247 | check_taint_not $&, "\$& from /(.)/"; |
63baef57 KW |
248 | |
249 | "\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i; # no tainting because no locale dependence | |
2575405e | 250 | check_taint_not $&, "\$& from /(\\N{CYRILLIC CAPITAL LETTER A})/i"; |
63baef57 KW |
251 | check_taint_not $`, "\t\$`"; |
252 | check_taint_not $', "\t\$'"; | |
253 | check_taint_not $+, "\t\$+"; | |
254 | check_taint_not $1, "\t\$1"; | |
2575405e | 255 | ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\t\$1 is 'small cyrillic a'"); |
63baef57 KW |
256 | check_taint_not $2, "\t\$2"; |
257 | ||
258 | /(.)/; # untaint $&, $`, $', $+, $1. | |
2575405e | 259 | check_taint_not $&, "\$& from /./"; |
63baef57 | 260 | |
40205cab KW |
261 | "(\N{KELVIN SIGN})" =~ /(\N{KELVIN SIGN})/i; # taints because depends on locale |
262 | check_taint $&, "\$& from /(\\N{KELVIN SIGN})/i"; | |
263 | check_taint $`, "\t\$`"; | |
264 | check_taint $', "\t\$'"; | |
265 | check_taint $+, "\t\$+"; | |
266 | check_taint $1, "\t\$1"; | |
267 | check_taint_not $2, "\t\$2"; | |
268 | ||
63baef57 | 269 | /(.)/; # untaint $&, $`, $', $+, $1. |
2575405e | 270 | check_taint_not $&, "\$& from /(.)/"; |
63baef57 KW |
271 | |
272 | "a:" =~ /(.)\b(.)/; # taint $&, $`, $', $+, $1. | |
2575405e | 273 | check_taint $&, "\$& from /(.)\\b(.)/"; |
63baef57 KW |
274 | check_taint $`, "\t\$`"; |
275 | check_taint $', "\t\$'"; | |
276 | check_taint $+, "\t\$+"; | |
277 | check_taint $1, "\t\$1"; | |
278 | check_taint $2, "\t\$2"; | |
279 | check_taint_not $3, "\t\$3"; | |
280 | ||
281 | /(.)/; # untaint $&, $`, $', $+, $1. | |
2575405e | 282 | check_taint_not $&, "\$& from /./"; |
63baef57 KW |
283 | |
284 | "aa" =~ /(.)\B(.)/; # taint $&, $`, $', $+, $1. | |
2575405e | 285 | check_taint $&, "\$& from /(.)\\B(.)/"; |
63baef57 KW |
286 | check_taint $`, "\t\$`"; |
287 | check_taint $', "\t\$'"; | |
288 | check_taint $+, "\t\$+"; | |
289 | check_taint $1, "\t\$1"; | |
290 | check_taint $2, "\t\$2"; | |
291 | check_taint_not $3, "\t\$3"; | |
292 | ||
293 | /(.)/; # untaint $&, $`, $', $+, $1. | |
2575405e | 294 | check_taint_not $&, "\$& from /./"; |
63baef57 KW |
295 | |
296 | "aaa" =~ /(.).(\1)/i; # notaint because not locale dependent | |
2575405e | 297 | check_taint_not $&, "\$ & from /(.).(\\1)/"; |
63baef57 KW |
298 | check_taint_not $`, "\t\$`"; |
299 | check_taint_not $', "\t\$'"; | |
300 | check_taint_not $+, "\t\$+"; | |
301 | check_taint_not $1, "\t\$1"; | |
302 | check_taint_not $2, "\t\$2"; | |
2575405e | 303 | check_taint_not $3, "\t\$3"; |
63baef57 KW |
304 | |
305 | /(.)/; # untaint $&, $`, $', $+, $1. | |
2575405e | 306 | check_taint_not $&, "\$ & from /./"; |
63baef57 | 307 | |
8ebc5c01 | 308 | $_ = $a; # untaint $_ |
309 | ||
2575405e | 310 | check_taint_not $_, 'untainting $_ works'; |
8ebc5c01 | 311 | |
312 | /(b)/; # this must not taint | |
2575405e | 313 | check_taint_not $&, "\$ & from /(b)/"; |
fdf73a7f KW |
314 | check_taint_not $`, "\t\$`"; |
315 | check_taint_not $', "\t\$'"; | |
316 | check_taint_not $+, "\t\$+"; | |
317 | check_taint_not $1, "\t\$1"; | |
318 | check_taint_not $2, "\t\$2"; | |
8ebc5c01 | 319 | |
320 | $_ = $a; # untaint $_ | |
321 | ||
2575405e | 322 | check_taint_not $_, 'untainting $_ works'; |
8ebc5c01 | 323 | |
324 | $b = uc($a); # taint $b | |
325 | s/(.+)/$b/; # this must taint only the $_ | |
326 | ||
2575405e | 327 | check_taint $_, '$_ (wasn\'t tainted) from s/(.+)/$b/ where $b is tainted'; |
fdf73a7f KW |
328 | check_taint_not $&, "\t\$&"; |
329 | check_taint_not $`, "\t\$`"; | |
330 | check_taint_not $', "\t\$'"; | |
331 | check_taint_not $+, "\t\$+"; | |
332 | check_taint_not $1, "\t\$1"; | |
333 | check_taint_not $2, "\t\$2"; | |
8ebc5c01 | 334 | |
335 | $_ = $a; # untaint $_ | |
336 | ||
337 | s/(.+)/b/; # this must not taint | |
2575405e | 338 | check_taint_not $_, '$_ (wasn\'t tainted) from s/(.+)/b/'; |
fdf73a7f KW |
339 | check_taint_not $&, "\t\$&"; |
340 | check_taint_not $`, "\t\$`"; | |
341 | check_taint_not $', "\t\$'"; | |
342 | check_taint_not $+, "\t\$+"; | |
343 | check_taint_not $1, "\t\$1"; | |
344 | check_taint_not $2, "\t\$2"; | |
8ebc5c01 | 345 | |
346 | $b = $a; # untaint $b | |
347 | ||
348 | ($b = $a) =~ s/\w/$&/; | |
2575405e KW |
349 | check_taint $b, '$b from ($b = $a) =~ s/\w/$&/'; # $b should be tainted. |
350 | check_taint_not $a, '$a from ($b = $a) =~ s/\w/$&/'; # $a should be not. | |
8ebc5c01 | 351 | |
352 | $_ = $a; # untaint $_ | |
353 | ||
354 | s/(\w)/\l$1/; # this must taint | |
2575405e | 355 | check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\l$1/,'; # this must taint |
fdf73a7f KW |
356 | check_taint $&, "\t\$&"; |
357 | check_taint $`, "\t\$`"; | |
358 | check_taint $', "\t\$'"; | |
359 | check_taint $+, "\t\$+"; | |
360 | check_taint $1, "\t\$1"; | |
361 | check_taint_not $2, "\t\$2"; | |
8ebc5c01 | 362 | |
363 | $_ = $a; # untaint $_ | |
364 | ||
365 | s/(\w)/\L$1/; # this must taint | |
2575405e | 366 | check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\L$1/,'; |
fdf73a7f KW |
367 | check_taint $&, "\t\$&"; |
368 | check_taint $`, "\t\$`"; | |
369 | check_taint $', "\t\$'"; | |
370 | check_taint $+, "\t\$+"; | |
371 | check_taint $1, "\t\$1"; | |
372 | check_taint_not $2, "\t\$2"; | |
8ebc5c01 | 373 | |
374 | $_ = $a; # untaint $_ | |
375 | ||
376 | s/(\w)/\u$1/; # this must taint | |
2575405e | 377 | check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\u$1/'; |
fdf73a7f KW |
378 | check_taint $&, "\t\$&"; |
379 | check_taint $`, "\t\$`"; | |
380 | check_taint $', "\t\$'"; | |
381 | check_taint $+, "\t\$+"; | |
382 | check_taint $1, "\t\$1"; | |
383 | check_taint_not $2, "\t\$2"; | |
8ebc5c01 | 384 | |
385 | $_ = $a; # untaint $_ | |
386 | ||
387 | s/(\w)/\U$1/; # this must taint | |
2575405e | 388 | check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\U$1/'; |
fdf73a7f KW |
389 | check_taint $&, "\t\$&"; |
390 | check_taint $`, "\t\$`"; | |
391 | check_taint $', "\t\$'"; | |
392 | check_taint $+, "\t\$+"; | |
393 | check_taint $1, "\t\$1"; | |
394 | check_taint_not $2, "\t\$2"; | |
8ebc5c01 | 395 | |
faab3281 KW |
396 | # After all this tainting $a should be cool. |
397 | ||
2575405e | 398 | check_taint_not $a, '$a still not tainted'; |
faab3281 | 399 | |
b99851e1 KW |
400 | "a" =~ /([a-z])/; |
401 | check_taint_not $1, '"a" =~ /([a-z])/'; | |
402 | "foo.bar_baz" =~ /^(.*)[._](.*?)$/; # Bug 120675 | |
403 | check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/'; | |
404 | ||
7a9d1c02 | 405 | # BE SURE TO COPY ANYTHING YOU ADD to the block below |
8ebc5c01 | 406 | |
66cbab2c KW |
407 | { # This is just the previous tests copied here with a different |
408 | # compile-time pragma. | |
409 | ||
410 | use locale ':not_characters'; # engage restricted locale with different | |
411 | # tainting rules | |
b6fb4de7 KW |
412 | check_taint_not $a, '$a'; |
413 | ||
414 | check_taint_not uc($a), 'uc($a)'; | |
415 | check_taint_not "\U$a", '"\U$a"'; | |
416 | check_taint_not ucfirst($a), 'ucfirst($a)'; | |
417 | check_taint_not "\u$a", '"\u$a"'; | |
418 | check_taint_not lc($a), 'lc($a)'; | |
419 | check_taint_not fc($a), 'fc($a)'; | |
420 | check_taint_not "\L$a", '"\L$a"'; | |
421 | check_taint_not "\F$a", '"\F$a"'; | |
422 | check_taint_not lcfirst($a), 'lcfirst($a)'; | |
423 | check_taint_not "\l$a", '"\l$a"'; | |
424 | ||
425 | check_taint_not sprintf('%e', 123.456), "sprintf('%e', 123.456)"; | |
426 | check_taint_not sprintf('%f', 123.456), "sprintf('%f', 123.456)"; | |
427 | check_taint_not sprintf('%g', 123.456), "sprintf('%g', 123.456)"; | |
428 | check_taint_not sprintf('%d', 123.456), "sprintf('%d', 123.456)"; | |
429 | check_taint_not sprintf('%x', 123.456), "sprintf('%x', 123.456)"; | |
66cbab2c KW |
430 | |
431 | $_ = $a; # untaint $_ | |
432 | ||
b6fb4de7 | 433 | $_ = uc($a); |
66cbab2c | 434 | |
b6fb4de7 | 435 | check_taint_not $_, '$_ = uc($a)'; |
66cbab2c | 436 | |
b6fb4de7 KW |
437 | /(\w)/; |
438 | check_taint_not $&, "\$& from /(\\w)/"; | |
439 | check_taint_not $`, "\t\$`"; | |
440 | check_taint_not $', "\t\$'"; | |
441 | check_taint_not $+, "\t\$+"; | |
442 | check_taint_not $1, "\t\$1"; | |
443 | check_taint_not $2, "\t\$2"; | |
66cbab2c KW |
444 | |
445 | /(.)/; # untaint $&, $`, $', $+, $1. | |
b6fb4de7 KW |
446 | check_taint_not $&, "\$& from /(.)/"; |
447 | check_taint_not $`, "\t\$`"; | |
448 | check_taint_not $', "\t\$'"; | |
449 | check_taint_not $+, "\t\$+"; | |
450 | check_taint_not $1, "\t\$1"; | |
451 | check_taint_not $2, "\t\$2"; | |
452 | ||
453 | /(\W)/; | |
454 | check_taint_not $&, "\$& from /(\\W)/"; | |
455 | check_taint_not $`, "\t\$`"; | |
456 | check_taint_not $', "\t\$'"; | |
457 | check_taint_not $+, "\t\$+"; | |
458 | check_taint_not $1, "\t\$1"; | |
459 | check_taint_not $2, "\t\$2"; | |
66cbab2c | 460 | |
b6fb4de7 KW |
461 | /(.)/; # untaint $&, $`, $', $+, $1. |
462 | check_taint_not $&, "\$& from /(.)/"; | |
463 | check_taint_not $`, "\t\$`"; | |
464 | check_taint_not $', "\t\$'"; | |
465 | check_taint_not $+, "\t\$+"; | |
466 | check_taint_not $1, "\t\$1"; | |
467 | check_taint_not $2, "\t\$2"; | |
468 | ||
469 | /(\s)/; | |
470 | check_taint_not $&, "\$& from /(\\s)/"; | |
471 | check_taint_not $`, "\t\$`"; | |
472 | check_taint_not $', "\t\$'"; | |
473 | check_taint_not $+, "\t\$+"; | |
474 | check_taint_not $1, "\t\$1"; | |
475 | check_taint_not $2, "\t\$2"; | |
66cbab2c | 476 | |
b6fb4de7 KW |
477 | /(.)/; # untaint $&, $`, $', $+, $1. |
478 | check_taint_not $&, "\$& from /(.)/"; | |
66cbab2c | 479 | |
b6fb4de7 KW |
480 | /(\S)/; |
481 | check_taint_not $&, "\$& from /(\\S)/"; | |
482 | check_taint_not $`, "\t\$`"; | |
483 | check_taint_not $', "\t\$'"; | |
484 | check_taint_not $+, "\t\$+"; | |
485 | check_taint_not $1, "\t\$1"; | |
486 | check_taint_not $2, "\t\$2"; | |
66cbab2c | 487 | |
b6fb4de7 KW |
488 | /(.)/; # untaint $&, $`, $', $+, $1. |
489 | check_taint_not $&, "\$& from /(.)/"; | |
490 | ||
491 | "0" =~ /(\d)/; | |
492 | check_taint_not $&, "\$& from /(\\d)/"; | |
493 | check_taint_not $`, "\t\$`"; | |
494 | check_taint_not $', "\t\$'"; | |
495 | check_taint_not $+, "\t\$+"; | |
496 | check_taint_not $1, "\t\$1"; | |
497 | check_taint_not $2, "\t\$2"; | |
498 | ||
499 | /(.)/; # untaint $&, $`, $', $+, $1. | |
500 | check_taint_not $&, "\$& from /(.)/"; | |
66cbab2c | 501 | |
b6fb4de7 KW |
502 | /(\D)/; |
503 | check_taint_not $&, "\$& from /(\\D)/"; | |
504 | check_taint_not $`, "\t\$`"; | |
505 | check_taint_not $', "\t\$'"; | |
506 | check_taint_not $+, "\t\$+"; | |
507 | check_taint_not $1, "\t\$1"; | |
508 | check_taint_not $2, "\t\$2"; | |
66cbab2c | 509 | |
b6fb4de7 KW |
510 | /(.)/; # untaint $&, $`, $', $+, $1. |
511 | check_taint_not $&, "\$& from /(.)/"; | |
512 | ||
513 | /([[:alnum:]])/; | |
514 | check_taint_not $&, "\$& from /([[:alnum:]])/"; | |
515 | check_taint_not $`, "\t\$`"; | |
516 | check_taint_not $', "\t\$'"; | |
517 | check_taint_not $+, "\t\$+"; | |
518 | check_taint_not $1, "\t\$1"; | |
519 | check_taint_not $2, "\t\$2"; | |
520 | ||
521 | /(.)/; # untaint $&, $`, $', $+, $1. | |
522 | check_taint_not $&, "\$& from /(.)/"; | |
523 | ||
524 | /([[:^alnum:]])/; | |
525 | check_taint_not $&, "\$& from /([[:^alnum:]])/"; | |
526 | check_taint_not $`, "\t\$`"; | |
527 | check_taint_not $', "\t\$'"; | |
528 | check_taint_not $+, "\t\$+"; | |
529 | check_taint_not $1, "\t\$1"; | |
530 | check_taint_not $2, "\t\$2"; | |
531 | ||
532 | "a" =~ /(a)|(\w)/; | |
533 | check_taint_not $&, "\$& from /(a)|(\\w)/"; | |
534 | check_taint_not $`, "\t\$`"; | |
535 | check_taint_not $', "\t\$'"; | |
536 | check_taint_not $+, "\t\$+"; | |
537 | check_taint_not $1, "\t\$1"; | |
538 | ok($1 eq 'a', ("\t" x 5) . "\$1 is 'a'"); | |
539 | ok(! defined $2, ("\t" x 5) . "\$2 is undefined"); | |
540 | check_taint_not $2, "\t\$2"; | |
541 | check_taint_not $3, "\t\$3"; | |
542 | ||
543 | /(.)/; # untaint $&, $`, $', $+, $1. | |
544 | check_taint_not $&, "\$& from /(.)/"; | |
545 | ||
546 | "\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i; | |
547 | check_taint_not $&, "\$& from /(\\N{CYRILLIC CAPITAL LETTER A})/i"; | |
548 | check_taint_not $`, "\t\$`"; | |
549 | check_taint_not $', "\t\$'"; | |
550 | check_taint_not $+, "\t\$+"; | |
551 | check_taint_not $1, "\t\$1"; | |
552 | ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\t\$1 is 'small cyrillic a'"); | |
553 | check_taint_not $2, "\t\$2"; | |
554 | ||
555 | /(.)/; # untaint $&, $`, $', $+, $1. | |
556 | check_taint_not $&, "\$& from /./"; | |
66cbab2c | 557 | |
b6fb4de7 KW |
558 | "(\N{KELVIN SIGN})" =~ /(\N{KELVIN SIGN})/i; |
559 | check_taint_not $&, "\$& from /(\\N{KELVIN SIGN})/i"; | |
560 | check_taint_not $`, "\t\$`"; | |
561 | check_taint_not $', "\t\$'"; | |
562 | check_taint_not $+, "\t\$+"; | |
563 | check_taint_not $1, "\t\$1"; | |
564 | check_taint_not $2, "\t\$2"; | |
565 | ||
566 | /(.)/; # untaint $&, $`, $', $+, $1. | |
567 | check_taint_not $&, "\$& from /(.)/"; | |
568 | ||
569 | "a:" =~ /(.)\b(.)/; | |
570 | check_taint_not $&, "\$& from /(.)\\b(.)/"; | |
571 | check_taint_not $`, "\t\$`"; | |
572 | check_taint_not $', "\t\$'"; | |
573 | check_taint_not $+, "\t\$+"; | |
574 | check_taint_not $1, "\t\$1"; | |
575 | check_taint_not $2, "\t\$2"; | |
576 | check_taint_not $3, "\t\$3"; | |
577 | ||
578 | /(.)/; # untaint $&, $`, $', $+, $1. | |
579 | check_taint_not $&, "\$& from /./"; | |
580 | ||
581 | "aa" =~ /(.)\B(.)/; | |
582 | check_taint_not $&, "\$& from /(.)\\B(.)/"; | |
583 | check_taint_not $`, "\t\$`"; | |
584 | check_taint_not $', "\t\$'"; | |
585 | check_taint_not $+, "\t\$+"; | |
586 | check_taint_not $1, "\t\$1"; | |
587 | check_taint_not $2, "\t\$2"; | |
588 | check_taint_not $3, "\t\$3"; | |
589 | ||
590 | /(.)/; # untaint $&, $`, $', $+, $1. | |
591 | check_taint_not $&, "\$& from /./"; | |
592 | ||
593 | "aaa" =~ /(.).(\1)/i; # notaint because not locale dependent | |
594 | check_taint_not $&, "\$ & from /(.).(\\1)/"; | |
595 | check_taint_not $`, "\t\$`"; | |
596 | check_taint_not $', "\t\$'"; | |
597 | check_taint_not $+, "\t\$+"; | |
598 | check_taint_not $1, "\t\$1"; | |
599 | check_taint_not $2, "\t\$2"; | |
600 | check_taint_not $3, "\t\$3"; | |
601 | ||
602 | /(.)/; # untaint $&, $`, $', $+, $1. | |
603 | check_taint_not $&, "\$ & from /./"; | |
604 | ||
605 | $_ = $a; # untaint $_ | |
606 | ||
607 | check_taint_not $_, 'untainting $_ works'; | |
608 | ||
609 | /(b)/; | |
610 | check_taint_not $&, "\$ & from /(b)/"; | |
611 | check_taint_not $`, "\t\$`"; | |
612 | check_taint_not $', "\t\$'"; | |
613 | check_taint_not $+, "\t\$+"; | |
614 | check_taint_not $1, "\t\$1"; | |
615 | check_taint_not $2, "\t\$2"; | |
66cbab2c KW |
616 | |
617 | $_ = $a; # untaint $_ | |
618 | ||
b6fb4de7 KW |
619 | check_taint_not $_, 'untainting $_ works'; |
620 | ||
621 | s/(.+)/b/; | |
622 | check_taint_not $_, '$_ (wasn\'t tainted) from s/(.+)/b/'; | |
623 | check_taint_not $&, "\t\$&"; | |
624 | check_taint_not $`, "\t\$`"; | |
625 | check_taint_not $', "\t\$'"; | |
626 | check_taint_not $+, "\t\$+"; | |
627 | check_taint_not $1, "\t\$1"; | |
628 | check_taint_not $2, "\t\$2"; | |
66cbab2c KW |
629 | |
630 | $b = $a; # untaint $b | |
631 | ||
632 | ($b = $a) =~ s/\w/$&/; | |
b6fb4de7 KW |
633 | check_taint_not $b, '$b from ($b = $a) =~ s/\w/$&/'; |
634 | check_taint_not $a, '$a from ($b = $a) =~ s/\w/$&/'; | |
66cbab2c KW |
635 | |
636 | $_ = $a; # untaint $_ | |
637 | ||
b6fb4de7 KW |
638 | s/(\w)/\l$1/; |
639 | check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\l$1/,'; # this must taint | |
640 | check_taint_not $&, "\t\$&"; | |
641 | check_taint_not $`, "\t\$`"; | |
642 | check_taint_not $', "\t\$'"; | |
643 | check_taint_not $+, "\t\$+"; | |
644 | check_taint_not $1, "\t\$1"; | |
645 | check_taint_not $2, "\t\$2"; | |
66cbab2c KW |
646 | |
647 | $_ = $a; # untaint $_ | |
648 | ||
b6fb4de7 KW |
649 | s/(\w)/\L$1/; |
650 | check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\L$1/,'; | |
651 | check_taint_not $&, "\t\$&"; | |
652 | check_taint_not $`, "\t\$`"; | |
653 | check_taint_not $', "\t\$'"; | |
654 | check_taint_not $+, "\t\$+"; | |
655 | check_taint_not $1, "\t\$1"; | |
656 | check_taint_not $2, "\t\$2"; | |
66cbab2c KW |
657 | |
658 | $_ = $a; # untaint $_ | |
659 | ||
b6fb4de7 KW |
660 | s/(\w)/\u$1/; |
661 | check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\u$1/'; | |
662 | check_taint_not $&, "\t\$&"; | |
663 | check_taint_not $`, "\t\$`"; | |
664 | check_taint_not $', "\t\$'"; | |
665 | check_taint_not $+, "\t\$+"; | |
666 | check_taint_not $1, "\t\$1"; | |
667 | check_taint_not $2, "\t\$2"; | |
66cbab2c KW |
668 | |
669 | $_ = $a; # untaint $_ | |
670 | ||
b6fb4de7 KW |
671 | s/(\w)/\U$1/; |
672 | check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\U$1/'; | |
673 | check_taint_not $&, "\t\$&"; | |
674 | check_taint_not $`, "\t\$`"; | |
675 | check_taint_not $', "\t\$'"; | |
676 | check_taint_not $+, "\t\$+"; | |
677 | check_taint_not $1, "\t\$1"; | |
678 | check_taint_not $2, "\t\$2"; | |
66cbab2c KW |
679 | |
680 | # After all this tainting $a should be cool. | |
681 | ||
b6fb4de7 | 682 | check_taint_not $a, '$a still not tainted'; |
7a9d1c02 KW |
683 | |
684 | "a" =~ /([a-z])/; | |
685 | check_taint_not $1, '"a" =~ /([a-z])/'; | |
686 | "foo.bar_baz" =~ /^(.*)[._](.*?)$/; # Bug 120675 | |
687 | check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/'; | |
b6fb4de7 | 688 | |
66cbab2c KW |
689 | } |
690 | ||
691 | # Here are in scope of 'use locale' | |
692 | ||
8ebc5c01 | 693 | # I think we've seen quite enough of taint. |
694 | # Let us do some *real* locale work now, | |
284102e8 | 695 | # unless setlocale() is missing (i.e. minitest). |
8ebc5c01 | 696 | |
6cf0b567 | 697 | # The test number before our first setlocale() |
66330f13 | 698 | my $final_without_setlocale = $test_num; |
6cf0b567 | 699 | |
284102e8 JH |
700 | # Find locales. |
701 | ||
6be75cd7 JH |
702 | debug "# Scanning for locales...\n"; |
703 | ||
9b0711ee KW |
704 | require POSIX; import POSIX ':locale_h'; |
705 | ||
706 | my @Locale = find_locales([ &POSIX::LC_CTYPE, &POSIX::LC_ALL ]); | |
4599a1de | 707 | |
887ef7ed PP |
708 | debug "# Locales =\n"; |
709 | for ( @Locale ) { | |
710 | debug "# $_\n"; | |
711 | } | |
8ebc5c01 | 712 | |
d369fd5b KW |
713 | unless (@Locale) { |
714 | print "1..$test_num\n"; | |
715 | exit; | |
716 | } | |
717 | ||
d369fd5b KW |
718 | |
719 | setlocale(&POSIX::LC_ALL, "C"); | |
720 | ||
73fc293b KW |
721 | my %posixes; |
722 | ||
284102e8 | 723 | my %Problem; |
2a680da6 JH |
724 | my %Okay; |
725 | my %Testing; | |
30032ef4 | 726 | my @Added_alpha; # Alphas that aren't in the C locale. |
c08acc4c | 727 | my %test_names; |
284102e8 | 728 | |
705af3af | 729 | sub disp_chars { |
019bf7dd KW |
730 | # This returns a display string denoting the input parameter @_, each |
731 | # entry of which is a single character in the range 0-255. The first part | |
732 | # of the output is a string of the characters in @_ that are ASCII | |
733 | # graphics, and hence unambiguously displayable. They are given by code | |
734 | # point order. The second part is the remaining code points, the ordinals | |
735 | # of which are each displayed as 2-digit hex. Blanks are inserted so as | |
736 | # to keep anything from the first part looking like a 2-digit hex number. | |
737 | ||
738 | no locale; | |
739 | my @chars = sort { ord $a <=> ord $b } @_; | |
740 | my $output = ""; | |
019bf7dd KW |
741 | my $range_start; |
742 | my $start_class; | |
743 | push @chars, chr(258); # This sentinel simplifies the loop termination | |
744 | # logic | |
745 | foreach my $i (0 .. @chars - 1) { | |
746 | my $char = $chars[$i]; | |
747 | my $range_end; | |
748 | my $class; | |
749 | ||
750 | # We avoid using [:posix:] classes, as these are being tested in this | |
751 | # file. Each equivalence class below is for things that can appear in | |
752 | # a range; those that can't be in a range have class -1. 0 for those | |
753 | # which should be output in hex; and >0 for the other ranges | |
754 | if ($char =~ /[A-Z]/) { | |
755 | $class = 2; | |
756 | } | |
757 | elsif ($char =~ /[a-z]/) { | |
758 | $class = 3; | |
759 | } | |
760 | elsif ($char =~ /[0-9]/) { | |
761 | $class = 4; | |
762 | } | |
78fbfaf5 KW |
763 | # Uncomment to get literal punctuation displayed instead of hex |
764 | #elsif ($char =~ /[[\]!"#\$\%&\'()*+,.\/:\\;<=>?\@\^_`{|}~-]/) { | |
765 | # $class = -1; # Punct never appears in a range | |
766 | #} | |
019bf7dd KW |
767 | else { |
768 | $class = 0; # Output in hex | |
769 | } | |
770 | ||
771 | if (! defined $range_start) { | |
772 | if ($class < 0) { | |
78fbfaf5 | 773 | $output .= " " . $char; |
019bf7dd KW |
774 | } |
775 | else { | |
776 | $range_start = ord $char; | |
777 | $start_class = $class; | |
778 | } | |
779 | } # A range ends if not consecutive, or the class-type changes | |
780 | elsif (ord $char != ($range_end = ord($chars[$i-1])) + 1 | |
781 | || $class != $start_class) | |
782 | { | |
783 | ||
784 | # Here, the current character is not in the range. This means the | |
785 | # previous character must have been. Output the range up through | |
786 | # that one. | |
787 | my $range_length = $range_end - $range_start + 1; | |
788 | if ($start_class > 0) { | |
789 | $output .= " " . chr($range_start); | |
790 | $output .= "-" . chr($range_end) if $range_length > 1; | |
791 | } | |
792 | else { | |
78fbfaf5 KW |
793 | $output .= sprintf(" %02X", $range_start); |
794 | $output .= sprintf("-%02X", $range_end) if $range_length > 1; | |
019bf7dd KW |
795 | } |
796 | ||
797 | # Handle the new current character, as potentially beginning a new | |
798 | # range | |
799 | undef $range_start; | |
800 | redo; | |
801 | } | |
802 | } | |
803 | ||
804 | $output =~ s/^ //; | |
78fbfaf5 | 805 | return $output; |
019bf7dd KW |
806 | } |
807 | ||
30032ef4 KW |
808 | sub report_result { |
809 | my ($Locale, $i, $pass_fail, $message) = @_; | |
15bbd6a2 KW |
810 | $message //= ""; |
811 | $message = " ($message)" if $message; | |
30032ef4 | 812 | unless ($pass_fail) { |
2a680da6 | 813 | $Problem{$i}{$Locale} = 1; |
baae13cb | 814 | debug "# failed $i ($test_names{$i}) with locale '$Locale'$message\n"; |
2a680da6 JH |
815 | } else { |
816 | push @{$Okay{$i}}, $Locale; | |
817 | } | |
818 | } | |
819 | ||
7c844d17 KW |
820 | sub report_multi_result { |
821 | my ($Locale, $i, $results_ref) = @_; | |
822 | ||
823 | # $results_ref points to an array, each element of which is a character that was | |
824 | # in error for this test numbered '$i'. If empty, the test passed | |
825 | ||
826 | my $message = ""; | |
827 | if (@$results_ref) { | |
705af3af | 828 | $message = join " ", "for", disp_chars(@$results_ref); |
7c844d17 KW |
829 | } |
830 | report_result($Locale, $i, @$results_ref == 0, $message); | |
831 | } | |
832 | ||
c4093d7d KW |
833 | my $first_locales_test_number = $final_without_setlocale + 1; |
834 | my $locales_test_number; | |
835 | my $not_necessarily_a_problem_test_number; | |
6d5d702a | 836 | my $first_casing_test_number; |
c4093d7d KW |
837 | my %setlocale_failed; # List of locales that setlocale() didn't work on |
838 | ||
73fc293b | 839 | foreach my $Locale (@Locale) { |
c4093d7d | 840 | $locales_test_number = $first_locales_test_number - 1; |
78fbfaf5 | 841 | debug "#\n"; |
284102e8 | 842 | debug "# Locale = $Locale\n"; |
284102e8 | 843 | |
a810e350 | 844 | unless (setlocale(&POSIX::LC_ALL, $Locale)) { |
c4093d7d | 845 | $setlocale_failed{$Locale} = $Locale; |
284102e8 | 846 | next; |
8ebc5c01 | 847 | } |
8ebc5c01 | 848 | |
31f05a37 KW |
849 | # We test UTF-8 locales only under ':not_characters'; It is easier to |
850 | # test them in other test files than here. Non- UTF-8 locales are tested | |
851 | # only under plain 'use locale', as otherwise we would have to convert | |
852 | # everything in them to Unicode. | |
66cbab2c | 853 | |
95eaa1bf KW |
854 | my %UPPER = (); # All alpha X for which uc(X) == X and lc(X) != X |
855 | my %lower = (); # All alpha X for which lc(X) == X and uc(X) != X | |
856 | my %BoThCaSe = (); # All alpha X for which uc(X) == lc(X) == X | |
66cbab2c | 857 | |
ab8b8bcc | 858 | my $is_utf8_locale = is_locale_utf8($Locale); |
b9df08e4 KW |
859 | |
860 | debug "# is utf8 locale? = $is_utf8_locale\n"; | |
861 | ||
862 | my $radix = localeconv()->{decimal_point}; | |
863 | if ($radix !~ / ^ [[:ascii:]] + $/x) { | |
864 | use bytes; | |
865 | $radix = disp_chars(split "", $radix); | |
866 | } | |
867 | debug "# radix = $radix\n"; | |
868 | ||
66cbab2c KW |
869 | if (! $is_utf8_locale) { |
870 | use locale; | |
fb844330 KW |
871 | @{$posixes{'word'}} = grep /\w/, map { chr } 0..255; |
872 | @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255; | |
873 | @{$posixes{'space'}} = grep /\s/, map { chr } 0..255; | |
874 | @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255; | |
875 | @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255; | |
876 | @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255; | |
877 | @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255; | |
878 | @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255; | |
879 | @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255; | |
880 | @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255; | |
881 | @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255; | |
98ed56e7 | 882 | @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255; |
fb844330 KW |
883 | @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255; |
884 | @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255; | |
885 | @{$posixes{'cased'}} = grep /[[:upper:]]/i, map {chr } 0..255; | |
e5272a46 | 886 | |
71e5cbb3 KW |
887 | # Sieve the uppercase and the lowercase. |
888 | ||
fb844330 | 889 | for (@{$posixes{'word'}}) { |
71e5cbb3 KW |
890 | if (/[^\d_]/) { # skip digits and the _ |
891 | if (uc($_) eq $_) { | |
892 | $UPPER{$_} = $_; | |
893 | } | |
894 | if (lc($_) eq $_) { | |
895 | $lower{$_} = $_; | |
896 | } | |
897 | } | |
898 | } | |
66cbab2c KW |
899 | } |
900 | else { | |
901 | use locale ':not_characters'; | |
fb844330 KW |
902 | @{$posixes{'word'}} = grep /\w/, map { chr } 0..255; |
903 | @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255; | |
904 | @{$posixes{'space'}} = grep /\s/, map { chr } 0..255; | |
905 | @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255; | |
906 | @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255; | |
907 | @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255; | |
908 | @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255; | |
909 | @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255; | |
910 | @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255; | |
911 | @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255; | |
912 | @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255; | |
98ed56e7 | 913 | @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255; |
fb844330 KW |
914 | @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255; |
915 | @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255; | |
916 | @{$posixes{'cased'}} = grep /[[:upper:]]/i, map {chr } 0..255; | |
917 | for (@{$posixes{'word'}}) { | |
66cbab2c KW |
918 | if (/[^\d_]/) { # skip digits and the _ |
919 | if (uc($_) eq $_) { | |
920 | $UPPER{$_} = $_; | |
921 | } | |
922 | if (lc($_) eq $_) { | |
923 | $lower{$_} = $_; | |
924 | } | |
925 | } | |
926 | } | |
927 | } | |
a160ac48 | 928 | |
fb844330 KW |
929 | # Ordered, where possible, in groups of "this is a subset of the next |
930 | # one" | |
705af3af KW |
931 | debug "# :upper: = ", disp_chars(@{$posixes{'upper'}}), "\n"; |
932 | debug "# :lower: = ", disp_chars(@{$posixes{'lower'}}), "\n"; | |
933 | debug "# :cased: = ", disp_chars(@{$posixes{'cased'}}), "\n"; | |
934 | debug "# :alpha: = ", disp_chars(@{$posixes{'alpha'}}), "\n"; | |
935 | debug "# :alnum: = ", disp_chars(@{$posixes{'alnum'}}), "\n"; | |
936 | debug "# w = ", disp_chars(@{$posixes{'word'}}), "\n"; | |
937 | debug "# :graph: = ", disp_chars(@{$posixes{'graph'}}), "\n"; | |
938 | debug "# :print: = ", disp_chars(@{$posixes{'print'}}), "\n"; | |
939 | debug "# d = ", disp_chars(@{$posixes{'digit'}}), "\n"; | |
940 | debug "# :xdigit: = ", disp_chars(@{$posixes{'xdigit'}}), "\n"; | |
941 | debug "# :blank: = ", disp_chars(@{$posixes{'blank'}}), "\n"; | |
942 | debug "# s = ", disp_chars(@{$posixes{'space'}}), "\n"; | |
943 | debug "# :punct: = ", disp_chars(@{$posixes{'punct'}}), "\n"; | |
944 | debug "# :cntrl: = ", disp_chars(@{$posixes{'cntrl'}}), "\n"; | |
945 | debug "# :ascii: = ", disp_chars(@{$posixes{'ascii'}}), "\n"; | |
a160ac48 | 946 | |
284102e8 | 947 | foreach (keys %UPPER) { |
a160ac48 | 948 | |
097ee67d | 949 | $BoThCaSe{$_}++ if exists $lower{$_}; |
284102e8 JH |
950 | } |
951 | foreach (keys %lower) { | |
097ee67d | 952 | $BoThCaSe{$_}++ if exists $UPPER{$_}; |
284102e8 | 953 | } |
097ee67d | 954 | foreach (keys %BoThCaSe) { |
284102e8 JH |
955 | delete $UPPER{$_}; |
956 | delete $lower{$_}; | |
957 | } | |
958 | ||
e5cc0528 KW |
959 | my %Unassigned; |
960 | foreach my $ord ( 0 .. 255 ) { | |
961 | $Unassigned{chr $ord} = 1; | |
962 | } | |
963 | foreach my $class (keys %posixes) { | |
964 | foreach my $char (@{$posixes{$class}}) { | |
965 | delete $Unassigned{$char}; | |
966 | } | |
967 | } | |
968 | ||
b9df08e4 KW |
969 | debug "# UPPER = ", disp_chars(sort { ord $a <=> ord $b } keys %UPPER), "\n"; |
970 | debug "# lower = ", disp_chars(sort { ord $a <=> ord $b } keys %lower), "\n"; | |
971 | debug "# BoThCaSe = ", disp_chars(sort { ord $a <=> ord $b } keys %BoThCaSe), "\n"; | |
705af3af | 972 | debug "# Unassigned = ", disp_chars(sort { ord $a <=> ord $b } keys %Unassigned), "\n"; |
284102e8 | 973 | |
baa71cfd | 974 | my @failures; |
3da38613 | 975 | my @fold_failures; |
b9df08e4 | 976 | foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) { |
baa71cfd | 977 | my $ok; |
3da38613 | 978 | my $fold_ok; |
baa71cfd KW |
979 | if ($is_utf8_locale) { |
980 | use locale ':not_characters'; | |
981 | $ok = $x =~ /[[:upper:]]/; | |
3da38613 | 982 | $fold_ok = $x =~ /[[:lower:]]/i; |
baa71cfd KW |
983 | } |
984 | else { | |
985 | use locale; | |
986 | $ok = $x =~ /[[:upper:]]/; | |
3da38613 | 987 | $fold_ok = $x =~ /[[:lower:]]/i; |
baa71cfd KW |
988 | } |
989 | push @failures, $x unless $ok; | |
3da38613 | 990 | push @fold_failures, $x unless $fold_ok; |
baa71cfd | 991 | } |
baa71cfd | 992 | $locales_test_number++; |
6d5d702a | 993 | $first_casing_test_number = $locales_test_number; |
95eaa1bf | 994 | $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/ matches all alpha X for which uc(X) == X and lc(X) != X'; |
7c844d17 | 995 | report_multi_result($Locale, $locales_test_number, \@failures); |
6d5d702a | 996 | |
3da38613 | 997 | $locales_test_number++; |
6d5d702a | 998 | |
95eaa1bf | 999 | $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i matches all alpha X for which uc(X) == X and lc(X) != X'; |
7c844d17 | 1000 | report_multi_result($Locale, $locales_test_number, \@fold_failures); |
baa71cfd | 1001 | |
baa71cfd | 1002 | undef @failures; |
3da38613 | 1003 | undef @fold_failures; |
baa71cfd | 1004 | |
b9df08e4 | 1005 | foreach my $x (sort { ord $a <=> ord $b } keys %lower) { |
baa71cfd | 1006 | my $ok; |
3da38613 | 1007 | my $fold_ok; |
baa71cfd KW |
1008 | if ($is_utf8_locale) { |
1009 | use locale ':not_characters'; | |
1010 | $ok = $x =~ /[[:lower:]]/; | |
3da38613 | 1011 | $fold_ok = $x =~ /[[:upper:]]/i; |
baa71cfd KW |
1012 | } |
1013 | else { | |
1014 | use locale; | |
1015 | $ok = $x =~ /[[:lower:]]/; | |
3da38613 | 1016 | $fold_ok = $x =~ /[[:upper:]]/i; |
baa71cfd KW |
1017 | } |
1018 | push @failures, $x unless $ok; | |
3da38613 | 1019 | push @fold_failures, $x unless $fold_ok; |
baa71cfd KW |
1020 | } |
1021 | ||
1022 | $locales_test_number++; | |
95eaa1bf | 1023 | $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/ matches all alpha X for which lc(X) == X and uc(X) != X'; |
7c844d17 KW |
1024 | report_multi_result($Locale, $locales_test_number, \@failures); |
1025 | ||
3da38613 | 1026 | $locales_test_number++; |
95eaa1bf | 1027 | $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/i matches all alpha X for which lc(X) == X and uc(X) != X'; |
7c844d17 | 1028 | report_multi_result($Locale, $locales_test_number, \@fold_failures); |
baa71cfd | 1029 | |
9445c837 KW |
1030 | { # Find the alphabetic characters that are not considered alphabetics |
1031 | # in the default (C) locale. | |
8ebc5c01 | 1032 | |
284102e8 | 1033 | no locale; |
71e5cbb3 | 1034 | |
30032ef4 | 1035 | @Added_alpha = (); |
5e7a1028 | 1036 | for (keys %UPPER, keys %lower, keys %BoThCaSe) { |
30032ef4 | 1037 | push(@Added_alpha, $_) if (/\W/); |
284102e8 | 1038 | } |
8ebc5c01 | 1039 | } |
8ebc5c01 | 1040 | |
b9df08e4 | 1041 | @Added_alpha = sort { ord $a <=> ord $b } @Added_alpha; |
8ebc5c01 | 1042 | |
705af3af | 1043 | debug "# Added_alpha = ", disp_chars(@Added_alpha), "\n"; |
8ebc5c01 | 1044 | |
db31898d | 1045 | # Cross-check the whole 8-bit character set. |
8ebc5c01 | 1046 | |
db31898d KW |
1047 | ++$locales_test_number; |
1048 | my @f; | |
1049 | $test_names{$locales_test_number} = 'Verify that \w and [:word:] are identical'; | |
1050 | for (map { chr } 0..255) { | |
1051 | if ($is_utf8_locale) { | |
1052 | use locale ':not_characters'; | |
1053 | push @f, $_ unless /[[:word:]]/ == /\w/; | |
1054 | } | |
1055 | else { | |
1056 | push @f, $_ unless /[[:word:]]/ == /\w/; | |
1057 | } | |
1058 | } | |
1059 | report_multi_result($Locale, $locales_test_number, \@f); | |
8ebc5c01 | 1060 | |
db31898d KW |
1061 | ++$locales_test_number; |
1062 | undef @f; | |
1063 | $test_names{$locales_test_number} = 'Verify that \d and [:digit:] are identical'; | |
1064 | for (map { chr } 0..255) { | |
1065 | if ($is_utf8_locale) { | |
1066 | use locale ':not_characters'; | |
1067 | push @f, $_ unless /[[:digit:]]/ == /\d/; | |
1068 | } | |
1069 | else { | |
1070 | push @f, $_ unless /[[:digit:]]/ == /\d/; | |
1071 | } | |
1072 | } | |
1073 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1074 | |
db31898d KW |
1075 | ++$locales_test_number; |
1076 | undef @f; | |
1077 | $test_names{$locales_test_number} = 'Verify that \s and [:space:] are identical'; | |
1078 | for (map { chr } 0..255) { | |
1079 | if ($is_utf8_locale) { | |
1080 | use locale ':not_characters'; | |
1081 | push @f, $_ unless /[[:space:]]/ == /\s/; | |
1082 | } | |
1083 | else { | |
1084 | push @f, $_ unless /[[:space:]]/ == /\s/; | |
1085 | } | |
1086 | } | |
1087 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1088 | |
db31898d KW |
1089 | ++$locales_test_number; |
1090 | undef @f; | |
1091 | $test_names{$locales_test_number} = 'Verify that [:posix:] and [:^posix:] are mutually exclusive'; | |
1092 | for (map { chr } 0..255) { | |
1093 | if ($is_utf8_locale) { | |
1094 | use locale ':not_characters'; | |
1095 | push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) || | |
1096 | (/[[:alnum:]]/ xor /[[:^alnum:]]/) || | |
1097 | (/[[:ascii:]]/ xor /[[:^ascii:]]/) || | |
1098 | (/[[:blank:]]/ xor /[[:^blank:]]/) || | |
1099 | (/[[:cntrl:]]/ xor /[[:^cntrl:]]/) || | |
1100 | (/[[:digit:]]/ xor /[[:^digit:]]/) || | |
1101 | (/[[:graph:]]/ xor /[[:^graph:]]/) || | |
1102 | (/[[:lower:]]/ xor /[[:^lower:]]/) || | |
1103 | (/[[:print:]]/ xor /[[:^print:]]/) || | |
1104 | (/[[:space:]]/ xor /[[:^space:]]/) || | |
1105 | (/[[:upper:]]/ xor /[[:^upper:]]/) || | |
1106 | (/[[:word:]]/ xor /[[:^word:]]/) || | |
1107 | (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) || | |
1108 | ||
1109 | # effectively is what [:cased:] would be if it existed. | |
1110 | (/[[:upper:]]/i xor /[[:^upper:]]/i); | |
1111 | } | |
1112 | else { | |
1113 | push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) || | |
1114 | (/[[:alnum:]]/ xor /[[:^alnum:]]/) || | |
1115 | (/[[:ascii:]]/ xor /[[:^ascii:]]/) || | |
1116 | (/[[:blank:]]/ xor /[[:^blank:]]/) || | |
1117 | (/[[:cntrl:]]/ xor /[[:^cntrl:]]/) || | |
1118 | (/[[:digit:]]/ xor /[[:^digit:]]/) || | |
1119 | (/[[:graph:]]/ xor /[[:^graph:]]/) || | |
1120 | (/[[:lower:]]/ xor /[[:^lower:]]/) || | |
1121 | (/[[:print:]]/ xor /[[:^print:]]/) || | |
1122 | (/[[:space:]]/ xor /[[:^space:]]/) || | |
1123 | (/[[:upper:]]/ xor /[[:^upper:]]/) || | |
1124 | (/[[:word:]]/ xor /[[:^word:]]/) || | |
1125 | (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) || | |
1126 | (/[[:upper:]]/i xor /[[:^upper:]]/i); | |
1127 | } | |
1128 | } | |
1129 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1130 | |
db31898d KW |
1131 | # The rules for the relationships are given in: |
1132 | # http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html | |
32e8aa3f | 1133 | |
660c7bcc KW |
1134 | |
1135 | ++$locales_test_number; | |
1136 | undef @f; | |
1137 | $test_names{$locales_test_number} = 'Verify that [:lower:] contains at least a-z'; | |
1138 | for ('a' .. 'z') { | |
1139 | if ($is_utf8_locale) { | |
1140 | use locale ':not_characters'; | |
1141 | push @f, $_ unless /[[:lower:]]/; | |
1142 | } | |
1143 | else { | |
1144 | push @f, $_ unless /[[:lower:]]/; | |
1145 | } | |
1146 | } | |
1147 | report_multi_result($Locale, $locales_test_number, \@f); | |
1148 | ||
db31898d KW |
1149 | ++$locales_test_number; |
1150 | undef @f; | |
1151 | $test_names{$locales_test_number} = 'Verify that [:lower:] is a subset of [:alpha:]'; | |
1152 | for (map { chr } 0..255) { | |
1153 | if ($is_utf8_locale) { | |
1154 | use locale ':not_characters'; | |
1155 | push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/; | |
1156 | } | |
1157 | else { | |
1158 | push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/; | |
1159 | } | |
1160 | } | |
1161 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1162 | |
db31898d KW |
1163 | ++$locales_test_number; |
1164 | undef @f; | |
660c7bcc KW |
1165 | $test_names{$locales_test_number} = 'Verify that [:upper:] contains at least A-Z'; |
1166 | for ('A' .. 'Z') { | |
1167 | if ($is_utf8_locale) { | |
1168 | use locale ':not_characters'; | |
1169 | push @f, $_ unless /[[:upper:]]/; | |
1170 | } | |
1171 | else { | |
1172 | push @f, $_ unless /[[:upper:]]/; | |
1173 | } | |
1174 | } | |
1175 | report_multi_result($Locale, $locales_test_number, \@f); | |
1176 | ||
1177 | ++$locales_test_number; | |
1178 | undef @f; | |
db31898d KW |
1179 | $test_names{$locales_test_number} = 'Verify that [:upper:] is a subset of [:alpha:]'; |
1180 | for (map { chr } 0..255) { | |
1181 | if ($is_utf8_locale) { | |
1182 | use locale ':not_characters'; | |
1183 | push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/; | |
1184 | } | |
1185 | else { | |
1186 | push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/; | |
1187 | } | |
1188 | } | |
1189 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1190 | |
db31898d KW |
1191 | ++$locales_test_number; |
1192 | undef @f; | |
1193 | $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i is a subset of [:alpha:]'; | |
1194 | for (map { chr } 0..255) { | |
1195 | if ($is_utf8_locale) { | |
1196 | use locale ':not_characters'; | |
1197 | push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/; | |
1198 | } | |
1199 | else { | |
1200 | push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/; | |
1201 | } | |
1202 | } | |
1203 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1204 | |
db31898d KW |
1205 | ++$locales_test_number; |
1206 | undef @f; | |
1207 | $test_names{$locales_test_number} = 'Verify that [:alpha:] is a subset of [:alnum:]'; | |
1208 | for (map { chr } 0..255) { | |
1209 | if ($is_utf8_locale) { | |
1210 | use locale ':not_characters'; | |
1211 | push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/; | |
1212 | } | |
1213 | else { | |
1214 | push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/; | |
1215 | } | |
1216 | } | |
1217 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1218 | |
db31898d KW |
1219 | ++$locales_test_number; |
1220 | undef @f; | |
660c7bcc KW |
1221 | $test_names{$locales_test_number} = 'Verify that [:digit:] contains at least 0-9'; |
1222 | for ('0' .. '9') { | |
1223 | if ($is_utf8_locale) { | |
1224 | use locale ':not_characters'; | |
1225 | push @f, $_ unless /[[:digit:]]/; | |
1226 | } | |
1227 | else { | |
1228 | push @f, $_ unless /[[:digit:]]/; | |
1229 | } | |
1230 | } | |
1231 | report_multi_result($Locale, $locales_test_number, \@f); | |
1232 | ||
1233 | ++$locales_test_number; | |
1234 | undef @f; | |
db31898d KW |
1235 | $test_names{$locales_test_number} = 'Verify that [:digit:] is a subset of [:alnum:]'; |
1236 | for (map { chr } 0..255) { | |
1237 | if ($is_utf8_locale) { | |
1238 | use locale ':not_characters'; | |
1239 | push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/; | |
1240 | } | |
1241 | else { | |
1242 | push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/; | |
1243 | } | |
1244 | } | |
1245 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1246 | |
db31898d KW |
1247 | ++$locales_test_number; |
1248 | undef @f; | |
aac995d2 | 1249 | $test_names{$locales_test_number} = 'Verify that [:digit:] matches either 10 or 20 code points'; |
fb844330 | 1250 | report_result($Locale, $locales_test_number, @{$posixes{'digit'}} == 10 || @{$posixes{'digit'}} == 20); |
aac995d2 KW |
1251 | |
1252 | ++$locales_test_number; | |
1253 | undef @f; | |
660c7bcc KW |
1254 | $test_names{$locales_test_number} = 'Verify that if there is a second set of digits in [:digit:], they are consecutive'; |
1255 | if (@{$posixes{'digit'}} == 20) { | |
1256 | my $previous_ord; | |
fc81f5f2 | 1257 | for (map { chr } 0..255) { |
660c7bcc KW |
1258 | next unless /[[:digit:]]/; |
1259 | next if /[0-9]/; | |
1260 | if (defined $previous_ord) { | |
1261 | if ($is_utf8_locale) { | |
1262 | use locale ':not_characters'; | |
1263 | push @f, $_ if ord $_ != $previous_ord + 1; | |
1264 | } | |
1265 | else { | |
1266 | push @f, $_ if ord $_ != $previous_ord + 1; | |
1267 | } | |
fc81f5f2 | 1268 | } |
660c7bcc | 1269 | $previous_ord = ord $_; |
db31898d KW |
1270 | } |
1271 | } | |
1272 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1273 | |
db31898d KW |
1274 | ++$locales_test_number; |
1275 | undef @f; | |
d458c02c KW |
1276 | my @xdigit_digits; # :digit: & :xdigit: |
1277 | $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains one or two blocks of 10 consecutive [:digit:] chars'; | |
db31898d KW |
1278 | for (map { chr } 0..255) { |
1279 | if ($is_utf8_locale) { | |
1280 | use locale ':not_characters'; | |
d458c02c KW |
1281 | # For utf8 locales, we actually use a stricter test: that :digit: |
1282 | # is a subset of :xdigit:, as we know that only 0-9 should match | |
1283 | push @f, $_ if /[[:digit:]]/ and ! /[[:xdigit:]]/; | |
db31898d KW |
1284 | } |
1285 | else { | |
d458c02c | 1286 | push @xdigit_digits, $_ if /[[:digit:]]/ and /[[:xdigit:]]/; |
db31898d KW |
1287 | } |
1288 | } | |
d458c02c KW |
1289 | if (! $is_utf8_locale) { |
1290 | ||
1291 | # For non-utf8 locales, @xdigit_digits is a list of the characters | |
1292 | # that are both :xdigit: and :digit:. Because :digit: is stored in | |
1293 | # increasing code point order (unless the tests above failed), | |
1294 | # @xdigit_digits is as well. There should be exactly 10 or | |
1295 | # 20 of these. | |
1296 | if (@xdigit_digits != 10 && @xdigit_digits != 20) { | |
1297 | @f = @xdigit_digits; | |
1298 | } | |
1299 | else { | |
1300 | ||
1301 | # Look for contiguity in the series, adding any wrong ones to @f | |
1302 | my @temp = @xdigit_digits; | |
1303 | while (@temp > 1) { | |
1304 | push @f, $temp[1] if ($temp[0] != $temp[1] - 1) | |
1305 | ||
1306 | # Skip this test for the 0th character of | |
1307 | # the second block of 10, as it won't be | |
1308 | # contiguous with the previous block | |
1309 | && (! defined $xdigit_digits[10] | |
1310 | || $temp[1] != $xdigit_digits[10]); | |
1311 | shift @temp; | |
1312 | } | |
1313 | } | |
1314 | } | |
1315 | ||
db31898d | 1316 | report_multi_result($Locale, $locales_test_number, \@f); |
32e8aa3f | 1317 | |
660c7bcc KW |
1318 | ++$locales_test_number; |
1319 | undef @f; | |
1320 | $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains at least A-F, a-f'; | |
1321 | for ('A' .. 'F', 'a' .. 'f') { | |
1322 | if ($is_utf8_locale) { | |
1323 | use locale ':not_characters'; | |
1324 | push @f, $_ unless /[[:xdigit:]]/; | |
1325 | } | |
1326 | else { | |
1327 | push @f, $_ unless /[[:xdigit:]]/; | |
1328 | } | |
1329 | } | |
1330 | report_multi_result($Locale, $locales_test_number, \@f); | |
1331 | ||
1332 | ++$locales_test_number; | |
1333 | undef @f; | |
1334 | $test_names{$locales_test_number} = 'Verify that any additional members of [:xdigit:], are in groups of 6 consecutive code points'; | |
1335 | my $previous_ord; | |
1336 | my $count = 0; | |
d458c02c KW |
1337 | for my $chr (map { chr } 0..255) { |
1338 | next unless $chr =~ /[[:xdigit:]]/; | |
1339 | if ($is_utf8_locale) { | |
1340 | next if $chr =~ /[[:digit:]]/; | |
1341 | } | |
1342 | else { | |
1343 | next if grep { $chr eq $_ } @xdigit_digits; | |
1344 | } | |
1345 | next if $chr =~ /[A-Fa-f]/; | |
660c7bcc KW |
1346 | if (defined $previous_ord) { |
1347 | if ($is_utf8_locale) { | |
1348 | use locale ':not_characters'; | |
d458c02c | 1349 | push @f, $chr if ord $chr != $previous_ord + 1; |
660c7bcc KW |
1350 | } |
1351 | else { | |
d458c02c | 1352 | push @f, $chr if ord $chr != $previous_ord + 1; |
660c7bcc KW |
1353 | } |
1354 | } | |
1355 | $count++; | |
1356 | if ($count == 6) { | |
1357 | undef $previous_ord; | |
1358 | } | |
1359 | else { | |
d458c02c | 1360 | $previous_ord = ord $chr; |
660c7bcc KW |
1361 | } |
1362 | } | |
1363 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1364 | |
db31898d KW |
1365 | ++$locales_test_number; |
1366 | undef @f; | |
1367 | $test_names{$locales_test_number} = 'Verify that [:xdigit:] is a subset of [:graph:]'; | |
1368 | for (map { chr } 0..255) { | |
1369 | if ($is_utf8_locale) { | |
1370 | use locale ':not_characters'; | |
1371 | push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/; | |
1372 | } | |
1373 | else { | |
1374 | push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/; | |
1375 | } | |
1376 | } | |
1377 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1378 | |
660c7bcc KW |
1379 | # Note that xdigit doesn't have to be a subset of alnum |
1380 | ||
db31898d KW |
1381 | ++$locales_test_number; |
1382 | undef @f; | |
1383 | $test_names{$locales_test_number} = 'Verify that [:punct:] is a subset of [:graph:]'; | |
1384 | for (map { chr } 0..255) { | |
1385 | if ($is_utf8_locale) { | |
1386 | use locale ':not_characters'; | |
1387 | push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/; | |
1388 | } | |
1389 | else { | |
1390 | push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/; | |
1391 | } | |
1392 | } | |
1393 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1394 | |
db31898d KW |
1395 | ++$locales_test_number; |
1396 | undef @f; | |
660c7bcc KW |
1397 | $test_names{$locales_test_number} = 'Verify that the space character is not in [:graph:]'; |
1398 | if ($is_utf8_locale) { | |
1399 | use locale ':not_characters'; | |
1400 | push @f, " " if " " =~ /[[:graph:]]/; | |
1401 | } | |
1402 | else { | |
1403 | push @f, " " if " " =~ /[[:graph:]]/; | |
1404 | } | |
1405 | report_multi_result($Locale, $locales_test_number, \@f); | |
1406 | ||
1407 | ++$locales_test_number; | |
1408 | undef @f; | |
1409 | $test_names{$locales_test_number} = 'Verify that [:space:] contains at least [\f\n\r\t\cK ]'; | |
1410 | for (' ', "\f", "\n", "\r", "\t", "\cK") { | |
1411 | if ($is_utf8_locale) { | |
1412 | use locale ':not_characters'; | |
1413 | push @f, $_ unless /[[:space:]]/; | |
1414 | } | |
1415 | else { | |
1416 | push @f, $_ unless /[[:space:]]/; | |
1417 | } | |
1418 | } | |
1419 | report_multi_result($Locale, $locales_test_number, \@f); | |
1420 | ||
1421 | ++$locales_test_number; | |
1422 | undef @f; | |
1423 | $test_names{$locales_test_number} = 'Verify that [:blank:] contains at least [\t ]'; | |
1424 | for (' ', "\t") { | |
1425 | if ($is_utf8_locale) { | |
1426 | use locale ':not_characters'; | |
1427 | push @f, $_ unless /[[:blank:]]/; | |
1428 | } | |
1429 | else { | |
1430 | push @f, $_ unless /[[:blank:]]/; | |
1431 | } | |
1432 | } | |
1433 | report_multi_result($Locale, $locales_test_number, \@f); | |
1434 | ||
1435 | ++$locales_test_number; | |
1436 | undef @f; | |
db31898d KW |
1437 | $test_names{$locales_test_number} = 'Verify that [:blank:] is a subset of [:space:]'; |
1438 | for (map { chr } 0..255) { | |
1439 | if ($is_utf8_locale) { | |
1440 | use locale ':not_characters'; | |
1441 | push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/; | |
1442 | } | |
1443 | else { | |
1444 | push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/; | |
1445 | } | |
1446 | } | |
1447 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1448 | |
db31898d KW |
1449 | ++$locales_test_number; |
1450 | undef @f; | |
1451 | $test_names{$locales_test_number} = 'Verify that [:graph:] is a subset of [:print:]'; | |
1452 | for (map { chr } 0..255) { | |
1453 | if ($is_utf8_locale) { | |
1454 | use locale ':not_characters'; | |
1455 | push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/; | |
1456 | } | |
1457 | else { | |
1458 | push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/; | |
1459 | } | |
1460 | } | |
1461 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1462 | |
db31898d KW |
1463 | ++$locales_test_number; |
1464 | undef @f; | |
660c7bcc KW |
1465 | $test_names{$locales_test_number} = 'Verify that the space character is in [:print:]'; |
1466 | if ($is_utf8_locale) { | |
1467 | use locale ':not_characters'; | |
1468 | push @f, " " if " " !~ /[[:print:]]/; | |
1469 | } | |
1470 | else { | |
1471 | push @f, " " if " " !~ /[[:print:]]/; | |
1472 | } | |
1473 | report_multi_result($Locale, $locales_test_number, \@f); | |
1474 | ||
1475 | ++$locales_test_number; | |
1476 | undef @f; | |
db31898d KW |
1477 | $test_names{$locales_test_number} = 'Verify that isn\'t both [:cntrl:] and [:print:]'; |
1478 | for (map { chr } 0..255) { | |
1479 | if ($is_utf8_locale) { | |
1480 | use locale ':not_characters'; | |
1481 | push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/); | |
1482 | } | |
1483 | else { | |
1484 | push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/); | |
1485 | } | |
1486 | } | |
1487 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1488 | |
db31898d KW |
1489 | ++$locales_test_number; |
1490 | undef @f; | |
660c7bcc KW |
1491 | $test_names{$locales_test_number} = 'Verify that isn\'t both [:alpha:] and [:digit:]'; |
1492 | for (map { chr } 0..255) { | |
1493 | if ($is_utf8_locale) { | |
1494 | use locale ':not_characters'; | |
1495 | push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/; | |
1496 | } | |
1497 | else { | |
1498 | push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/; | |
1499 | } | |
1500 | } | |
1501 | report_multi_result($Locale, $locales_test_number, \@f); | |
1502 | ||
1503 | ++$locales_test_number; | |
1504 | undef @f; | |
db31898d KW |
1505 | $test_names{$locales_test_number} = 'Verify that isn\'t both [:alnum:] and [:punct:]'; |
1506 | for (map { chr } 0..255) { | |
1507 | if ($is_utf8_locale) { | |
1508 | use locale ':not_characters'; | |
1509 | push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/; | |
1510 | } | |
1511 | else { | |
1512 | push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/; | |
1513 | } | |
1514 | } | |
1515 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1516 | |
db31898d KW |
1517 | ++$locales_test_number; |
1518 | undef @f; | |
1519 | $test_names{$locales_test_number} = 'Verify that isn\'t both [:xdigit:] and [:punct:]'; | |
1520 | for (map { chr } 0..255) { | |
1521 | if ($is_utf8_locale) { | |
1522 | use locale ':not_characters'; | |
1523 | push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/); | |
1524 | } | |
1525 | else { | |
1526 | push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/); | |
1527 | } | |
1528 | } | |
1529 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1530 | |
db31898d KW |
1531 | ++$locales_test_number; |
1532 | undef @f; | |
1533 | $test_names{$locales_test_number} = 'Verify that isn\'t both [:graph:] and [:space:]'; | |
1534 | for (map { chr } 0..255) { | |
1535 | if ($is_utf8_locale) { | |
1536 | use locale ':not_characters'; | |
1537 | push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/); | |
1538 | } | |
1539 | else { | |
1540 | push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/); | |
1541 | } | |
1542 | } | |
1543 | report_multi_result($Locale, $locales_test_number, \@f); | |
32e8aa3f | 1544 | |
7493b8f2 | 1545 | foreach ($first_casing_test_number..$locales_test_number) { |
5b1f18a7 | 1546 | $problematical_tests{$_} = 1; |
7493b8f2 KW |
1547 | } |
1548 | ||
32e8aa3f | 1549 | |
db31898d KW |
1550 | # Test for read-only scalars' locale vs non-locale comparisons. |
1551 | ||
1552 | { | |
1553 | no locale; | |
1554 | my $ok; | |
1555 | $a = "qwerty"; | |
1556 | if ($is_utf8_locale) { | |
1557 | use locale ':not_characters'; | |
1558 | $ok = ($a cmp "qwerty") == 0; | |
1559 | } | |
1560 | else { | |
1561 | use locale; | |
1562 | $ok = ($a cmp "qwerty") == 0; | |
1563 | } | |
1564 | report_result($Locale, ++$locales_test_number, $ok); | |
1565 | $test_names{$locales_test_number} = 'Verify that cmp works with a read-only scalar; no- vs locale'; | |
1566 | } | |
8ebc5c01 | 1567 | |
db31898d KW |
1568 | { |
1569 | my ($from, $to, $lesser, $greater, | |
1570 | @test, %test, $test, $yes, $no, $sign); | |
284102e8 | 1571 | |
db31898d KW |
1572 | ++$locales_test_number; |
1573 | $test_names{$locales_test_number} = 'Verify that "le", "ne", etc work'; | |
1574 | $not_necessarily_a_problem_test_number = $locales_test_number; | |
1575 | for (0..9) { | |
1576 | # Select a slice. | |
fb844330 KW |
1577 | $from = int(($_*@{$posixes{'word'}})/10); |
1578 | $to = $from + int(@{$posixes{'word'}}/10); | |
1579 | $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}}); | |
1580 | $lesser = join('', @{$posixes{'word'}}[$from..$to]); | |
db31898d KW |
1581 | # Select a slice one character on. |
1582 | $from++; $to++; | |
fb844330 KW |
1583 | $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}}); |
1584 | $greater = join('', @{$posixes{'word'}}[$from..$to]); | |
66cbab2c KW |
1585 | if ($is_utf8_locale) { |
1586 | use locale ':not_characters'; | |
db31898d KW |
1587 | ($yes, $no, $sign) = ($lesser lt $greater |
1588 | ? (" ", "not ", 1) | |
1589 | : ("not ", " ", -1)); | |
66cbab2c KW |
1590 | } |
1591 | else { | |
1592 | use locale; | |
db31898d KW |
1593 | ($yes, $no, $sign) = ($lesser lt $greater |
1594 | ? (" ", "not ", 1) | |
1595 | : ("not ", " ", -1)); | |
66cbab2c | 1596 | } |
db31898d KW |
1597 | # all these tests should FAIL (return 0). Exact lt or gt cannot |
1598 | # be tested because in some locales, say, eacute and E may test | |
1599 | # equal. | |
1600 | @test = | |
1601 | ( | |
1602 | $no.' ($lesser le $greater)', # 1 | |
1603 | 'not ($lesser ne $greater)', # 2 | |
1604 | ' ($lesser eq $greater)', # 3 | |
1605 | $yes.' ($lesser ge $greater)', # 4 | |
1606 | $yes.' ($lesser ge $greater)', # 5 | |
1607 | $yes.' ($greater le $lesser )', # 7 | |
1608 | 'not ($greater ne $lesser )', # 8 | |
1609 | ' ($greater eq $lesser )', # 9 | |
1610 | $no.' ($greater ge $lesser )', # 10 | |
1611 | 'not (($lesser cmp $greater) == -($sign))' # 11 | |
1612 | ); | |
1613 | @test{@test} = 0 x @test; | |
1614 | $test = 0; | |
1615 | for my $ti (@test) { | |
66cbab2c KW |
1616 | if ($is_utf8_locale) { |
1617 | use locale ':not_characters'; | |
db31898d | 1618 | $test{$ti} = eval $ti; |
66cbab2c KW |
1619 | } |
1620 | else { | |
db31898d KW |
1621 | # Already in 'use locale'; |
1622 | $test{$ti} = eval $ti; | |
66cbab2c | 1623 | } |
db31898d KW |
1624 | $test ||= $test{$ti} |
1625 | } | |
1626 | report_result($Locale, $locales_test_number, $test == 0); | |
1627 | if ($test) { | |
1628 | debug "# lesser = '$lesser'\n"; | |
1629 | debug "# greater = '$greater'\n"; | |
1630 | debug "# lesser cmp greater = ", | |
1631 | $lesser cmp $greater, "\n"; | |
1632 | debug "# greater cmp lesser = ", | |
1633 | $greater cmp $lesser, "\n"; | |
1634 | debug "# (greater) from = $from, to = $to\n"; | |
1635 | for my $ti (@test) { | |
1636 | debugf("# %-40s %-4s", $ti, | |
1637 | $test{$ti} ? 'FAIL' : 'ok'); | |
1638 | if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) { | |
1639 | debugf("(%s == %4d)", $1, eval $1); | |
66cbab2c | 1640 | } |
db31898d KW |
1641 | debug "\n#"; |
1642 | } | |
1643 | ||
1644 | last; | |
1645 | } | |
1646 | } | |
1647 | } | |
c4093d7d | 1648 | |
66cbab2c KW |
1649 | my $ok1; |
1650 | my $ok2; | |
1651 | my $ok3; | |
1652 | my $ok4; | |
1653 | my $ok5; | |
1654 | my $ok6; | |
1655 | my $ok7; | |
1656 | my $ok8; | |
1657 | my $ok9; | |
1658 | my $ok10; | |
1659 | my $ok11; | |
1660 | my $ok12; | |
1661 | my $ok13; | |
1500bd91 | 1662 | my $ok14; |
2c6ee1a7 | 1663 | my $ok14_5; |
28acfe03 KW |
1664 | my $ok15; |
1665 | my $ok16; | |
e46375fa KW |
1666 | my $ok17; |
1667 | my $ok18; | |
f406a445 | 1668 | my $ok19; |
9717af6d | 1669 | my $ok20; |
8ee4b769 | 1670 | my $ok21; |
66cbab2c KW |
1671 | |
1672 | my $c; | |
1673 | my $d; | |
1674 | my $e; | |
1675 | my $f; | |
1676 | my $g; | |
e46375fa KW |
1677 | my $h; |
1678 | my $i; | |
1679 | my $j; | |
66cbab2c | 1680 | |
f406a445 KW |
1681 | my @times = CORE::localtime(); |
1682 | ||
66cbab2c | 1683 | if (! $is_utf8_locale) { |
71e5cbb3 | 1684 | use locale; |
6be75cd7 | 1685 | |
71e5cbb3 | 1686 | my ($x, $y) = (1.23, 1.23); |
6be75cd7 | 1687 | |
71e5cbb3 KW |
1688 | $a = "$x"; |
1689 | printf ''; # printf used to reset locale to "C" | |
1690 | $b = "$y"; | |
1691 | $ok1 = $a eq $b; | |
6be75cd7 | 1692 | |
71e5cbb3 KW |
1693 | $c = "$x"; |
1694 | my $z = sprintf ''; # sprintf used to reset locale to "C" | |
1695 | $d = "$y"; | |
1696 | $ok2 = $c eq $d; | |
1697 | { | |
66cbab2c | 1698 | |
71e5cbb3 KW |
1699 | use warnings; |
1700 | my $w = 0; | |
1701 | local $SIG{__WARN__} = | |
1702 | sub { | |
1703 | print "# @_\n"; | |
1704 | $w++; | |
1705 | }; | |
6be75cd7 | 1706 | |
71e5cbb3 KW |
1707 | # The == (among other ops) used to warn for locales |
1708 | # that had something else than "." as the radix character. | |
6be75cd7 | 1709 | |
71e5cbb3 KW |
1710 | $ok3 = $c == 1.23; |
1711 | $ok4 = $c == $x; | |
1712 | $ok5 = $c == $d; | |
1713 | { | |
1714 | no locale; | |
66cbab2c | 1715 | |
b79536ea | 1716 | $e = "$x"; |
71e5cbb3 KW |
1717 | |
1718 | $ok6 = $e == 1.23; | |
1719 | $ok7 = $e == $x; | |
1720 | $ok8 = $e == $c; | |
1721 | } | |
66cbab2c | 1722 | |
71e5cbb3 KW |
1723 | $f = "1.23"; |
1724 | $g = 2.34; | |
e46375fa KW |
1725 | $h = 1.5; |
1726 | $i = 1.25; | |
1727 | $j = "$h:$i"; | |
66cbab2c | 1728 | |
71e5cbb3 KW |
1729 | $ok9 = $f == 1.23; |
1730 | $ok10 = $f == $x; | |
1731 | $ok11 = $f == $c; | |
1732 | $ok12 = abs(($f + $g) - 3.57) < 0.01; | |
1733 | $ok13 = $w == 0; | |
2c6ee1a7 | 1734 | $ok14 = $ok14_5 = $ok15 = $ok16 = 1; # Skip for non-utf8 locales |
71e5cbb3 | 1735 | } |
e46375fa KW |
1736 | { |
1737 | no locale; | |
1738 | $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i); | |
1739 | } | |
1740 | $ok18 = $j eq sprintf("%g:%g", $h, $i); | |
f406a445 | 1741 | $ok19 = POSIX::strftime("%p",@times) ne "%p"; # [perl #119425] |
9717af6d KW |
1742 | my $date = POSIX::strftime("%A %B %Z",@times); |
1743 | $ok20 = $date =~ / ^ \p{ASCII}+ $ /x || ! utf8::is_utf8($date); | |
66cbab2c KW |
1744 | } |
1745 | else { | |
1746 | use locale ':not_characters'; | |
1747 | ||
1748 | my ($x, $y) = (1.23, 1.23); | |
1749 | $a = "$x"; | |
1750 | printf ''; # printf used to reset locale to "C" | |
1751 | $b = "$y"; | |
1752 | $ok1 = $a eq $b; | |
1753 | ||
1754 | $c = "$x"; | |
1755 | my $z = sprintf ''; # sprintf used to reset locale to "C" | |
1756 | $d = "$y"; | |
1757 | $ok2 = $c eq $d; | |
1758 | { | |
1759 | use warnings; | |
1760 | my $w = 0; | |
1761 | local $SIG{__WARN__} = | |
1762 | sub { | |
1763 | print "# @_\n"; | |
1764 | $w++; | |
1765 | }; | |
1766 | $ok3 = $c == 1.23; | |
1767 | $ok4 = $c == $x; | |
1768 | $ok5 = $c == $d; | |
1769 | { | |
1770 | no locale; | |
b79536ea | 1771 | $e = "$x"; |
66cbab2c KW |
1772 | |
1773 | $ok6 = $e == 1.23; | |
1774 | $ok7 = $e == $x; | |
1775 | $ok8 = $e == $c; | |
1776 | } | |
1777 | ||
1778 | $f = "1.23"; | |
1779 | $g = 2.34; | |
e46375fa KW |
1780 | $h = 1.5; |
1781 | $i = 1.25; | |
1782 | $j = "$h:$i"; | |
66cbab2c KW |
1783 | |
1784 | $ok9 = $f == 1.23; | |
1785 | $ok10 = $f == $x; | |
1786 | $ok11 = $f == $c; | |
1787 | $ok12 = abs(($f + $g) - 3.57) < 0.01; | |
1788 | $ok13 = $w == 0; | |
1500bd91 KW |
1789 | |
1790 | # Look for non-ASCII error messages, and verify that the first | |
5320b60d KW |
1791 | # such is in UTF-8 (the others almost certainly will be like the |
1792 | # first). | |
1500bd91 | 1793 | $ok14 = 1; |
2c6ee1a7 | 1794 | $ok14_5 = 1; |
1500bd91 KW |
1795 | foreach my $err (keys %!) { |
1796 | use Errno; | |
1797 | $! = eval "&Errno::$err"; # Convert to strerror() output | |
1798 | my $strerror = "$!"; | |
1799 | if ("$strerror" =~ /\P{ASCII}/) { | |
5320b60d | 1800 | $ok14 = utf8::is_utf8($strerror); |
2c6ee1a7 KW |
1801 | no locale; |
1802 | $ok14_5 = "$!" !~ /\P{ASCII}/; | |
1500bd91 KW |
1803 | last; |
1804 | } | |
1805 | } | |
28acfe03 KW |
1806 | |
1807 | # Similarly, we verify that a non-ASCII radix is in UTF-8. This | |
1808 | # also catches if there is a disparity between sprintf and | |
1809 | # stringification. | |
1810 | ||
1811 | my $string_g = "$g"; | |
2ef12107 | 1812 | my $sprintf_g = sprintf("%g", $g); |
28acfe03 | 1813 | |
2ef12107 KW |
1814 | $ok15 = $string_g =~ / ^ \p{ASCII}+ $ /x || utf8::is_utf8($string_g); |
1815 | $ok16 = $sprintf_g eq $string_g; | |
66cbab2c | 1816 | } |
e46375fa KW |
1817 | { |
1818 | no locale; | |
1819 | $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i); | |
1820 | } | |
1821 | $ok18 = $j eq sprintf("%g:%g", $h, $i); | |
f406a445 | 1822 | $ok19 = POSIX::strftime("%p",@times) ne "%p"; # [perl #119425] |
9717af6d KW |
1823 | my $date = POSIX::strftime("%A %B %Z",@times); |
1824 | $ok20 = $date =~ / ^ \p{ASCII}+ $ /x || utf8::is_utf8($date); | |
66cbab2c KW |
1825 | } |
1826 | ||
8ee4b769 KW |
1827 | $ok21 = 1; |
1828 | foreach my $err (keys %!) { | |
1829 | no locale; | |
1830 | use Errno; | |
1831 | $! = eval "&Errno::$err"; # Convert to strerror() output | |
1832 | my $strerror = "$!"; | |
1833 | if ("$strerror" =~ /\P{ASCII}/) { | |
1834 | $ok21 = 0; | |
1835 | last; | |
1836 | } | |
1837 | } | |
1838 | ||
30032ef4 | 1839 | report_result($Locale, ++$locales_test_number, $ok1); |
66cbab2c KW |
1840 | $test_names{$locales_test_number} = 'Verify that an intervening printf doesn\'t change assignment results'; |
1841 | my $first_a_test = $locales_test_number; | |
1842 | ||
1843 | debug "# $first_a_test..$locales_test_number: \$a = $a, \$b = $b, Locale = $Locale\n"; | |
1844 | ||
30032ef4 | 1845 | report_result($Locale, ++$locales_test_number, $ok2); |
66cbab2c KW |
1846 | $test_names{$locales_test_number} = 'Verify that an intervening sprintf doesn\'t change assignment results'; |
1847 | ||
1848 | my $first_c_test = $locales_test_number; | |
1849 | ||
30032ef4 | 1850 | report_result($Locale, ++$locales_test_number, $ok3); |
71e5cbb3 | 1851 | $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a constant'; |
b057411d | 1852 | $problematical_tests{$locales_test_number} = 1; |
6be75cd7 | 1853 | |
30032ef4 | 1854 | report_result($Locale, ++$locales_test_number, $ok4); |
71e5cbb3 | 1855 | $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar'; |
b057411d | 1856 | $problematical_tests{$locales_test_number} = 1; |
66cbab2c | 1857 | |
30032ef4 | 1858 | report_result($Locale, ++$locales_test_number, $ok5); |
71e5cbb3 | 1859 | $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar and an intervening sprintf'; |
b057411d | 1860 | $problematical_tests{$locales_test_number} = 1; |
66cbab2c | 1861 | |
71e5cbb3 | 1862 | debug "# $first_c_test..$locales_test_number: \$c = $c, \$d = $d, Locale = $Locale\n"; |
66cbab2c | 1863 | |
30032ef4 | 1864 | report_result($Locale, ++$locales_test_number, $ok6); |
b79536ea | 1865 | $test_names{$locales_test_number} = 'Verify that can assign stringified under inner no-locale block'; |
71e5cbb3 | 1866 | my $first_e_test = $locales_test_number; |
6be75cd7 | 1867 | |
30032ef4 | 1868 | report_result($Locale, ++$locales_test_number, $ok7); |
71e5cbb3 | 1869 | $test_names{$locales_test_number} = 'Verify that "==" with a scalar still works in inner no locale'; |
66cbab2c | 1870 | |
30032ef4 | 1871 | report_result($Locale, ++$locales_test_number, $ok8); |
71e5cbb3 | 1872 | $test_names{$locales_test_number} = 'Verify that "==" with a scalar and an intervening sprintf still works in inner no locale'; |
b057411d | 1873 | $problematical_tests{$locales_test_number} = 1; |
c4093d7d | 1874 | |
71e5cbb3 | 1875 | debug "# $first_e_test..$locales_test_number: \$e = $e, no locale\n"; |
2a680da6 | 1876 | |
30032ef4 | 1877 | report_result($Locale, ++$locales_test_number, $ok9); |
71e5cbb3 | 1878 | $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a constant'; |
b057411d | 1879 | $problematical_tests{$locales_test_number} = 1; |
71e5cbb3 | 1880 | my $first_f_test = $locales_test_number; |
6be75cd7 | 1881 | |
30032ef4 | 1882 | report_result($Locale, ++$locales_test_number, $ok10); |
71e5cbb3 | 1883 | $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar'; |
b057411d | 1884 | $problematical_tests{$locales_test_number} = 1; |
66cbab2c | 1885 | |
30032ef4 | 1886 | report_result($Locale, ++$locales_test_number, $ok11); |
71e5cbb3 | 1887 | $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar and an intervening sprintf'; |
b057411d | 1888 | $problematical_tests{$locales_test_number} = 1; |
906f284f | 1889 | |
30032ef4 | 1890 | report_result($Locale, ++$locales_test_number, $ok12); |
71e5cbb3 | 1891 | $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix can participate in an addition and function call as numeric'; |
b057411d | 1892 | $problematical_tests{$locales_test_number} = 1; |
c4093d7d | 1893 | |
30032ef4 | 1894 | report_result($Locale, ++$locales_test_number, $ok13); |
71e5cbb3 | 1895 | $test_names{$locales_test_number} = 'Verify that don\'t get warning under "==" even if radix is not a dot'; |
b057411d | 1896 | $problematical_tests{$locales_test_number} = 1; |
c4093d7d | 1897 | |
30032ef4 | 1898 | report_result($Locale, ++$locales_test_number, $ok14); |
5320b60d | 1899 | $test_names{$locales_test_number} = 'Verify that non-ASCII UTF-8 error messages are in UTF-8'; |
1500bd91 | 1900 | |
2c6ee1a7 KW |
1901 | report_result($Locale, ++$locales_test_number, $ok14_5); |
1902 | $test_names{$locales_test_number} = '... and are ASCII outside "use locale"'; | |
1903 | ||
30032ef4 | 1904 | report_result($Locale, ++$locales_test_number, $ok15); |
28acfe03 KW |
1905 | $test_names{$locales_test_number} = 'Verify that a number with a UTF-8 radix has a UTF-8 stringification'; |
1906 | ||
30032ef4 | 1907 | report_result($Locale, ++$locales_test_number, $ok16); |
28acfe03 KW |
1908 | $test_names{$locales_test_number} = 'Verify that a sprintf of a number with a UTF-8 radix yields UTF-8'; |
1909 | ||
e46375fa KW |
1910 | report_result($Locale, ++$locales_test_number, $ok17); |
1911 | $test_names{$locales_test_number} = 'Verify that a sprintf of a number outside locale scope uses a dot radix'; | |
1912 | ||
1913 | report_result($Locale, ++$locales_test_number, $ok18); | |
1914 | $test_names{$locales_test_number} = 'Verify that a sprintf of a number back within locale scope uses locale radix'; | |
1915 | ||
f406a445 KW |
1916 | report_result($Locale, ++$locales_test_number, $ok19); |
1917 | $test_names{$locales_test_number} = 'Verify that strftime doesn\'t return "%p" in locales where %p is empty'; | |
1918 | ||
9717af6d KW |
1919 | report_result($Locale, ++$locales_test_number, $ok20); |
1920 | $test_names{$locales_test_number} = 'Verify that strftime returns date with UTF-8 flag appropriately set'; | |
1921 | ||
8ee4b769 KW |
1922 | report_result($Locale, ++$locales_test_number, $ok21); |
1923 | $test_names{$locales_test_number} = '"$!" is ASCII only outside of locale scope'; | |
1924 | ||
71e5cbb3 | 1925 | debug "# $first_f_test..$locales_test_number: \$f = $f, \$g = $g, back to locale = $Locale\n"; |
906f284f | 1926 | |
26d80d95 LC |
1927 | # Does taking lc separately differ from taking |
1928 | # the lc "in-line"? (This was the bug 19990704.002, change #3568.) | |
1929 | # The bug was in the caching of the 'o'-magic. | |
66cbab2c | 1930 | if (! $is_utf8_locale) { |
2a680da6 | 1931 | use locale; |
6be75cd7 | 1932 | |
2a680da6 JH |
1933 | sub lcA { |
1934 | my $lc0 = lc $_[0]; | |
1935 | my $lc1 = lc $_[1]; | |
1936 | return $lc0 cmp $lc1; | |
1937 | } | |
6be75cd7 | 1938 | |
2a680da6 JH |
1939 | sub lcB { |
1940 | return lc($_[0]) cmp lc($_[1]); | |
1941 | } | |
6be75cd7 | 1942 | |
2a680da6 JH |
1943 | my $x = "ab"; |
1944 | my $y = "aa"; | |
1945 | my $z = "AB"; | |
6be75cd7 | 1946 | |
30032ef4 | 1947 | report_result($Locale, ++$locales_test_number, |
2a680da6 JH |
1948 | lcA($x, $y) == 1 && lcB($x, $y) == 1 || |
1949 | lcA($x, $z) == 0 && lcB($x, $z) == 0); | |
6be75cd7 | 1950 | } |
66cbab2c KW |
1951 | else { |
1952 | use locale ':not_characters'; | |
1953 | ||
1954 | sub lcC { | |
1955 | my $lc0 = lc $_[0]; | |
1956 | my $lc1 = lc $_[1]; | |
1957 | return $lc0 cmp $lc1; | |
1958 | } | |
1959 | ||
1960 | sub lcD { | |
1961 | return lc($_[0]) cmp lc($_[1]); | |
1962 | } | |
1963 | ||
1964 | my $x = "ab"; | |
1965 | my $y = "aa"; | |
1966 | my $z = "AB"; | |
1967 | ||
30032ef4 | 1968 | report_result($Locale, ++$locales_test_number, |
66cbab2c KW |
1969 | lcC($x, $y) == 1 && lcD($x, $y) == 1 || |
1970 | lcC($x, $z) == 0 && lcD($x, $z) == 0); | |
1971 | } | |
1972 | $test_names{$locales_test_number} = 'Verify "lc(foo) cmp lc(bar)" is the same as using intermediaries for the cmp'; | |
d8093b23 | 1973 | |
26d80d95 LC |
1974 | # Does lc of an UPPER (if different from the UPPER) match |
1975 | # case-insensitively the UPPER, and does the UPPER match | |
1976 | # case-insensitively the lc of the UPPER. And vice versa. | |
3ba0e062 | 1977 | { |
ef4a39e5 JH |
1978 | use locale; |
1979 | no utf8; | |
1980 | my $re = qr/[\[\(\{\*\+\?\|\^\$\\]/; | |
1981 | ||
1982 | my @f = (); | |
c4093d7d | 1983 | ++$locales_test_number; |
c08acc4c | 1984 | $test_names{$locales_test_number} = 'Verify case insensitive matching works'; |
b9df08e4 | 1985 | foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) { |
66cbab2c | 1986 | if (! $is_utf8_locale) { |
71e5cbb3 KW |
1987 | my $y = lc $x; |
1988 | next unless uc $y eq $x; | |
92891c66 KW |
1989 | debug_more( "# UPPER=", disp_chars(($x)), |
1990 | "; lc=", disp_chars(($y)), "; ", | |
4eac893c | 1991 | "; fc=", disp_chars((fc $x)), "; ", |
92891c66 KW |
1992 | disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", |
1993 | $x =~ /$y/i ? 1 : 0, | |
1994 | "; ", | |
1995 | disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", | |
1996 | $y =~ /$x/i ? 1 : 0, | |
1997 | "\n"); | |
71e5cbb3 KW |
1998 | # |
1999 | # If $x and $y contain regular expression characters | |
2000 | # AND THEY lowercase (/i) to regular expression characters, | |
2001 | # regcomp() will be mightily confused. No, the \Q doesn't | |
2002 | # help here (maybe regex engine internal lowercasing | |
2003 | # is done after the \Q?) An example of this happening is | |
2004 | # the bg_BG (Bulgarian) locale under EBCDIC (OS/390 USS): | |
2005 | # the chr(173) (the "[") is the lowercase of the chr(235). | |
2006 | # | |
2007 | # Similarly losing EBCDIC locales include cs_cz, cs_CZ, | |
2008 | # el_gr, el_GR, en_us.IBM-037 (!), en_US.IBM-037 (!), | |
2009 | # et_ee, et_EE, hr_hr, hr_HR, hu_hu, hu_HU, lt_LT, | |
2010 | # mk_mk, mk_MK, nl_nl.IBM-037, nl_NL.IBM-037, | |
2011 | # pl_pl, pl_PL, ro_ro, ro_RO, ru_ru, ru_RU, | |
2012 | # sk_sk, sk_SK, sl_si, sl_SI, tr_tr, tr_TR. | |
2013 | # | |
2014 | # Similar things can happen even under (bastardised) | |
2015 | # non-EBCDIC locales: in many European countries before the | |
2016 | # advent of ISO 8859-x nationally customised versions of | |
2017 | # ISO 646 were devised, reusing certain punctuation | |
2018 | # characters for modified characters needed by the | |
2019 | # country/language. For example, the "|" might have | |
2020 | # stood for U+00F6 or LATIN SMALL LETTER O WITH DIAERESIS. | |
2021 | # | |
2022 | if ($x =~ $re || $y =~ $re) { | |
2023 | print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n"; | |
2024 | next; | |
2025 | } | |
69d77f94 | 2026 | push @f, $x unless $x =~ /$y/i && $y =~ /$x/i; |
26c1569f KW |
2027 | |
2028 | # fc is not a locale concept, so Perl uses lc for it. | |
2029 | push @f, $x unless lc $x eq fc $x; | |
66cbab2c KW |
2030 | } |
2031 | else { | |
2032 | use locale ':not_characters'; | |
2033 | my $y = lc $x; | |
2034 | next unless uc $y eq $x; | |
92891c66 KW |
2035 | debug_more( "# UPPER=", disp_chars(($x)), |
2036 | "; lc=", disp_chars(($y)), "; ", | |
4eac893c | 2037 | "; fc=", disp_chars((fc $x)), "; ", |
92891c66 KW |
2038 | disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", |
2039 | $x =~ /$y/i ? 1 : 0, | |
2040 | "; ", | |
2041 | disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", | |
2042 | $y =~ /$x/i ? 1 : 0, | |
2043 | "\n"); | |
66cbab2c | 2044 | |
66cbab2c | 2045 | push @f, $x unless $x =~ /$y/i && $y =~ /$x/i; |
26c1569f KW |
2046 | |
2047 | # The places where Unicode's lc is different from fc are | |
2048 | # skipped here by virtue of the 'next unless uc...' line above | |
2049 | push @f, $x unless lc $x eq fc $x; | |
66cbab2c | 2050 | } |
c00ff1c7 | 2051 | } |
ef4a39e5 | 2052 | |
b9df08e4 | 2053 | foreach my $x (sort { ord $a <=> ord $b } keys %lower) { |
66cbab2c | 2054 | if (! $is_utf8_locale) { |
71e5cbb3 KW |
2055 | my $y = uc $x; |
2056 | next unless lc $y eq $x; | |
92891c66 KW |
2057 | debug_more( "# lower=", disp_chars(($x)), |
2058 | "; uc=", disp_chars(($y)), "; ", | |
4eac893c | 2059 | "; fc=", disp_chars((fc $x)), "; ", |
92891c66 KW |
2060 | disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", |
2061 | $x =~ /$y/i ? 1 : 0, | |
2062 | "; ", | |
2063 | disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", | |
2064 | $y =~ /$x/i ? 1 : 0, | |
2065 | "\n"); | |
71e5cbb3 KW |
2066 | if ($x =~ $re || $y =~ $re) { # See above. |
2067 | print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n"; | |
2068 | next; | |
2069 | } | |
69d77f94 | 2070 | push @f, $x unless $x =~ /$y/i && $y =~ /$x/i; |
26c1569f KW |
2071 | |
2072 | push @f, $x unless lc $x eq fc $x; | |
66cbab2c KW |
2073 | } |
2074 | else { | |
2075 | use locale ':not_characters'; | |
2076 | my $y = uc $x; | |
2077 | next unless lc $y eq $x; | |
92891c66 KW |
2078 | debug_more( "# lower=", disp_chars(($x)), |
2079 | "; uc=", disp_chars(($y)), "; ", | |
4eac893c | 2080 | "; fc=", disp_chars((fc $x)), "; ", |
92891c66 KW |
2081 | disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", |
2082 | $x =~ /$y/i ? 1 : 0, | |
2083 | "; ", | |
2084 | disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", | |
2085 | $y =~ /$x/i ? 1 : 0, | |
2086 | "\n"); | |
66cbab2c | 2087 | push @f, $x unless $x =~ /$y/i && $y =~ /$x/i; |
26c1569f KW |
2088 | |
2089 | push @f, $x unless lc $x eq fc $x; | |
66cbab2c | 2090 | } |
c00ff1c7 | 2091 | } |
7c844d17 | 2092 | report_multi_result($Locale, $locales_test_number, \@f); |
5b1f18a7 | 2093 | $problematical_tests{$locales_test_number} = 1; |
d8093b23 | 2094 | } |
78787052 JL |
2095 | |
2096 | # [perl #109318] | |
2097 | { | |
2098 | my @f = (); | |
2099 | ++$locales_test_number; | |
2100 | $test_names{$locales_test_number} = 'Verify atof with locale radix and negative exponent'; | |
b057411d | 2101 | $problematical_tests{$locales_test_number} = 1; |
78787052 JL |
2102 | |
2103 | my $radix = POSIX::localeconv()->{decimal_point}; | |
2104 | my @nums = ( | |
2105 | "3.14e+9", "3${radix}14e+9", "3.14e-9", "3${radix}14e-9", | |
2106 | "-3.14e+9", "-3${radix}14e+9", "-3.14e-9", "-3${radix}14e-9", | |
2107 | ); | |
2108 | ||
2109 | if (! $is_utf8_locale) { | |
2110 | use locale; | |
2111 | for my $num (@nums) { | |
2112 | push @f, $num | |
2113 | unless sprintf("%g", $num) =~ /3.+14/; | |
2114 | } | |
2115 | } | |
2116 | else { | |
2117 | use locale ':not_characters'; | |
2118 | for my $num (@nums) { | |
2119 | push @f, $num | |
2120 | unless sprintf("%g", $num) =~ /3.+14/; | |
2121 | } | |
2122 | } | |
2123 | ||
30032ef4 | 2124 | report_result($Locale, $locales_test_number, @f == 0); |
78787052 JL |
2125 | if (@f) { |
2126 | print "# failed $locales_test_number locale '$Locale' numbers @f\n" | |
2127 | } | |
2128 | } | |
8ebc5c01 | 2129 | } |
284102e8 | 2130 | |
c4093d7d | 2131 | my $final_locales_test_number = $locales_test_number; |
6cf0b567 | 2132 | |
2a680da6 JH |
2133 | # Recount the errors. |
2134 | ||
0d3899b1 | 2135 | foreach $test_num ($first_locales_test_number..$final_locales_test_number) { |
c4093d7d KW |
2136 | if (%setlocale_failed) { |
2137 | print "not "; | |
2138 | } | |
0d3899b1 | 2139 | elsif ($Problem{$test_num} || !defined $Okay{$test_num} || !@{$Okay{$test_num}}) { |
c4093d7d | 2140 | if (defined $not_necessarily_a_problem_test_number |
0d3899b1 | 2141 | && $test_num == $not_necessarily_a_problem_test_number) |
c4093d7d KW |
2142 | { |
2143 | print "# The failure of test $not_necessarily_a_problem_test_number is not necessarily fatal.\n"; | |
b4e009be | 2144 | print "# It usually indicates a problem in the environment,\n"; |
284102e8 JH |
2145 | print "# not in Perl itself.\n"; |
2146 | } | |
5b1f18a7 | 2147 | if ($Okay{$test_num} && grep { $_ == $test_num } keys %problematical_tests) { |
d401967c | 2148 | no warnings 'experimental::autoderef'; |
0a974e2d | 2149 | # Round to nearest .1% |
0d3899b1 | 2150 | my $percent_fail = (int(.5 + (1000 * scalar(keys $Problem{$test_num}) |
0a974e2d KW |
2151 | / scalar(@Locale)))) |
2152 | / 10; | |
31f658ea | 2153 | if (! $debug && $percent_fail < $acceptable_failure_percentage) |
f5627fc1 | 2154 | { |
0d3899b1 | 2155 | $test_names{$test_num} .= 'TODO'; |
6d5d702a KW |
2156 | print "# ", 100 - $percent_fail, "% of locales pass the following test, so it is likely that the failures\n"; |
2157 | print "# are errors in the locale definitions. The test is marked TODO, as the\n"; | |
2158 | print "# problem is not likely to be Perl's\n"; | |
2159 | } | |
2160 | } | |
f5627fc1 KW |
2161 | print "#\n"; |
2162 | if ($debug) { | |
2163 | print "# The code points that had this failure are given above. Look for lines\n"; | |
0d3899b1 | 2164 | print "# that match 'failed $test_num'\n"; |
f5627fc1 KW |
2165 | } |
2166 | else { | |
2167 | print "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n"; | |
0d3899b1 | 2168 | print "# Then look at that output for lines that match 'failed $test_num'\n"; |
108a305e | 2169 | } |
284102e8 | 2170 | print "not "; |
8ebc5c01 | 2171 | } |
0d3899b1 KW |
2172 | print "ok $test_num"; |
2173 | if (defined $test_names{$test_num}) { | |
6c2e653d | 2174 | # If TODO is in the test name, make it thus |
0d3899b1 KW |
2175 | my $todo = $test_names{$test_num} =~ s/TODO\s*//; |
2176 | print " $test_names{$test_num}"; | |
6c2e653d KW |
2177 | print " # TODO" if $todo; |
2178 | } | |
c4093d7d | 2179 | print "\n"; |
8ebc5c01 | 2180 | } |
fb73857a | 2181 | |
c4093d7d | 2182 | $test_num = $final_locales_test_number; |
c213d471 | 2183 | |
4bc67723 | 2184 | unless ( $^O =~ m!^(dragonfly|openbsd|bitrig|mirbsd)$! ) { |
083ace8f | 2185 | # perl #115808 |
fbd840df KW |
2186 | use warnings; |
2187 | my $warned = 0; | |
2188 | local $SIG{__WARN__} = sub { | |
2189 | $warned = $_[0] =~ /uninitialized/; | |
2190 | }; | |
2191 | my $z = "y" . setlocale(&POSIX::LC_ALL, "xyzzy"); | |
2192 | ok($warned, "variable set to setlocale(BAD LOCALE) is considered uninitialized"); | |
2193 | } | |
2194 | ||
094a2f8c | 2195 | # Test that tainting and case changing works on utf8 strings. These tests are |
1f5852c9 KW |
2196 | # placed last to avoid disturbing the hard-coded test numbers that existed at |
2197 | # the time these were added above this in this file. | |
0099bb8d KW |
2198 | # This also tests that locale overrides unicode_strings in the same scope for |
2199 | # non-utf8 strings. | |
a810e350 | 2200 | setlocale(&POSIX::LC_ALL, "C"); |
094a2f8c KW |
2201 | { |
2202 | use locale; | |
0099bb8d | 2203 | use feature 'unicode_strings'; |
094a2f8c | 2204 | |
26c1569f | 2205 | foreach my $function ("uc", "ucfirst", "lc", "lcfirst", "fc") { |
094a2f8c KW |
2206 | my @list; # List of code points to test for $function |
2207 | ||
2208 | # Used to calculate the changed case for ASCII characters by using the | |
2209 | # ord, instead of using one of the functions under test. | |
2210 | my $ascii_case_change_delta; | |
2211 | my $above_latin1_case_change_delta; # Same for the specific ords > 255 | |
2212 | # that we use | |
2213 | ||
445bf929 | 2214 | # We test an ASCII character, which should change case; |
094a2f8c | 2215 | # a Latin1 character, which shouldn't change case under this C locale, |
094a2f8c | 2216 | # an above-Latin1 character that when the case is changed would cross |
445bf929 KW |
2217 | # the 255/256 boundary, so doesn't change case |
2218 | # (the \x{149} is one of these, but changes into 2 characters, the | |
094a2f8c KW |
2219 | # first one of which doesn't cross the boundary. |
2220 | # the final one in each list is an above-Latin1 character whose case | |
445bf929 KW |
2221 | # does change. The code below uses its position in its list as a |
2222 | # marker to indicate that it, unlike the other code points above | |
2223 | # ASCII, has a successful case change | |
2224 | # | |
2225 | # All casing operations under locale (but not :not_characters) should | |
2226 | # taint | |
094a2f8c | 2227 | if ($function =~ /^u/) { |
094a2f8c KW |
2228 | @list = ("", "a", "\xe0", "\xff", "\x{fb00}", "\x{149}", "\x{101}"); |
2229 | $ascii_case_change_delta = -32; | |
2230 | $above_latin1_case_change_delta = -1; | |
2231 | } | |
2232 | else { | |
1ca267a5 | 2233 | @list = ("", "A", "\xC0", "\x{17F}", "\x{100}"); |
094a2f8c KW |
2234 | $ascii_case_change_delta = +32; |
2235 | $above_latin1_case_change_delta = +1; | |
2236 | } | |
66cbab2c | 2237 | foreach my $is_utf8_locale (0 .. 1) { |
71e5cbb3 KW |
2238 | foreach my $j (0 .. $#list) { |
2239 | my $char = $list[$j]; | |
0099bb8d KW |
2240 | |
2241 | for my $encoded_in_utf8 (0 .. 1) { | |
faf0c248 KW |
2242 | my $should_be; |
2243 | my $changed; | |
2244 | if (! $is_utf8_locale) { | |
2245 | $should_be = ($j == $#list) | |
2246 | ? chr(ord($char) + $above_latin1_case_change_delta) | |
2247 | : (length $char == 0 || ord($char) > 127) | |
2248 | ? $char | |
2249 | : chr(ord($char) + $ascii_case_change_delta); | |
2250 | ||
2251 | # This monstrosity is in order to avoid using an eval, | |
2252 | # which might perturb the results | |
2253 | $changed = ($function eq "uc") | |
2254 | ? uc($char) | |
2255 | : ($function eq "ucfirst") | |
2256 | ? ucfirst($char) | |
2257 | : ($function eq "lc") | |
2258 | ? lc($char) | |
2259 | : ($function eq "lcfirst") | |
2260 | ? lcfirst($char) | |
26c1569f KW |
2261 | : ($function eq "fc") |
2262 | ? fc($char) | |
faf0c248 KW |
2263 | : die("Unexpected function \"$function\""); |
2264 | } | |
2265 | else { | |
2266 | { | |
2267 | no locale; | |
71e5cbb3 | 2268 | |
faf0c248 KW |
2269 | # For utf8-locales the case changing functions |
2270 | # should work just like they do outside of locale. | |
2271 | # Can use eval here because not testing it when | |
2272 | # not in locale. | |
2273 | $should_be = eval "$function('$char')"; | |
2274 | die "Unexpected eval error $@ from 'eval \"$function('$char')\"'" if $@; | |
71e5cbb3 | 2275 | |
faf0c248 KW |
2276 | } |
2277 | use locale ':not_characters'; | |
2278 | $changed = ($function eq "uc") | |
2279 | ? uc($char) | |
2280 | : ($function eq "ucfirst") | |
2281 | ? ucfirst($char) | |
2282 | : ($function eq "lc") | |
2283 | ? lc($char) | |
2284 | : ($function eq "lcfirst") | |
2285 | ? lcfirst($char) | |
26c1569f KW |
2286 | : ($function eq "fc") |
2287 | ? fc($char) | |
faf0c248 | 2288 | : die("Unexpected function \"$function\""); |
71e5cbb3 | 2289 | } |
faf0c248 KW |
2290 | ok($changed eq $should_be, |
2291 | "$function(\"$char\") in C locale " | |
2292 | . (($is_utf8_locale) | |
2293 | ? "(use locale ':not_characters'" | |
2294 | : "(use locale") | |
2295 | . (($encoded_in_utf8) | |
2296 | ? "; encoded in utf8)" | |
2297 | : "; not encoded in utf8)") | |
2298 | . " should be \"$should_be\", got \"$changed\""); | |
2299 | ||
445bf929 KW |
2300 | # Tainting shouldn't happen for use locale :not_character |
2301 | # (a utf8 locale) | |
2302 | (! $is_utf8_locale) | |
faf0c248 KW |
2303 | ? check_taint($changed) |
2304 | : check_taint_not($changed); | |
2305 | ||
2306 | # Use UTF-8 next time through the loop | |
2307 | utf8::upgrade($char); | |
0099bb8d | 2308 | } |
66cbab2c | 2309 | } |
094a2f8c KW |
2310 | } |
2311 | } | |
2312 | } | |
2313 | ||
1bfe8fea KW |
2314 | # Give final advice. |
2315 | ||
2316 | my $didwarn = 0; | |
2317 | ||
2318 | foreach ($first_locales_test_number..$final_locales_test_number) { | |
2319 | if ($Problem{$_}) { | |
2320 | my @f = sort keys %{ $Problem{$_} }; | |
2321 | my $f = join(" ", @f); | |
2322 | $f =~ s/(.{50,60}) /$1\n#\t/g; | |
2323 | ||
2324 | "#\n", | |
2325 | "# The locale ", (@f == 1 ? "definition" : "definitions"), "\n#\n", | |
2326 | "#\t", $f, "\n#\n", | |
2327 | "# on your system may have errors because the locale test $_\n", | |
2328 | "# \"$test_names{$_}\"\n", | |
2329 | "# failed in ", (@f == 1 ? "that locale" : "those locales"), | |
2330 | ".\n"; | |
2331 | print <<EOW; | |
2332 | # | |
2333 | # If your users are not using these locales you are safe for the moment, | |
2334 | # but please report this failure first to perlbug\@perl.com using the | |
2335 | # perlbug script (as described in the INSTALL file) so that the exact | |
2336 | # details of the failures can be sorted out first and then your operating | |
2337 | # system supplier can be alerted about these anomalies. | |
2338 | # | |
2339 | EOW | |
2340 | $didwarn = 1; | |
2341 | } | |
2342 | } | |
2343 | ||
2344 | # Tell which locales were okay and which were not. | |
2345 | ||
2346 | if ($didwarn) { | |
2347 | my (@s, @F); | |
2348 | ||
2349 | foreach my $l (@Locale) { | |
2350 | my $p = 0; | |
2351 | if ($setlocale_failed{$l}) { | |
2352 | $p++; | |
2353 | } | |
2354 | else { | |
2355 | foreach my $t | |
2356 | ($first_locales_test_number..$final_locales_test_number) | |
2357 | { | |
2358 | $p++ if $Problem{$t}{$l}; | |
2359 | } | |
2360 | } | |
2361 | push @s, $l if $p == 0; | |
2362 | push @F, $l unless $p == 0; | |
2363 | } | |
2364 | ||
2365 | if (@s) { | |
2366 | my $s = join(" ", @s); | |
2367 | $s =~ s/(.{50,60}) /$1\n#\t/g; | |
2368 | ||
a5cf558a | 2369 | |
1bfe8fea KW |
2370 | "# The following locales\n#\n", |
2371 | "#\t", $s, "\n#\n", | |
2372 | "# tested okay.\n#\n", | |
2373 | } else { | |
a5cf558a | 2374 | print "# None of your locales were fully okay.\n"; |
1bfe8fea KW |
2375 | } |
2376 | ||
2377 | if (@F) { | |
2378 | my $F = join(" ", @F); | |
2379 | $F =~ s/(.{50,60}) /$1\n#\t/g; | |
2380 | ||
c7e7f6b7 KW |
2381 | my $details = ""; |
2382 | unless ($debug) { | |
2383 | $details = "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n"; | |
2384 | } | |
2385 | elsif ($debug == 1) { | |
2386 | $details = "# For even more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=2.\n"; | |
2387 | } | |
2388 | ||
a5cf558a | 2389 | |
1bfe8fea KW |
2390 | "# The following locales\n#\n", |
2391 | "#\t", $F, "\n#\n", | |
2392 | "# had problems.\n#\n", | |
c7e7f6b7 | 2393 | $details; |
1bfe8fea | 2394 | } else { |
a5cf558a | 2395 | print "# None of your locales were broken.\n"; |
1bfe8fea KW |
2396 | } |
2397 | } | |
2398 | ||
fdf053ee | 2399 | print "1..$test_num\n"; |
906f284f | 2400 | |
90248788 | 2401 | # eof |