Commit | Line | Data |
---|---|---|
8ebc5c01 | 1 | #!./perl -wT |
2 | ||
3 | BEGIN { | |
4 | chdir 't' if -d 't'; | |
5 | @INC = '../lib'; | |
b002077a CS |
6 | require Config; import Config; |
7 | if ($Config{ccflags} =~ /\bD?NO_LOCALE\b/) { | |
8 | print "1..0\n"; | |
9 | exit; | |
10 | } | |
8ebc5c01 | 11 | } |
12 | ||
13 | use strict; | |
14 | ||
15 | my $have_setlocale = 0; | |
16 | eval { | |
17 | require POSIX; | |
18 | import POSIX ':locale_h'; | |
19 | $have_setlocale++; | |
20 | }; | |
21 | ||
9fc9f3bf | 22 | print "1..", ($have_setlocale ? 102 : 98), "\n"; |
8ebc5c01 | 23 | |
24 | use vars qw($a | |
25 | $English $German $French $Spanish | |
26 | @C @English @German @French @Spanish | |
27 | $Locale @Locale %iLocale %UPPER %lower @Neoalpha); | |
28 | ||
29 | $a = 'abc %'; | |
30 | ||
31 | sub ok { | |
32 | my ($n, $result) = @_; | |
33 | ||
34 | print 'not ' unless ($result); | |
35 | print "ok $n\n"; | |
36 | } | |
37 | ||
38 | # First we'll do a lot of taint checking for locales. | |
39 | # This is the easiest to test, actually, as any locale, | |
40 | # even the default locale will taint under 'use locale'. | |
41 | ||
42 | sub is_tainted { # hello, camel two. | |
43 | my $dummy; | |
44 | not eval { $dummy = join("", @_), kill 0; 1 } | |
45 | } | |
46 | ||
47 | sub check_taint ($$) { | |
48 | ok $_[0], is_tainted($_[1]); | |
49 | } | |
50 | ||
51 | sub check_taint_not ($$) { | |
52 | ok $_[0], not is_tainted($_[1]); | |
53 | } | |
54 | ||
55 | use locale; # engage locale and therefore locale taint. | |
56 | ||
57 | check_taint_not 1, $a; | |
58 | ||
59 | check_taint 2, uc($a); | |
60 | check_taint 3, "\U$a"; | |
61 | check_taint 4, ucfirst($a); | |
62 | check_taint 5, "\u$a"; | |
63 | check_taint 6, lc($a); | |
64 | check_taint 7, "\L$a"; | |
65 | check_taint 8, lcfirst($a); | |
66 | check_taint 9, "\l$a"; | |
67 | ||
68 | check_taint 10, sprintf('%e', 123.456); | |
69 | check_taint 11, sprintf('%f', 123.456); | |
70 | check_taint 12, sprintf('%g', 123.456); | |
71 | check_taint_not 13, sprintf('%d', 123.456); | |
72 | check_taint_not 14, sprintf('%x', 123.456); | |
73 | ||
74 | $_ = $a; # untaint $_ | |
75 | ||
76 | $_ = uc($a); # taint $_ | |
77 | ||
78 | check_taint 15, $_; | |
79 | ||
80 | /(\w)/; # taint $&, $`, $', $+, $1. | |
81 | check_taint 16, $&; | |
82 | check_taint 17, $`; | |
83 | check_taint 18, $'; | |
84 | check_taint 19, $+; | |
85 | check_taint 20, $1; | |
86 | check_taint_not 21, $2; | |
87 | ||
88 | /(.)/; # untaint $&, $`, $', $+, $1. | |
89 | check_taint_not 22, $&; | |
90 | check_taint_not 23, $`; | |
91 | check_taint_not 24, $'; | |
92 | check_taint_not 25, $+; | |
93 | check_taint_not 26, $1; | |
94 | check_taint_not 27, $2; | |
95 | ||
96 | /(\W)/; # taint $&, $`, $', $+, $1. | |
97 | check_taint 28, $&; | |
98 | check_taint 29, $`; | |
99 | check_taint 30, $'; | |
100 | check_taint 31, $+; | |
101 | check_taint 32, $1; | |
102 | check_taint_not 33, $2; | |
103 | ||
104 | /(\s)/; # taint $&, $`, $', $+, $1. | |
105 | check_taint 34, $&; | |
106 | check_taint 35, $`; | |
107 | check_taint 36, $'; | |
108 | check_taint 37, $+; | |
109 | check_taint 38, $1; | |
110 | check_taint_not 39, $2; | |
111 | ||
112 | /(\S)/; # taint $&, $`, $', $+, $1. | |
113 | check_taint 40, $&; | |
114 | check_taint 41, $`; | |
115 | check_taint 42, $'; | |
116 | check_taint 43, $+; | |
117 | check_taint 44, $1; | |
118 | check_taint_not 45, $2; | |
119 | ||
120 | $_ = $a; # untaint $_ | |
121 | ||
122 | check_taint_not 46, $_; | |
123 | ||
124 | /(b)/; # this must not taint | |
125 | check_taint_not 47, $&; | |
126 | check_taint_not 48, $`; | |
127 | check_taint_not 49, $'; | |
128 | check_taint_not 50, $+; | |
129 | check_taint_not 51, $1; | |
130 | check_taint_not 52, $2; | |
131 | ||
132 | $_ = $a; # untaint $_ | |
133 | ||
134 | check_taint_not 53, $_; | |
135 | ||
136 | $b = uc($a); # taint $b | |
137 | s/(.+)/$b/; # this must taint only the $_ | |
138 | ||
139 | check_taint 54, $_; | |
140 | check_taint_not 55, $&; | |
141 | check_taint_not 56, $`; | |
142 | check_taint_not 57, $'; | |
143 | check_taint_not 58, $+; | |
144 | check_taint_not 59, $1; | |
145 | check_taint_not 60, $2; | |
146 | ||
147 | $_ = $a; # untaint $_ | |
148 | ||
149 | s/(.+)/b/; # this must not taint | |
150 | check_taint_not 61, $_; | |
151 | check_taint_not 62, $&; | |
152 | check_taint_not 63, $`; | |
153 | check_taint_not 64, $'; | |
154 | check_taint_not 65, $+; | |
155 | check_taint_not 66, $1; | |
156 | check_taint_not 67, $2; | |
157 | ||
158 | $b = $a; # untaint $b | |
159 | ||
160 | ($b = $a) =~ s/\w/$&/; | |
161 | check_taint 68, $b; # $b should be tainted. | |
162 | check_taint_not 69, $a; # $a should be not. | |
163 | ||
164 | $_ = $a; # untaint $_ | |
165 | ||
166 | s/(\w)/\l$1/; # this must taint | |
167 | check_taint 70, $_; | |
168 | check_taint 71, $&; | |
169 | check_taint 72, $`; | |
170 | check_taint 73, $'; | |
171 | check_taint 74, $+; | |
172 | check_taint 75, $1; | |
173 | check_taint_not 76, $2; | |
174 | ||
175 | $_ = $a; # untaint $_ | |
176 | ||
177 | s/(\w)/\L$1/; # this must taint | |
178 | check_taint 77, $_; | |
179 | check_taint 78, $&; | |
180 | check_taint 79, $`; | |
181 | check_taint 80, $'; | |
182 | check_taint 81, $+; | |
183 | check_taint 82, $1; | |
184 | check_taint_not 83, $2; | |
185 | ||
186 | $_ = $a; # untaint $_ | |
187 | ||
188 | s/(\w)/\u$1/; # this must taint | |
189 | check_taint 84, $_; | |
190 | check_taint 85, $&; | |
191 | check_taint 86, $`; | |
192 | check_taint 87, $'; | |
193 | check_taint 88, $+; | |
194 | check_taint 89, $1; | |
195 | check_taint_not 90, $2; | |
196 | ||
197 | $_ = $a; # untaint $_ | |
198 | ||
199 | s/(\w)/\U$1/; # this must taint | |
200 | check_taint 91, $_; | |
201 | check_taint 92, $&; | |
202 | check_taint 93, $`; | |
203 | check_taint 94, $'; | |
204 | check_taint 95, $+; | |
205 | check_taint 96, $1; | |
206 | check_taint_not 97, $2; | |
207 | ||
208 | # After all this tainting $a should be cool. | |
209 | ||
210 | check_taint_not 98, $a; | |
211 | ||
212 | # I think we've seen quite enough of taint. | |
213 | # Let us do some *real* locale work now, | |
214 | # unless setlocale() is missing (i.e. minitest). | |
215 | ||
216 | exit unless $have_setlocale; | |
217 | ||
218 | sub getalnum { | |
219 | sort grep /\w/, map { chr } 0..255 | |
220 | } | |
221 | ||
222 | sub locatelocale ($$@) { | |
223 | my ($lcall, $alnum, @try) = @_; | |
224 | ||
225 | undef $$lcall; | |
226 | ||
227 | for (@try) { | |
228 | local $^W = 0; # suppress "Subroutine LC_ALL redefined" | |
229 | if (setlocale(&LC_ALL, $_)) { | |
230 | $$lcall = $_; | |
231 | @$alnum = &getalnum; | |
232 | last; | |
233 | } | |
234 | } | |
235 | ||
236 | @$alnum = () unless (defined $$lcall); | |
237 | } | |
238 | ||
239 | # Find some default locale | |
240 | ||
241 | locatelocale(\$Locale, \@Locale, qw(C POSIX)); | |
242 | ||
243 | # Find some English locale | |
244 | ||
245 | locatelocale(\$English, \@English, | |
246 | qw(en_US.ISO8859-1 en_GB.ISO8859-1 | |
247 | en en_US en_UK en_IE en_CA en_AU en_NZ | |
248 | english english.iso88591 | |
249 | american american.iso88591 | |
250 | british british.iso88591 | |
251 | )); | |
252 | ||
253 | # Find some German locale | |
254 | ||
255 | locatelocale(\$German, \@German, | |
256 | qw(de_DE.ISO8859-1 de_AT.ISO8859-1 de_CH.ISO8859-1 | |
257 | de de_DE de_AT de_CH | |
258 | german german.iso88591)); | |
259 | ||
260 | # Find some French locale | |
261 | ||
262 | locatelocale(\$French, \@French, | |
263 | qw(fr_FR.ISO8859-1 fr_BE.ISO8859-1 fr_CA.ISO8859-1 fr_CH.ISO8859-1 | |
264 | fr fr_FR fr_BE fr_CA fr_CH | |
265 | french french.iso88591)); | |
266 | ||
267 | # Find some Spanish locale | |
268 | ||
269 | locatelocale(\$Spanish, \@Spanish, | |
270 | qw(es_AR.ISO8859-1 es_BO.ISO8859-1 es_CL.ISO8859-1 | |
271 | es_CO.ISO8859-1 es_CR.ISO8859-1 es_EC.ISO8859-1 | |
272 | es_ES.ISO8859-1 es_GT.ISO8859-1 es_MX.ISO8859-1 | |
273 | es_NI.ISO8859-1 es_PA.ISO8859-1 es_PE.ISO8859-1 | |
274 | es_PY.ISO8859-1 es_SV.ISO8859-1 es_UY.ISO8859-1 es_VE.ISO8859-1 | |
275 | es es_AR es_BO es_CL | |
276 | es_CO es_CR es_EC | |
277 | es_ES es_GT es_MX | |
278 | es_NI es_PA es_PE | |
279 | es_PY es_SV es_UY es_VE | |
280 | spanish spanish.iso88591)); | |
281 | ||
282 | # Select the largest of the alpha(num)bets. | |
283 | ||
284 | ($Locale, @Locale) = ($English, @English) | |
285 | if (length(@English) > length(@Locale)); | |
286 | ($Locale, @Locale) = ($German, @German) | |
287 | if (length(@German) > length(@Locale)); | |
288 | ($Locale, @Locale) = ($French, @French) | |
289 | if (length(@French) > length(@Locale)); | |
290 | ($Locale, @Locale) = ($Spanish, @Spanish) | |
291 | if (length(@Spanish) > length(@Locale)); | |
292 | ||
293 | print "# Locale = $Locale\n"; | |
294 | print "# Alnum_ = @Locale\n"; | |
295 | ||
296 | { | |
297 | local $^W = 0; | |
298 | setlocale(&LC_ALL, $Locale); | |
299 | } | |
300 | ||
301 | { | |
302 | my $i = 0; | |
303 | ||
304 | for (@Locale) { | |
305 | $iLocale{$_} = $i++; | |
306 | } | |
307 | } | |
308 | ||
309 | # Sieve the uppercase and the lowercase. | |
310 | ||
311 | for (@Locale) { | |
312 | if (/[^\d_]/) { # skip digits and the _ | |
313 | if (lc eq $_) { | |
314 | $UPPER{$_} = uc; | |
315 | } else { | |
316 | $lower{$_} = lc; | |
317 | } | |
318 | } | |
319 | } | |
320 | ||
8ebc5c01 | 321 | # Find the alphabets that are not alphabets in the default locale. |
322 | ||
323 | { | |
324 | no locale; | |
325 | ||
326 | for (keys %UPPER, keys %lower) { | |
327 | push(@Neoalpha, $_) if (/\W/); | |
328 | } | |
329 | } | |
330 | ||
331 | @Neoalpha = sort @Neoalpha; | |
332 | ||
333 | # Test \w. | |
334 | ||
335 | { | |
336 | my $word = join('', @Neoalpha); | |
337 | ||
338 | $word =~ /^(\w*)$/; | |
339 | ||
340 | print 'not ' if ($1 ne $word); | |
341 | } | |
9fc9f3bf | 342 | print "ok 99\n"; |
8ebc5c01 | 343 | |
344 | # Find places where the collation order differs from the default locale. | |
345 | ||
9fc9f3bf | 346 | print "# testing 100\n"; |
8ebc5c01 | 347 | { |
348 | my (@k, $i, $j, @d); | |
349 | ||
350 | { | |
351 | no locale; | |
352 | ||
353 | @k = sort (keys %UPPER, keys %lower); | |
354 | } | |
355 | ||
356 | for ($i = 0; $i < @k; $i++) { | |
357 | for ($j = $i + 1; $j < @k; $j++) { | |
358 | if ($iLocale{$k[$j]} < $iLocale{$k[$i]}) { | |
359 | push(@d, [$k[$j], $k[$i]]); | |
360 | } | |
361 | } | |
362 | } | |
363 | ||
364 | # Cross-check those places. | |
365 | ||
366 | for (@d) { | |
367 | ($i, $j) = @$_; | |
368 | if ($i gt $j) { | |
9fc9f3bf | 369 | print "# failed 100 at:\n"; |
8ebc5c01 | 370 | print "# i = $i, j = $j, i ", |
371 | $i le $j ? 'le' : 'gt', " j\n"; | |
372 | print 'not '; | |
373 | last; | |
374 | } | |
375 | } | |
376 | } | |
9fc9f3bf | 377 | print "ok 100\n"; |
8ebc5c01 | 378 | |
379 | # Cross-check whole character set. | |
380 | ||
9fc9f3bf | 381 | print "# testing 101\n"; |
8ebc5c01 | 382 | for (map { chr } 0..255) { |
383 | if (/\w/ and /\W/) { print 'not '; last } | |
384 | if (/\d/ and /\D/) { print 'not '; last } | |
385 | if (/\s/ and /\S/) { print 'not '; last } | |
386 | if (/\w/ and /\D/ and not /_/ and | |
387 | not (exists $UPPER{$_} or exists $lower{$_})) { | |
9fc9f3bf | 388 | print "# failed 101 at:\n"; |
774d564b | 389 | print "# ", ord($_), " '$_'\n"; |
8ebc5c01 | 390 | print 'not '; |
391 | last; | |
392 | } | |
393 | } | |
9fc9f3bf | 394 | print "ok 101\n"; |
8ebc5c01 | 395 | |
396 | # The @Locale should be internally consistent. | |
397 | ||
9fc9f3bf | 398 | print "# testing 102\n"; |
8ebc5c01 | 399 | { |
774d564b | 400 | my ($from, $to, $lesser, $greater, @test, %test, $test); |
8ebc5c01 | 401 | |
402 | for (0..9) { | |
403 | # Select a slice. | |
404 | $from = int(($_*@Locale)/10); | |
405 | $to = $from + int(@Locale/10); | |
406 | $to = $#Locale if ($to > $#Locale); | |
407 | $lesser = join('', @Locale[$from..$to]); | |
408 | # Select a slice one character on. | |
409 | $from++; $to++; | |
410 | $to = $#Locale if ($to > $#Locale); | |
411 | $greater = join('', @Locale[$from..$to]); | |
774d564b | 412 | @test = |
413 | ( | |
414 | 'not ($lesser lt $greater)', # 0 | |
415 | 'not ($lesser le $greater)', # 1 | |
416 | 'not ($lesser ne $greater)', # 2 | |
417 | ' ($lesser eq $greater)', # 3 | |
418 | ' ($lesser ge $greater)', # 4 | |
419 | ' ($lesser gt $greater)', # 5 | |
420 | ' ($greater lt $lesser )', # 6 | |
421 | ' ($greater le $lesser )', # 7 | |
422 | 'not ($greater ne $lesser )', # 8 | |
423 | ' ($greater eq $lesser )', # 9 | |
424 | 'not ($greater ge $lesser )', # 10 | |
425 | 'not ($greater gt $lesser )', # 11 | |
426 | # Well, these two are sort of redundant | |
427 | # because @Locale was derived using cmp. | |
428 | 'not (($lesser cmp $greater) == -1)', # 12 | |
429 | 'not (($greater cmp $lesser ) == 1)' # 13 | |
430 | ); | |
431 | @test{@test} = 0 x @test; | |
432 | $test = 0; | |
433 | for my $ti (@test) { $test{$ti} = eval $ti ; $test ||= $test{$ti} } | |
434 | if ($test) { | |
9fc9f3bf | 435 | print "# failed 102 at:\n"; |
774d564b | 436 | print "# lesser = '$lesser'\n"; |
437 | print "# greater = '$greater'\n"; | |
438 | print "# (greater) from = $from, to = $to\n"; | |
439 | for my $ti (@test) { | |
440 | printf("# %-40s %-4s", $ti, | |
441 | $test{$ti} ? 'FAIL' : 'ok'); | |
442 | if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) { | |
443 | printf("(%s == %4d)", $1, eval $1); | |
444 | } | |
445 | print "\n"; | |
446 | } | |
447 | ||
8ebc5c01 | 448 | print 'not '; |
449 | last; | |
450 | } | |
451 | } | |
452 | } | |
9fc9f3bf | 453 | print "ok 102\n"; |