Commit | Line | Data |
---|---|---|
8ebc5c01 | 1 | #!./perl -wT |
2 | ||
3 | BEGIN { | |
4 | chdir 't' if -d 't'; | |
5 | @INC = '../lib'; | |
b002077a | 6 | require Config; import Config; |
97a0514d | 7 | if (!$Config{d_setlocale} || $Config{ccflags} =~ /\bD?NO_LOCALE\b/) { |
b002077a CS |
8 | print "1..0\n"; |
9 | exit; | |
10 | } | |
8ebc5c01 | 11 | } |
12 | ||
13 | use strict; | |
14 | ||
15 | my $have_setlocale = 0; | |
16 | eval { | |
17 | require POSIX; | |
18 | import POSIX ':locale_h'; | |
19 | $have_setlocale++; | |
20 | }; | |
21 | ||
9fc9f3bf | 22 | print "1..", ($have_setlocale ? 102 : 98), "\n"; |
8ebc5c01 | 23 | |
24 | use vars qw($a | |
25 | $English $German $French $Spanish | |
26 | @C @English @German @French @Spanish | |
27 | $Locale @Locale %iLocale %UPPER %lower @Neoalpha); | |
28 | ||
29 | $a = 'abc %'; | |
30 | ||
31 | sub ok { | |
32 | my ($n, $result) = @_; | |
33 | ||
34 | print 'not ' unless ($result); | |
35 | print "ok $n\n"; | |
36 | } | |
37 | ||
38 | # First we'll do a lot of taint checking for locales. | |
39 | # This is the easiest to test, actually, as any locale, | |
40 | # even the default locale will taint under 'use locale'. | |
41 | ||
42 | sub is_tainted { # hello, camel two. | |
3fe9a6f1 | 43 | local $^W; # no warnings 'undef' |
8ebc5c01 | 44 | my $dummy; |
45 | not eval { $dummy = join("", @_), kill 0; 1 } | |
46 | } | |
47 | ||
48 | sub check_taint ($$) { | |
49 | ok $_[0], is_tainted($_[1]); | |
50 | } | |
51 | ||
52 | sub check_taint_not ($$) { | |
53 | ok $_[0], not is_tainted($_[1]); | |
54 | } | |
55 | ||
56 | use locale; # engage locale and therefore locale taint. | |
57 | ||
58 | check_taint_not 1, $a; | |
59 | ||
60 | check_taint 2, uc($a); | |
61 | check_taint 3, "\U$a"; | |
62 | check_taint 4, ucfirst($a); | |
63 | check_taint 5, "\u$a"; | |
64 | check_taint 6, lc($a); | |
65 | check_taint 7, "\L$a"; | |
66 | check_taint 8, lcfirst($a); | |
67 | check_taint 9, "\l$a"; | |
68 | ||
69 | check_taint 10, sprintf('%e', 123.456); | |
70 | check_taint 11, sprintf('%f', 123.456); | |
71 | check_taint 12, sprintf('%g', 123.456); | |
72 | check_taint_not 13, sprintf('%d', 123.456); | |
73 | check_taint_not 14, sprintf('%x', 123.456); | |
74 | ||
75 | $_ = $a; # untaint $_ | |
76 | ||
77 | $_ = uc($a); # taint $_ | |
78 | ||
79 | check_taint 15, $_; | |
80 | ||
81 | /(\w)/; # taint $&, $`, $', $+, $1. | |
82 | check_taint 16, $&; | |
83 | check_taint 17, $`; | |
84 | check_taint 18, $'; | |
85 | check_taint 19, $+; | |
86 | check_taint 20, $1; | |
87 | check_taint_not 21, $2; | |
88 | ||
89 | /(.)/; # untaint $&, $`, $', $+, $1. | |
90 | check_taint_not 22, $&; | |
91 | check_taint_not 23, $`; | |
92 | check_taint_not 24, $'; | |
93 | check_taint_not 25, $+; | |
94 | check_taint_not 26, $1; | |
95 | check_taint_not 27, $2; | |
96 | ||
97 | /(\W)/; # taint $&, $`, $', $+, $1. | |
98 | check_taint 28, $&; | |
99 | check_taint 29, $`; | |
100 | check_taint 30, $'; | |
101 | check_taint 31, $+; | |
102 | check_taint 32, $1; | |
103 | check_taint_not 33, $2; | |
104 | ||
105 | /(\s)/; # taint $&, $`, $', $+, $1. | |
106 | check_taint 34, $&; | |
107 | check_taint 35, $`; | |
108 | check_taint 36, $'; | |
109 | check_taint 37, $+; | |
110 | check_taint 38, $1; | |
111 | check_taint_not 39, $2; | |
112 | ||
113 | /(\S)/; # taint $&, $`, $', $+, $1. | |
114 | check_taint 40, $&; | |
115 | check_taint 41, $`; | |
116 | check_taint 42, $'; | |
117 | check_taint 43, $+; | |
118 | check_taint 44, $1; | |
119 | check_taint_not 45, $2; | |
120 | ||
121 | $_ = $a; # untaint $_ | |
122 | ||
123 | check_taint_not 46, $_; | |
124 | ||
125 | /(b)/; # this must not taint | |
126 | check_taint_not 47, $&; | |
127 | check_taint_not 48, $`; | |
128 | check_taint_not 49, $'; | |
129 | check_taint_not 50, $+; | |
130 | check_taint_not 51, $1; | |
131 | check_taint_not 52, $2; | |
132 | ||
133 | $_ = $a; # untaint $_ | |
134 | ||
135 | check_taint_not 53, $_; | |
136 | ||
137 | $b = uc($a); # taint $b | |
138 | s/(.+)/$b/; # this must taint only the $_ | |
139 | ||
140 | check_taint 54, $_; | |
141 | check_taint_not 55, $&; | |
142 | check_taint_not 56, $`; | |
143 | check_taint_not 57, $'; | |
144 | check_taint_not 58, $+; | |
145 | check_taint_not 59, $1; | |
146 | check_taint_not 60, $2; | |
147 | ||
148 | $_ = $a; # untaint $_ | |
149 | ||
150 | s/(.+)/b/; # this must not taint | |
151 | check_taint_not 61, $_; | |
152 | check_taint_not 62, $&; | |
153 | check_taint_not 63, $`; | |
154 | check_taint_not 64, $'; | |
155 | check_taint_not 65, $+; | |
156 | check_taint_not 66, $1; | |
157 | check_taint_not 67, $2; | |
158 | ||
159 | $b = $a; # untaint $b | |
160 | ||
161 | ($b = $a) =~ s/\w/$&/; | |
162 | check_taint 68, $b; # $b should be tainted. | |
163 | check_taint_not 69, $a; # $a should be not. | |
164 | ||
165 | $_ = $a; # untaint $_ | |
166 | ||
167 | s/(\w)/\l$1/; # this must taint | |
168 | check_taint 70, $_; | |
169 | check_taint 71, $&; | |
170 | check_taint 72, $`; | |
171 | check_taint 73, $'; | |
172 | check_taint 74, $+; | |
173 | check_taint 75, $1; | |
174 | check_taint_not 76, $2; | |
175 | ||
176 | $_ = $a; # untaint $_ | |
177 | ||
178 | s/(\w)/\L$1/; # this must taint | |
179 | check_taint 77, $_; | |
180 | check_taint 78, $&; | |
181 | check_taint 79, $`; | |
182 | check_taint 80, $'; | |
183 | check_taint 81, $+; | |
184 | check_taint 82, $1; | |
185 | check_taint_not 83, $2; | |
186 | ||
187 | $_ = $a; # untaint $_ | |
188 | ||
189 | s/(\w)/\u$1/; # this must taint | |
190 | check_taint 84, $_; | |
191 | check_taint 85, $&; | |
192 | check_taint 86, $`; | |
193 | check_taint 87, $'; | |
194 | check_taint 88, $+; | |
195 | check_taint 89, $1; | |
196 | check_taint_not 90, $2; | |
197 | ||
198 | $_ = $a; # untaint $_ | |
199 | ||
200 | s/(\w)/\U$1/; # this must taint | |
201 | check_taint 91, $_; | |
202 | check_taint 92, $&; | |
203 | check_taint 93, $`; | |
204 | check_taint 94, $'; | |
205 | check_taint 95, $+; | |
206 | check_taint 96, $1; | |
207 | check_taint_not 97, $2; | |
208 | ||
209 | # After all this tainting $a should be cool. | |
210 | ||
211 | check_taint_not 98, $a; | |
212 | ||
213 | # I think we've seen quite enough of taint. | |
214 | # Let us do some *real* locale work now, | |
215 | # unless setlocale() is missing (i.e. minitest). | |
216 | ||
217 | exit unless $have_setlocale; | |
218 | ||
219 | sub getalnum { | |
220 | sort grep /\w/, map { chr } 0..255 | |
221 | } | |
222 | ||
223 | sub locatelocale ($$@) { | |
224 | my ($lcall, $alnum, @try) = @_; | |
225 | ||
226 | undef $$lcall; | |
227 | ||
228 | for (@try) { | |
229 | local $^W = 0; # suppress "Subroutine LC_ALL redefined" | |
230 | if (setlocale(&LC_ALL, $_)) { | |
231 | $$lcall = $_; | |
232 | @$alnum = &getalnum; | |
233 | last; | |
234 | } | |
235 | } | |
236 | ||
237 | @$alnum = () unless (defined $$lcall); | |
238 | } | |
239 | ||
240 | # Find some default locale | |
241 | ||
242 | locatelocale(\$Locale, \@Locale, qw(C POSIX)); | |
243 | ||
244 | # Find some English locale | |
245 | ||
246 | locatelocale(\$English, \@English, | |
247 | qw(en_US.ISO8859-1 en_GB.ISO8859-1 | |
248 | en en_US en_UK en_IE en_CA en_AU en_NZ | |
249 | english english.iso88591 | |
250 | american american.iso88591 | |
251 | british british.iso88591 | |
252 | )); | |
253 | ||
254 | # Find some German locale | |
255 | ||
256 | locatelocale(\$German, \@German, | |
257 | qw(de_DE.ISO8859-1 de_AT.ISO8859-1 de_CH.ISO8859-1 | |
258 | de de_DE de_AT de_CH | |
259 | german german.iso88591)); | |
260 | ||
261 | # Find some French locale | |
262 | ||
263 | locatelocale(\$French, \@French, | |
264 | qw(fr_FR.ISO8859-1 fr_BE.ISO8859-1 fr_CA.ISO8859-1 fr_CH.ISO8859-1 | |
265 | fr fr_FR fr_BE fr_CA fr_CH | |
266 | french french.iso88591)); | |
267 | ||
268 | # Find some Spanish locale | |
269 | ||
270 | locatelocale(\$Spanish, \@Spanish, | |
271 | qw(es_AR.ISO8859-1 es_BO.ISO8859-1 es_CL.ISO8859-1 | |
272 | es_CO.ISO8859-1 es_CR.ISO8859-1 es_EC.ISO8859-1 | |
273 | es_ES.ISO8859-1 es_GT.ISO8859-1 es_MX.ISO8859-1 | |
274 | es_NI.ISO8859-1 es_PA.ISO8859-1 es_PE.ISO8859-1 | |
275 | es_PY.ISO8859-1 es_SV.ISO8859-1 es_UY.ISO8859-1 es_VE.ISO8859-1 | |
276 | es es_AR es_BO es_CL | |
277 | es_CO es_CR es_EC | |
278 | es_ES es_GT es_MX | |
279 | es_NI es_PA es_PE | |
280 | es_PY es_SV es_UY es_VE | |
281 | spanish spanish.iso88591)); | |
282 | ||
283 | # Select the largest of the alpha(num)bets. | |
284 | ||
285 | ($Locale, @Locale) = ($English, @English) | |
545c8fcc | 286 | if (@English > @Locale); |
8ebc5c01 | 287 | ($Locale, @Locale) = ($German, @German) |
545c8fcc | 288 | if (@German > @Locale); |
8ebc5c01 | 289 | ($Locale, @Locale) = ($French, @French) |
545c8fcc | 290 | if (@French > @Locale); |
8ebc5c01 | 291 | ($Locale, @Locale) = ($Spanish, @Spanish) |
545c8fcc | 292 | if (@Spanish > @Locale); |
8ebc5c01 | 293 | |
294 | print "# Locale = $Locale\n"; | |
295 | print "# Alnum_ = @Locale\n"; | |
296 | ||
297 | { | |
298 | local $^W = 0; | |
299 | setlocale(&LC_ALL, $Locale); | |
300 | } | |
301 | ||
302 | { | |
303 | my $i = 0; | |
304 | ||
305 | for (@Locale) { | |
306 | $iLocale{$_} = $i++; | |
307 | } | |
308 | } | |
309 | ||
310 | # Sieve the uppercase and the lowercase. | |
311 | ||
312 | for (@Locale) { | |
313 | if (/[^\d_]/) { # skip digits and the _ | |
314 | if (lc eq $_) { | |
315 | $UPPER{$_} = uc; | |
316 | } else { | |
317 | $lower{$_} = lc; | |
318 | } | |
319 | } | |
320 | } | |
321 | ||
8ebc5c01 | 322 | # Find the alphabets that are not alphabets in the default locale. |
323 | ||
324 | { | |
325 | no locale; | |
326 | ||
327 | for (keys %UPPER, keys %lower) { | |
328 | push(@Neoalpha, $_) if (/\W/); | |
329 | } | |
330 | } | |
331 | ||
332 | @Neoalpha = sort @Neoalpha; | |
333 | ||
334 | # Test \w. | |
335 | ||
336 | { | |
337 | my $word = join('', @Neoalpha); | |
338 | ||
339 | $word =~ /^(\w*)$/; | |
340 | ||
341 | print 'not ' if ($1 ne $word); | |
342 | } | |
9fc9f3bf | 343 | print "ok 99\n"; |
8ebc5c01 | 344 | |
345 | # Find places where the collation order differs from the default locale. | |
346 | ||
9fc9f3bf | 347 | print "# testing 100\n"; |
8ebc5c01 | 348 | { |
349 | my (@k, $i, $j, @d); | |
350 | ||
351 | { | |
352 | no locale; | |
353 | ||
354 | @k = sort (keys %UPPER, keys %lower); | |
355 | } | |
356 | ||
357 | for ($i = 0; $i < @k; $i++) { | |
358 | for ($j = $i + 1; $j < @k; $j++) { | |
359 | if ($iLocale{$k[$j]} < $iLocale{$k[$i]}) { | |
360 | push(@d, [$k[$j], $k[$i]]); | |
361 | } | |
362 | } | |
363 | } | |
364 | ||
365 | # Cross-check those places. | |
366 | ||
367 | for (@d) { | |
368 | ($i, $j) = @$_; | |
369 | if ($i gt $j) { | |
9fc9f3bf | 370 | print "# failed 100 at:\n"; |
8ebc5c01 | 371 | print "# i = $i, j = $j, i ", |
372 | $i le $j ? 'le' : 'gt', " j\n"; | |
373 | print 'not '; | |
374 | last; | |
375 | } | |
376 | } | |
377 | } | |
9fc9f3bf | 378 | print "ok 100\n"; |
8ebc5c01 | 379 | |
380 | # Cross-check whole character set. | |
381 | ||
9fc9f3bf | 382 | print "# testing 101\n"; |
8ebc5c01 | 383 | for (map { chr } 0..255) { |
384 | if (/\w/ and /\W/) { print 'not '; last } | |
385 | if (/\d/ and /\D/) { print 'not '; last } | |
386 | if (/\s/ and /\S/) { print 'not '; last } | |
387 | if (/\w/ and /\D/ and not /_/ and | |
388 | not (exists $UPPER{$_} or exists $lower{$_})) { | |
9fc9f3bf | 389 | print "# failed 101 at:\n"; |
774d564b | 390 | print "# ", ord($_), " '$_'\n"; |
8ebc5c01 | 391 | print 'not '; |
392 | last; | |
393 | } | |
394 | } | |
9fc9f3bf | 395 | print "ok 101\n"; |
8ebc5c01 | 396 | |
fb73857a | 397 | # Test for read-onlys. |
398 | ||
399 | { | |
400 | no locale; | |
401 | $a = "qwerty"; | |
402 | { | |
403 | use locale; | |
404 | print "not " if $a cmp "qwerty"; | |
405 | } | |
406 | } | |
407 | print "ok 102\n"; | |
408 | ||
409 | # This test must be the last one because its failure is not fatal. | |
8ebc5c01 | 410 | # The @Locale should be internally consistent. |
90248788 TB |
411 | # Thanks to Hallvard Furuseth <h.b.furuseth@usit.uio.no> |
412 | # for inventing a way to test for ordering consistency | |
413 | # without requiring any particular order. | |
414 | # ++$jhi;#@iki.fi | |
8ebc5c01 | 415 | |
fb73857a | 416 | print "# testing 103\n"; |
8ebc5c01 | 417 | { |
90248788 | 418 | my ($from, $to, $lesser, $greater, @test, %test, $test, $yes, $no, $sign); |
8ebc5c01 | 419 | |
420 | for (0..9) { | |
421 | # Select a slice. | |
422 | $from = int(($_*@Locale)/10); | |
423 | $to = $from + int(@Locale/10); | |
424 | $to = $#Locale if ($to > $#Locale); | |
425 | $lesser = join('', @Locale[$from..$to]); | |
426 | # Select a slice one character on. | |
427 | $from++; $to++; | |
428 | $to = $#Locale if ($to > $#Locale); | |
429 | $greater = join('', @Locale[$from..$to]); | |
90248788 TB |
430 | ($yes, $no, $sign) = ($lesser lt $greater |
431 | ? (" ", "not ", 1) | |
432 | : ("not ", " ", -1)); | |
433 | # all these tests should FAIL (return 0). | |
774d564b | 434 | @test = |
435 | ( | |
90248788 TB |
436 | $no.' ($lesser lt $greater)', # 0 |
437 | $no.' ($lesser le $greater)', # 1 | |
fb73857a | 438 | 'not ($lesser ne $greater)', # 2 |
439 | ' ($lesser eq $greater)', # 3 | |
90248788 TB |
440 | $yes.' ($lesser ge $greater)', # 4 |
441 | $yes.' ($lesser gt $greater)', # 5 | |
442 | $yes.' ($greater lt $lesser )', # 6 | |
443 | $yes.' ($greater le $lesser )', # 7 | |
fb73857a | 444 | 'not ($greater ne $lesser )', # 8 |
445 | ' ($greater eq $lesser )', # 9 | |
90248788 TB |
446 | $no.' ($greater ge $lesser )', # 10 |
447 | $no.' ($greater gt $lesser )', # 11 | |
448 | 'not (($lesser cmp $greater) == -$sign)' # 12 | |
774d564b | 449 | ); |
450 | @test{@test} = 0 x @test; | |
451 | $test = 0; | |
452 | for my $ti (@test) { $test{$ti} = eval $ti ; $test ||= $test{$ti} } | |
453 | if ($test) { | |
fb73857a | 454 | print "# failed 103 at:\n"; |
774d564b | 455 | print "# lesser = '$lesser'\n"; |
456 | print "# greater = '$greater'\n"; | |
90248788 TB |
457 | print "# lesser cmp greater = ", $lesser cmp $greater, "\n"; |
458 | print "# greater cmp lesser = ", $greater cmp $lesser, "\n"; | |
774d564b | 459 | print "# (greater) from = $from, to = $to\n"; |
460 | for my $ti (@test) { | |
461 | printf("# %-40s %-4s", $ti, | |
462 | $test{$ti} ? 'FAIL' : 'ok'); | |
463 | if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) { | |
464 | printf("(%s == %4d)", $1, eval $1); | |
465 | } | |
466 | print "\n"; | |
467 | } | |
468 | ||
fb73857a | 469 | warn "The locale definition on your system may have errors.\n"; |
8ebc5c01 | 470 | last; |
471 | } | |
472 | } | |
473 | } | |
90248788 TB |
474 | |
475 | # eof |