This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Don't run locale test if -DNO_LOCALE
[perl5.git] / t / pragma / locale.t
CommitLineData
8ebc5c01 1#!./perl -wT
2
3BEGIN {
4 chdir 't' if -d 't';
5 @INC = '../lib';
b002077a
CS
6 require Config; import Config;
7 if ($Config{ccflags} =~ /\bD?NO_LOCALE\b/) {
8 print "1..0\n";
9 exit;
10 }
8ebc5c01 11}
12
13use strict;
14
15my $have_setlocale = 0;
16eval {
17 require POSIX;
18 import POSIX ':locale_h';
19 $have_setlocale++;
20};
21
9fc9f3bf 22print "1..", ($have_setlocale ? 102 : 98), "\n";
8ebc5c01 23
24use vars qw($a
25 $English $German $French $Spanish
26 @C @English @German @French @Spanish
27 $Locale @Locale %iLocale %UPPER %lower @Neoalpha);
28
29$a = 'abc %';
30
31sub ok {
32 my ($n, $result) = @_;
33
34 print 'not ' unless ($result);
35 print "ok $n\n";
36}
37
38# First we'll do a lot of taint checking for locales.
39# This is the easiest to test, actually, as any locale,
40# even the default locale will taint under 'use locale'.
41
42sub is_tainted { # hello, camel two.
43 my $dummy;
44 not eval { $dummy = join("", @_), kill 0; 1 }
45}
46
47sub check_taint ($$) {
48 ok $_[0], is_tainted($_[1]);
49}
50
51sub check_taint_not ($$) {
52 ok $_[0], not is_tainted($_[1]);
53}
54
55use locale; # engage locale and therefore locale taint.
56
57check_taint_not 1, $a;
58
59check_taint 2, uc($a);
60check_taint 3, "\U$a";
61check_taint 4, ucfirst($a);
62check_taint 5, "\u$a";
63check_taint 6, lc($a);
64check_taint 7, "\L$a";
65check_taint 8, lcfirst($a);
66check_taint 9, "\l$a";
67
68check_taint 10, sprintf('%e', 123.456);
69check_taint 11, sprintf('%f', 123.456);
70check_taint 12, sprintf('%g', 123.456);
71check_taint_not 13, sprintf('%d', 123.456);
72check_taint_not 14, sprintf('%x', 123.456);
73
74$_ = $a; # untaint $_
75
76$_ = uc($a); # taint $_
77
78check_taint 15, $_;
79
80/(\w)/; # taint $&, $`, $', $+, $1.
81check_taint 16, $&;
82check_taint 17, $`;
83check_taint 18, $';
84check_taint 19, $+;
85check_taint 20, $1;
86check_taint_not 21, $2;
87
88/(.)/; # untaint $&, $`, $', $+, $1.
89check_taint_not 22, $&;
90check_taint_not 23, $`;
91check_taint_not 24, $';
92check_taint_not 25, $+;
93check_taint_not 26, $1;
94check_taint_not 27, $2;
95
96/(\W)/; # taint $&, $`, $', $+, $1.
97check_taint 28, $&;
98check_taint 29, $`;
99check_taint 30, $';
100check_taint 31, $+;
101check_taint 32, $1;
102check_taint_not 33, $2;
103
104/(\s)/; # taint $&, $`, $', $+, $1.
105check_taint 34, $&;
106check_taint 35, $`;
107check_taint 36, $';
108check_taint 37, $+;
109check_taint 38, $1;
110check_taint_not 39, $2;
111
112/(\S)/; # taint $&, $`, $', $+, $1.
113check_taint 40, $&;
114check_taint 41, $`;
115check_taint 42, $';
116check_taint 43, $+;
117check_taint 44, $1;
118check_taint_not 45, $2;
119
120$_ = $a; # untaint $_
121
122check_taint_not 46, $_;
123
124/(b)/; # this must not taint
125check_taint_not 47, $&;
126check_taint_not 48, $`;
127check_taint_not 49, $';
128check_taint_not 50, $+;
129check_taint_not 51, $1;
130check_taint_not 52, $2;
131
132$_ = $a; # untaint $_
133
134check_taint_not 53, $_;
135
136$b = uc($a); # taint $b
137s/(.+)/$b/; # this must taint only the $_
138
139check_taint 54, $_;
140check_taint_not 55, $&;
141check_taint_not 56, $`;
142check_taint_not 57, $';
143check_taint_not 58, $+;
144check_taint_not 59, $1;
145check_taint_not 60, $2;
146
147$_ = $a; # untaint $_
148
149s/(.+)/b/; # this must not taint
150check_taint_not 61, $_;
151check_taint_not 62, $&;
152check_taint_not 63, $`;
153check_taint_not 64, $';
154check_taint_not 65, $+;
155check_taint_not 66, $1;
156check_taint_not 67, $2;
157
158$b = $a; # untaint $b
159
160($b = $a) =~ s/\w/$&/;
161check_taint 68, $b; # $b should be tainted.
162check_taint_not 69, $a; # $a should be not.
163
164$_ = $a; # untaint $_
165
166s/(\w)/\l$1/; # this must taint
167check_taint 70, $_;
168check_taint 71, $&;
169check_taint 72, $`;
170check_taint 73, $';
171check_taint 74, $+;
172check_taint 75, $1;
173check_taint_not 76, $2;
174
175$_ = $a; # untaint $_
176
177s/(\w)/\L$1/; # this must taint
178check_taint 77, $_;
179check_taint 78, $&;
180check_taint 79, $`;
181check_taint 80, $';
182check_taint 81, $+;
183check_taint 82, $1;
184check_taint_not 83, $2;
185
186$_ = $a; # untaint $_
187
188s/(\w)/\u$1/; # this must taint
189check_taint 84, $_;
190check_taint 85, $&;
191check_taint 86, $`;
192check_taint 87, $';
193check_taint 88, $+;
194check_taint 89, $1;
195check_taint_not 90, $2;
196
197$_ = $a; # untaint $_
198
199s/(\w)/\U$1/; # this must taint
200check_taint 91, $_;
201check_taint 92, $&;
202check_taint 93, $`;
203check_taint 94, $';
204check_taint 95, $+;
205check_taint 96, $1;
206check_taint_not 97, $2;
207
208# After all this tainting $a should be cool.
209
210check_taint_not 98, $a;
211
212# I think we've seen quite enough of taint.
213# Let us do some *real* locale work now,
214# unless setlocale() is missing (i.e. minitest).
215
216exit unless $have_setlocale;
217
218sub getalnum {
219 sort grep /\w/, map { chr } 0..255
220}
221
222sub locatelocale ($$@) {
223 my ($lcall, $alnum, @try) = @_;
224
225 undef $$lcall;
226
227 for (@try) {
228 local $^W = 0; # suppress "Subroutine LC_ALL redefined"
229 if (setlocale(&LC_ALL, $_)) {
230 $$lcall = $_;
231 @$alnum = &getalnum;
232 last;
233 }
234 }
235
236 @$alnum = () unless (defined $$lcall);
237}
238
239# Find some default locale
240
241locatelocale(\$Locale, \@Locale, qw(C POSIX));
242
243# Find some English locale
244
245locatelocale(\$English, \@English,
246 qw(en_US.ISO8859-1 en_GB.ISO8859-1
247 en en_US en_UK en_IE en_CA en_AU en_NZ
248 english english.iso88591
249 american american.iso88591
250 british british.iso88591
251 ));
252
253# Find some German locale
254
255locatelocale(\$German, \@German,
256 qw(de_DE.ISO8859-1 de_AT.ISO8859-1 de_CH.ISO8859-1
257 de de_DE de_AT de_CH
258 german german.iso88591));
259
260# Find some French locale
261
262locatelocale(\$French, \@French,
263 qw(fr_FR.ISO8859-1 fr_BE.ISO8859-1 fr_CA.ISO8859-1 fr_CH.ISO8859-1
264 fr fr_FR fr_BE fr_CA fr_CH
265 french french.iso88591));
266
267# Find some Spanish locale
268
269locatelocale(\$Spanish, \@Spanish,
270 qw(es_AR.ISO8859-1 es_BO.ISO8859-1 es_CL.ISO8859-1
271 es_CO.ISO8859-1 es_CR.ISO8859-1 es_EC.ISO8859-1
272 es_ES.ISO8859-1 es_GT.ISO8859-1 es_MX.ISO8859-1
273 es_NI.ISO8859-1 es_PA.ISO8859-1 es_PE.ISO8859-1
274 es_PY.ISO8859-1 es_SV.ISO8859-1 es_UY.ISO8859-1 es_VE.ISO8859-1
275 es es_AR es_BO es_CL
276 es_CO es_CR es_EC
277 es_ES es_GT es_MX
278 es_NI es_PA es_PE
279 es_PY es_SV es_UY es_VE
280 spanish spanish.iso88591));
281
282# Select the largest of the alpha(num)bets.
283
284($Locale, @Locale) = ($English, @English)
285 if (length(@English) > length(@Locale));
286($Locale, @Locale) = ($German, @German)
287 if (length(@German) > length(@Locale));
288($Locale, @Locale) = ($French, @French)
289 if (length(@French) > length(@Locale));
290($Locale, @Locale) = ($Spanish, @Spanish)
291 if (length(@Spanish) > length(@Locale));
292
293print "# Locale = $Locale\n";
294print "# Alnum_ = @Locale\n";
295
296{
297 local $^W = 0;
298 setlocale(&LC_ALL, $Locale);
299}
300
301{
302 my $i = 0;
303
304 for (@Locale) {
305 $iLocale{$_} = $i++;
306 }
307}
308
309# Sieve the uppercase and the lowercase.
310
311for (@Locale) {
312 if (/[^\d_]/) { # skip digits and the _
313 if (lc eq $_) {
314 $UPPER{$_} = uc;
315 } else {
316 $lower{$_} = lc;
317 }
318 }
319}
320
8ebc5c01 321# Find the alphabets that are not alphabets in the default locale.
322
323{
324 no locale;
325
326 for (keys %UPPER, keys %lower) {
327 push(@Neoalpha, $_) if (/\W/);
328 }
329}
330
331@Neoalpha = sort @Neoalpha;
332
333# Test \w.
334
335{
336 my $word = join('', @Neoalpha);
337
338 $word =~ /^(\w*)$/;
339
340 print 'not ' if ($1 ne $word);
341}
9fc9f3bf 342print "ok 99\n";
8ebc5c01 343
344# Find places where the collation order differs from the default locale.
345
9fc9f3bf 346print "# testing 100\n";
8ebc5c01 347{
348 my (@k, $i, $j, @d);
349
350 {
351 no locale;
352
353 @k = sort (keys %UPPER, keys %lower);
354 }
355
356 for ($i = 0; $i < @k; $i++) {
357 for ($j = $i + 1; $j < @k; $j++) {
358 if ($iLocale{$k[$j]} < $iLocale{$k[$i]}) {
359 push(@d, [$k[$j], $k[$i]]);
360 }
361 }
362 }
363
364 # Cross-check those places.
365
366 for (@d) {
367 ($i, $j) = @$_;
368 if ($i gt $j) {
9fc9f3bf 369 print "# failed 100 at:\n";
8ebc5c01 370 print "# i = $i, j = $j, i ",
371 $i le $j ? 'le' : 'gt', " j\n";
372 print 'not ';
373 last;
374 }
375 }
376}
9fc9f3bf 377print "ok 100\n";
8ebc5c01 378
379# Cross-check whole character set.
380
9fc9f3bf 381print "# testing 101\n";
8ebc5c01 382for (map { chr } 0..255) {
383 if (/\w/ and /\W/) { print 'not '; last }
384 if (/\d/ and /\D/) { print 'not '; last }
385 if (/\s/ and /\S/) { print 'not '; last }
386 if (/\w/ and /\D/ and not /_/ and
387 not (exists $UPPER{$_} or exists $lower{$_})) {
9fc9f3bf 388 print "# failed 101 at:\n";
774d564b 389 print "# ", ord($_), " '$_'\n";
8ebc5c01 390 print 'not ';
391 last;
392 }
393}
9fc9f3bf 394print "ok 101\n";
8ebc5c01 395
396# The @Locale should be internally consistent.
397
9fc9f3bf 398print "# testing 102\n";
8ebc5c01 399{
774d564b 400 my ($from, $to, $lesser, $greater, @test, %test, $test);
8ebc5c01 401
402 for (0..9) {
403 # Select a slice.
404 $from = int(($_*@Locale)/10);
405 $to = $from + int(@Locale/10);
406 $to = $#Locale if ($to > $#Locale);
407 $lesser = join('', @Locale[$from..$to]);
408 # Select a slice one character on.
409 $from++; $to++;
410 $to = $#Locale if ($to > $#Locale);
411 $greater = join('', @Locale[$from..$to]);
774d564b 412 @test =
413 (
414 'not ($lesser lt $greater)', # 0
415 'not ($lesser le $greater)', # 1
416 'not ($lesser ne $greater)', # 2
417 ' ($lesser eq $greater)', # 3
418 ' ($lesser ge $greater)', # 4
419 ' ($lesser gt $greater)', # 5
420 ' ($greater lt $lesser )', # 6
421 ' ($greater le $lesser )', # 7
422 'not ($greater ne $lesser )', # 8
423 ' ($greater eq $lesser )', # 9
424 'not ($greater ge $lesser )', # 10
425 'not ($greater gt $lesser )', # 11
426 # Well, these two are sort of redundant
427 # because @Locale was derived using cmp.
428 'not (($lesser cmp $greater) == -1)', # 12
429 'not (($greater cmp $lesser ) == 1)' # 13
430 );
431 @test{@test} = 0 x @test;
432 $test = 0;
433 for my $ti (@test) { $test{$ti} = eval $ti ; $test ||= $test{$ti} }
434 if ($test) {
9fc9f3bf 435 print "# failed 102 at:\n";
774d564b 436 print "# lesser = '$lesser'\n";
437 print "# greater = '$greater'\n";
438 print "# (greater) from = $from, to = $to\n";
439 for my $ti (@test) {
440 printf("# %-40s %-4s", $ti,
441 $test{$ti} ? 'FAIL' : 'ok');
442 if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) {
443 printf("(%s == %4d)", $1, eval $1);
444 }
445 print "\n";
446 }
447
8ebc5c01 448 print 'not ';
449 last;
450 }
451 }
452}
9fc9f3bf 453print "ok 102\n";