This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
filecopy.t #3 fails on dos-djgpp
[perl5.git] / t / pragma / locale.t
CommitLineData
8ebc5c01 1#!./perl -wT
2
3BEGIN {
4 chdir 't' if -d 't';
5 @INC = '../lib';
b002077a 6 require Config; import Config;
97a0514d 7 if (!$Config{d_setlocale} || $Config{ccflags} =~ /\bD?NO_LOCALE\b/) {
b002077a
CS
8 print "1..0\n";
9 exit;
10 }
8ebc5c01 11}
12
13use strict;
14
15my $have_setlocale = 0;
16eval {
17 require POSIX;
18 import POSIX ':locale_h';
19 $have_setlocale++;
20};
21
9fc9f3bf 22print "1..", ($have_setlocale ? 102 : 98), "\n";
8ebc5c01 23
24use vars qw($a
25 $English $German $French $Spanish
26 @C @English @German @French @Spanish
27 $Locale @Locale %iLocale %UPPER %lower @Neoalpha);
28
29$a = 'abc %';
30
31sub ok {
32 my ($n, $result) = @_;
33
34 print 'not ' unless ($result);
35 print "ok $n\n";
36}
37
38# First we'll do a lot of taint checking for locales.
39# This is the easiest to test, actually, as any locale,
40# even the default locale will taint under 'use locale'.
41
42sub is_tainted { # hello, camel two.
3fe9a6f1 43 local $^W; # no warnings 'undef'
8ebc5c01 44 my $dummy;
45 not eval { $dummy = join("", @_), kill 0; 1 }
46}
47
48sub check_taint ($$) {
49 ok $_[0], is_tainted($_[1]);
50}
51
52sub check_taint_not ($$) {
53 ok $_[0], not is_tainted($_[1]);
54}
55
56use locale; # engage locale and therefore locale taint.
57
58check_taint_not 1, $a;
59
60check_taint 2, uc($a);
61check_taint 3, "\U$a";
62check_taint 4, ucfirst($a);
63check_taint 5, "\u$a";
64check_taint 6, lc($a);
65check_taint 7, "\L$a";
66check_taint 8, lcfirst($a);
67check_taint 9, "\l$a";
68
69check_taint 10, sprintf('%e', 123.456);
70check_taint 11, sprintf('%f', 123.456);
71check_taint 12, sprintf('%g', 123.456);
72check_taint_not 13, sprintf('%d', 123.456);
73check_taint_not 14, sprintf('%x', 123.456);
74
75$_ = $a; # untaint $_
76
77$_ = uc($a); # taint $_
78
79check_taint 15, $_;
80
81/(\w)/; # taint $&, $`, $', $+, $1.
82check_taint 16, $&;
83check_taint 17, $`;
84check_taint 18, $';
85check_taint 19, $+;
86check_taint 20, $1;
87check_taint_not 21, $2;
88
89/(.)/; # untaint $&, $`, $', $+, $1.
90check_taint_not 22, $&;
91check_taint_not 23, $`;
92check_taint_not 24, $';
93check_taint_not 25, $+;
94check_taint_not 26, $1;
95check_taint_not 27, $2;
96
97/(\W)/; # taint $&, $`, $', $+, $1.
98check_taint 28, $&;
99check_taint 29, $`;
100check_taint 30, $';
101check_taint 31, $+;
102check_taint 32, $1;
103check_taint_not 33, $2;
104
105/(\s)/; # taint $&, $`, $', $+, $1.
106check_taint 34, $&;
107check_taint 35, $`;
108check_taint 36, $';
109check_taint 37, $+;
110check_taint 38, $1;
111check_taint_not 39, $2;
112
113/(\S)/; # taint $&, $`, $', $+, $1.
114check_taint 40, $&;
115check_taint 41, $`;
116check_taint 42, $';
117check_taint 43, $+;
118check_taint 44, $1;
119check_taint_not 45, $2;
120
121$_ = $a; # untaint $_
122
123check_taint_not 46, $_;
124
125/(b)/; # this must not taint
126check_taint_not 47, $&;
127check_taint_not 48, $`;
128check_taint_not 49, $';
129check_taint_not 50, $+;
130check_taint_not 51, $1;
131check_taint_not 52, $2;
132
133$_ = $a; # untaint $_
134
135check_taint_not 53, $_;
136
137$b = uc($a); # taint $b
138s/(.+)/$b/; # this must taint only the $_
139
140check_taint 54, $_;
141check_taint_not 55, $&;
142check_taint_not 56, $`;
143check_taint_not 57, $';
144check_taint_not 58, $+;
145check_taint_not 59, $1;
146check_taint_not 60, $2;
147
148$_ = $a; # untaint $_
149
150s/(.+)/b/; # this must not taint
151check_taint_not 61, $_;
152check_taint_not 62, $&;
153check_taint_not 63, $`;
154check_taint_not 64, $';
155check_taint_not 65, $+;
156check_taint_not 66, $1;
157check_taint_not 67, $2;
158
159$b = $a; # untaint $b
160
161($b = $a) =~ s/\w/$&/;
162check_taint 68, $b; # $b should be tainted.
163check_taint_not 69, $a; # $a should be not.
164
165$_ = $a; # untaint $_
166
167s/(\w)/\l$1/; # this must taint
168check_taint 70, $_;
169check_taint 71, $&;
170check_taint 72, $`;
171check_taint 73, $';
172check_taint 74, $+;
173check_taint 75, $1;
174check_taint_not 76, $2;
175
176$_ = $a; # untaint $_
177
178s/(\w)/\L$1/; # this must taint
179check_taint 77, $_;
180check_taint 78, $&;
181check_taint 79, $`;
182check_taint 80, $';
183check_taint 81, $+;
184check_taint 82, $1;
185check_taint_not 83, $2;
186
187$_ = $a; # untaint $_
188
189s/(\w)/\u$1/; # this must taint
190check_taint 84, $_;
191check_taint 85, $&;
192check_taint 86, $`;
193check_taint 87, $';
194check_taint 88, $+;
195check_taint 89, $1;
196check_taint_not 90, $2;
197
198$_ = $a; # untaint $_
199
200s/(\w)/\U$1/; # this must taint
201check_taint 91, $_;
202check_taint 92, $&;
203check_taint 93, $`;
204check_taint 94, $';
205check_taint 95, $+;
206check_taint 96, $1;
207check_taint_not 97, $2;
208
209# After all this tainting $a should be cool.
210
211check_taint_not 98, $a;
212
213# I think we've seen quite enough of taint.
214# Let us do some *real* locale work now,
215# unless setlocale() is missing (i.e. minitest).
216
217exit unless $have_setlocale;
218
219sub getalnum {
220 sort grep /\w/, map { chr } 0..255
221}
222
223sub locatelocale ($$@) {
224 my ($lcall, $alnum, @try) = @_;
225
226 undef $$lcall;
227
228 for (@try) {
229 local $^W = 0; # suppress "Subroutine LC_ALL redefined"
230 if (setlocale(&LC_ALL, $_)) {
231 $$lcall = $_;
232 @$alnum = &getalnum;
233 last;
234 }
235 }
236
237 @$alnum = () unless (defined $$lcall);
238}
239
240# Find some default locale
241
242locatelocale(\$Locale, \@Locale, qw(C POSIX));
243
244# Find some English locale
245
246locatelocale(\$English, \@English,
247 qw(en_US.ISO8859-1 en_GB.ISO8859-1
248 en en_US en_UK en_IE en_CA en_AU en_NZ
249 english english.iso88591
250 american american.iso88591
251 british british.iso88591
252 ));
253
254# Find some German locale
255
256locatelocale(\$German, \@German,
257 qw(de_DE.ISO8859-1 de_AT.ISO8859-1 de_CH.ISO8859-1
258 de de_DE de_AT de_CH
259 german german.iso88591));
260
261# Find some French locale
262
263locatelocale(\$French, \@French,
264 qw(fr_FR.ISO8859-1 fr_BE.ISO8859-1 fr_CA.ISO8859-1 fr_CH.ISO8859-1
265 fr fr_FR fr_BE fr_CA fr_CH
266 french french.iso88591));
267
268# Find some Spanish locale
269
270locatelocale(\$Spanish, \@Spanish,
271 qw(es_AR.ISO8859-1 es_BO.ISO8859-1 es_CL.ISO8859-1
272 es_CO.ISO8859-1 es_CR.ISO8859-1 es_EC.ISO8859-1
273 es_ES.ISO8859-1 es_GT.ISO8859-1 es_MX.ISO8859-1
274 es_NI.ISO8859-1 es_PA.ISO8859-1 es_PE.ISO8859-1
275 es_PY.ISO8859-1 es_SV.ISO8859-1 es_UY.ISO8859-1 es_VE.ISO8859-1
276 es es_AR es_BO es_CL
277 es_CO es_CR es_EC
278 es_ES es_GT es_MX
279 es_NI es_PA es_PE
280 es_PY es_SV es_UY es_VE
281 spanish spanish.iso88591));
282
283# Select the largest of the alpha(num)bets.
284
285($Locale, @Locale) = ($English, @English)
545c8fcc 286 if (@English > @Locale);
8ebc5c01 287($Locale, @Locale) = ($German, @German)
545c8fcc 288 if (@German > @Locale);
8ebc5c01 289($Locale, @Locale) = ($French, @French)
545c8fcc 290 if (@French > @Locale);
8ebc5c01 291($Locale, @Locale) = ($Spanish, @Spanish)
545c8fcc 292 if (@Spanish > @Locale);
8ebc5c01 293
294print "# Locale = $Locale\n";
295print "# Alnum_ = @Locale\n";
296
297{
298 local $^W = 0;
299 setlocale(&LC_ALL, $Locale);
300}
301
302{
303 my $i = 0;
304
305 for (@Locale) {
306 $iLocale{$_} = $i++;
307 }
308}
309
310# Sieve the uppercase and the lowercase.
311
312for (@Locale) {
313 if (/[^\d_]/) { # skip digits and the _
314 if (lc eq $_) {
315 $UPPER{$_} = uc;
316 } else {
317 $lower{$_} = lc;
318 }
319 }
320}
321
8ebc5c01 322# Find the alphabets that are not alphabets in the default locale.
323
324{
325 no locale;
326
327 for (keys %UPPER, keys %lower) {
328 push(@Neoalpha, $_) if (/\W/);
329 }
330}
331
332@Neoalpha = sort @Neoalpha;
333
334# Test \w.
335
336{
337 my $word = join('', @Neoalpha);
338
339 $word =~ /^(\w*)$/;
340
341 print 'not ' if ($1 ne $word);
342}
9fc9f3bf 343print "ok 99\n";
8ebc5c01 344
345# Find places where the collation order differs from the default locale.
346
9fc9f3bf 347print "# testing 100\n";
8ebc5c01 348{
349 my (@k, $i, $j, @d);
350
351 {
352 no locale;
353
354 @k = sort (keys %UPPER, keys %lower);
355 }
356
357 for ($i = 0; $i < @k; $i++) {
358 for ($j = $i + 1; $j < @k; $j++) {
359 if ($iLocale{$k[$j]} < $iLocale{$k[$i]}) {
360 push(@d, [$k[$j], $k[$i]]);
361 }
362 }
363 }
364
365 # Cross-check those places.
366
367 for (@d) {
368 ($i, $j) = @$_;
369 if ($i gt $j) {
9fc9f3bf 370 print "# failed 100 at:\n";
8ebc5c01 371 print "# i = $i, j = $j, i ",
372 $i le $j ? 'le' : 'gt', " j\n";
373 print 'not ';
374 last;
375 }
376 }
377}
9fc9f3bf 378print "ok 100\n";
8ebc5c01 379
380# Cross-check whole character set.
381
9fc9f3bf 382print "# testing 101\n";
8ebc5c01 383for (map { chr } 0..255) {
384 if (/\w/ and /\W/) { print 'not '; last }
385 if (/\d/ and /\D/) { print 'not '; last }
386 if (/\s/ and /\S/) { print 'not '; last }
387 if (/\w/ and /\D/ and not /_/ and
388 not (exists $UPPER{$_} or exists $lower{$_})) {
9fc9f3bf 389 print "# failed 101 at:\n";
774d564b 390 print "# ", ord($_), " '$_'\n";
8ebc5c01 391 print 'not ';
392 last;
393 }
394}
9fc9f3bf 395print "ok 101\n";
8ebc5c01 396
fb73857a 397# Test for read-onlys.
398
399{
400 no locale;
401 $a = "qwerty";
402 {
403 use locale;
404 print "not " if $a cmp "qwerty";
405 }
406}
407print "ok 102\n";
408
409# This test must be the last one because its failure is not fatal.
8ebc5c01 410# The @Locale should be internally consistent.
90248788
TB
411# Thanks to Hallvard Furuseth <h.b.furuseth@usit.uio.no>
412# for inventing a way to test for ordering consistency
413# without requiring any particular order.
414# ++$jhi;#@iki.fi
8ebc5c01 415
fb73857a 416print "# testing 103\n";
8ebc5c01 417{
90248788 418 my ($from, $to, $lesser, $greater, @test, %test, $test, $yes, $no, $sign);
8ebc5c01 419
420 for (0..9) {
421 # Select a slice.
422 $from = int(($_*@Locale)/10);
423 $to = $from + int(@Locale/10);
424 $to = $#Locale if ($to > $#Locale);
425 $lesser = join('', @Locale[$from..$to]);
426 # Select a slice one character on.
427 $from++; $to++;
428 $to = $#Locale if ($to > $#Locale);
429 $greater = join('', @Locale[$from..$to]);
90248788
TB
430 ($yes, $no, $sign) = ($lesser lt $greater
431 ? (" ", "not ", 1)
432 : ("not ", " ", -1));
433 # all these tests should FAIL (return 0).
774d564b 434 @test =
435 (
90248788
TB
436 $no.' ($lesser lt $greater)', # 0
437 $no.' ($lesser le $greater)', # 1
fb73857a 438 'not ($lesser ne $greater)', # 2
439 ' ($lesser eq $greater)', # 3
90248788
TB
440 $yes.' ($lesser ge $greater)', # 4
441 $yes.' ($lesser gt $greater)', # 5
442 $yes.' ($greater lt $lesser )', # 6
443 $yes.' ($greater le $lesser )', # 7
fb73857a 444 'not ($greater ne $lesser )', # 8
445 ' ($greater eq $lesser )', # 9
90248788
TB
446 $no.' ($greater ge $lesser )', # 10
447 $no.' ($greater gt $lesser )', # 11
448 'not (($lesser cmp $greater) == -$sign)' # 12
774d564b 449 );
450 @test{@test} = 0 x @test;
451 $test = 0;
452 for my $ti (@test) { $test{$ti} = eval $ti ; $test ||= $test{$ti} }
453 if ($test) {
fb73857a 454 print "# failed 103 at:\n";
774d564b 455 print "# lesser = '$lesser'\n";
456 print "# greater = '$greater'\n";
90248788
TB
457 print "# lesser cmp greater = ", $lesser cmp $greater, "\n";
458 print "# greater cmp lesser = ", $greater cmp $lesser, "\n";
774d564b 459 print "# (greater) from = $from, to = $to\n";
460 for my $ti (@test) {
461 printf("# %-40s %-4s", $ti,
462 $test{$ti} ? 'FAIL' : 'ok');
463 if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) {
464 printf("(%s == %4d)", $1, eval $1);
465 }
466 print "\n";
467 }
468
fb73857a 469 warn "The locale definition on your system may have errors.\n";
8ebc5c01 470 last;
471 }
472 }
473}
90248788
TB
474
475# eof