| 1 | #!./perl -wT |
| 2 | |
# Compile-time setup: locate the library tree, load the build configuration,
# and skip the whole test file when this perl was built without locale support.
BEGIN {
    chdir 't' if -d 't';
    @INC = ('.', '../lib');
    require Config;
    Config->import;
    # Plan zero tests unless setlocale() exists and locales were not
    # compiled out with NO_LOCALE.
    unless ($Config{d_setlocale} && $Config{ccflags} !~ /\bD?NO_LOCALE\b/) {
        print "1..0\n";
        exit;
    }
    $| = 1;
}
| 14 | |
| 15 | use strict; |
| 16 | |
# Set to a false value to silence debug() and debugf().
my $debug = 1;

use Dumpvalue;

# Dumpvalue is used to render control characters in diagnostics in a
# printable, quoted form.
my $dumper = Dumpvalue->new(tick => qq{"}, quoteHighBit => 0, unctrl => "quote");

# debug(LIST): concatenate LIST, drop the final character (callers normally
# end the message with "\n"), and print the escaped text plus a newline.
sub debug {
    $debug or return;
    my $text = join "", @_;
    chop $text;
    print $dumper->stringify($text, 1), "\n";
}
| 32 | |
# debugf(FORMAT, LIST): printf-style diagnostic, emitted only when $debug is set.
sub debugf {
    return unless $debug;
    printf @_;
}
| 36 | |
# Try to load POSIX and import its locale constants at run time.  If either
# step dies, $have_setlocale stays 0 and only the taint tests are run.
my $have_setlocale = 0;
eval {
    require POSIX;
    import POSIX ':locale_h';
    $have_setlocale++;
};

# Visual C's CRT goes silly on strings of the form "en_US.ISO8859-1"
# and mingw32 uses said silly CRT
$have_setlocale = 0 if (($^O eq 'MSWin32' || $^O eq 'NetWare') && $Config{cc} =~ /^(cl|gcc)/i);

# The number of planned tests depends on whether the setlocale() tests can
# run.  The subs are called with the & sigil because "last" is also a Perl
# keyword and would otherwise be parsed as the loop-control operator.
my $last = $have_setlocale ? &last : &last_without_setlocale;

print "1..$last\n";

# Predeclare LC_ALL so that the bareword compiles under "use strict"; the
# POSIX import above only happens at run time.
use vars qw(&LC_ALL);

# Untainted sample string used throughout the taint tests below.
$a = 'abc %';
| 55 | |
# ok(N, RESULT): print the test line for test number N -- "ok N" when RESULT
# is true, "not ok N" otherwise.
sub ok {
    my ($n, $result) = @_;

    my $prefix = $result ? '' : 'not ';
    print $prefix, "ok $n\n";
}
| 62 | |
| 63 | # First we'll do a lot of taint checking for locales. |
| 64 | # This is the easiest to test, actually, as any locale, |
| 65 | # even the default locale will taint under 'use locale'. |
| 66 | |
# is_tainted(LIST): true when any argument is tainted.  The arguments are
# joined in the same expression as a harmless "kill 0"; under taint mode
# that expression dies if tainted data is involved, which the eval catches.
sub is_tainted { # hello, camel two.
    no warnings 'uninitialized';
    my $probe;
    my $survived = eval { $probe = join("", @_), kill 0; 1 };
    return !$survived;
}
| 72 | |
# check_taint N, VALUE: test N passes when VALUE is tainted.  The ($$)
# prototype lets call sites omit the parentheses.
sub check_taint ($$) {
    ok($_[0], is_tainted($_[1]));
}
| 76 | |
# check_taint_not N, VALUE: test N passes when VALUE is NOT tainted.
sub check_taint_not ($$) {
    ok($_[0], !is_tainted($_[1]));
}
| 80 | |
# Tests 1..98: locale-induced tainting.  The statements below are strictly
# order dependent: each group first resets or taints $_ and then inspects
# the match variables left behind by the most recent pattern match.

use locale; # engage locale and therefore locale taint.

check_taint_not 1, $a;

# Case-changing functions and escapes consult the locale, so their results
# are tainted.
check_taint 2, uc($a);
check_taint 3, "\U$a";
check_taint 4, ucfirst($a);
check_taint 5, "\u$a";
check_taint 6, lc($a);
check_taint 7, "\L$a";
check_taint 8, lcfirst($a);
check_taint 9, "\l$a";

# Numeric formatting is expected to stay untainted.
check_taint_not 10, sprintf('%e', 123.456);
check_taint_not 11, sprintf('%f', 123.456);
check_taint_not 12, sprintf('%g', 123.456);
check_taint_not 13, sprintf('%d', 123.456);
check_taint_not 14, sprintf('%x', 123.456);

$_ = $a; # untaint $_

$_ = uc($a); # taint $_

check_taint 15, $_;

# Matching against the tainted $_ with locale-dependent character classes
# taints the match variables; $2 never matches here and stays untainted.
/(\w)/; # taint $&, $`, $', $+, $1.
check_taint 16, $&;
check_taint 17, $`;
check_taint 18, $';
check_taint 19, $+;
check_taint 20, $1;
check_taint_not 21, $2;

/(.)/; # untaint $&, $`, $', $+, $1.
check_taint_not 22, $&;
check_taint_not 23, $`;
check_taint_not 24, $';
check_taint_not 25, $+;
check_taint_not 26, $1;
check_taint_not 27, $2;

/(\W)/; # taint $&, $`, $', $+, $1.
check_taint 28, $&;
check_taint 29, $`;
check_taint 30, $';
check_taint 31, $+;
check_taint 32, $1;
check_taint_not 33, $2;

/(\s)/; # taint $&, $`, $', $+, $1.
check_taint 34, $&;
check_taint 35, $`;
check_taint 36, $';
check_taint 37, $+;
check_taint 38, $1;
check_taint_not 39, $2;

/(\S)/; # taint $&, $`, $', $+, $1.
check_taint 40, $&;
check_taint 41, $`;
check_taint 42, $';
check_taint 43, $+;
check_taint 44, $1;
check_taint_not 45, $2;

$_ = $a; # untaint $_

check_taint_not 46, $_;

# A literal pattern on untainted data must leave everything untainted.
/(b)/; # this must not taint
check_taint_not 47, $&;
check_taint_not 48, $`;
check_taint_not 49, $';
check_taint_not 50, $+;
check_taint_not 51, $1;
check_taint_not 52, $2;

$_ = $a; # untaint $_

check_taint_not 53, $_;

# Substituting in a tainted replacement taints the target only.
$b = uc($a); # taint $b
s/(.+)/$b/; # this must taint only the $_

check_taint 54, $_;
check_taint_not 55, $&;
check_taint_not 56, $`;
check_taint_not 57, $';
check_taint_not 58, $+;
check_taint_not 59, $1;
check_taint_not 60, $2;

$_ = $a; # untaint $_

s/(.+)/b/; # this must not taint
check_taint_not 61, $_;
check_taint_not 62, $&;
check_taint_not 63, $`;
check_taint_not 64, $';
check_taint_not 65, $+;
check_taint_not 66, $1;
check_taint_not 67, $2;

$b = $a; # untaint $b

# A locale-dependent pattern taints the copy it modifies, not the source.
($b = $a) =~ s/\w/$&/;
check_taint 68, $b; # $b should be tainted.
check_taint_not 69, $a; # $a should be not.

# The four groups below repeat the same check for each case-changing
# escape (\l, \L, \u, \U) used in the replacement part.
$_ = $a; # untaint $_

s/(\w)/\l$1/; # this must taint
check_taint 70, $_;
check_taint 71, $&;
check_taint 72, $`;
check_taint 73, $';
check_taint 74, $+;
check_taint 75, $1;
check_taint_not 76, $2;

$_ = $a; # untaint $_

s/(\w)/\L$1/; # this must taint
check_taint 77, $_;
check_taint 78, $&;
check_taint 79, $`;
check_taint 80, $';
check_taint 81, $+;
check_taint 82, $1;
check_taint_not 83, $2;

$_ = $a; # untaint $_

s/(\w)/\u$1/; # this must taint
check_taint 84, $_;
check_taint 85, $&;
check_taint 86, $`;
check_taint 87, $';
check_taint 88, $+;
check_taint 89, $1;
check_taint_not 90, $2;

$_ = $a; # untaint $_

s/(\w)/\U$1/; # this must taint
check_taint 91, $_;
check_taint 92, $&;
check_taint 93, $`;
check_taint 94, $';
check_taint 95, $+;
check_taint 96, $1;
check_taint_not 97, $2;

# After all this tainting $a should be cool.

check_taint_not 98, $a;
| 237 | |
# Number of the last test that can run without a working setlocale().
sub last_without_setlocale {
    return 98;
}
| 239 | |
| 240 | # I think we've seen quite enough of taint. |
| 241 | # Let us do some *real* locale work now, |
| 242 | # unless setlocale() is missing (i.e. minitest). |
| 243 | |
# Everything below needs a working setlocale(); stop after the taint tests
# otherwise (the plan printed earlier already accounts for this).
exit unless $have_setlocale;

# Find locales.

debug "# Scanning for locales...\n";

# Note that it's okay that some languages have their native names
# capitalized here even though that's not "right".  They are lowercased
# anyway later during the scanning process (and besides, some clueless
# vendor might have them capitalized erroneously anyway).

# Candidate locale table, one record per line with four colon-separated
# fields: locale names, language codes, country codes and encodings.
# Entries within a field are separated by single spaces.  A purely numeric
# encoding N stands for ISO 8859-N (see decode_encodings).
my $locales = <<EOF;
Afrikaans:af:za:1 15
Arabic:ar:dz eg sa:6 arabic8
Brezhoneg Breton:br:fr:1 15
Bulgarski Bulgarian:bg:bg:5
Chinese:zh:cn tw:cn.EUC eucCN eucTW euc.CN euc.TW Big5 GB2312 tw.EUC
Hrvatski Croatian:hr:hr:2
Cymraeg Welsh:cy:cy:1 14 15
Czech:cs:cz:2
Dansk Danish:dk:da:1 15
Nederlands Dutch:nl:be nl:1 15
English American British:en:au ca gb ie nz us uk zw:1 15 cp850
Esperanto:eo:eo:3
Eesti Estonian:et:ee:4 6 13
Suomi Finnish:fi:fi:1 15
Flamish::fl:1 15
Deutsch German:de:at be ch de lu:1 15
Euskaraz Basque:eu:es fr:1 15
Galego Galician:gl:es:1 15
Ellada Greek:el:gr:7 g8
Frysk:fy:nl:1 15
Greenlandic:kl:gl:4 6
Hebrew:iw:il:8 hebrew8
Hungarian:hu:hu:2
Indonesian:in:id:1 15
Gaeilge Irish:ga:IE:1 14 15
Italiano Italian:it:ch it:1 15
Nihongo Japanese:ja:jp:euc eucJP jp.EUC sjis
Korean:ko:kr:
Latine Latin:la:va:1 15
Latvian:lv:lv:4 6 13
Lithuanian:lt:lt:4 6 13
Macedonian:mk:mk:1 15
Maltese:mt:mt:3
Moldovan:mo:mo:2
Norsk Norwegian:no no\@nynorsk:no:1 15
Occitan:oc:es:1 15
Polski Polish:pl:pl:2
Rumanian:ro:ro:2
Russki Russian:ru:ru su ua:5 koi8 koi8r KOI8-R koi8u cp1251 cp866
Serbski Serbian:sr:yu:5
Slovak:sk:sk:2
Slovene Slovenian:sl:si:2
Sqhip Albanian:sq:sq:1 15
Svenska Swedish:sv:fi se:1 15
Thai:th:th:11 tis620
Turkish:tr:tr:9 turkish8
Yiddish:yi::1 15
EOF

# On OS/390 two records are removed from the table before scanning.
if ($^O eq 'os390') {
    # These cause heartburn.  Broken locales?
    $locales =~ s/Svenska Swedish:sv:fi se:1 15\n//;
    $locales =~ s/Thai:th:th:11 tis620\n//;
}
| 310 | |
# True when the utf8 hint bit (0x08) is set in $^H or when ${^OPEN}
# mentions a :utf8 layer.
sub in_utf8 () {
    return (($^H & 0x08) || ((${^OPEN} || "") =~ /:utf8/));
}
| 312 | |
# Load the helper file matching the current source/IO encoding mode.
require(in_utf8 ? "lib/locale/utf8" : "lib/locale/latin1");

# @Locale  - names of all locales that setlocale() accepted
# $Locale  - the locale currently under test
# @Alnum_  - word characters collected by getalnum_()
my (@Locale, $Locale, @Alnum_);

# Bookkeeping for locales/tests skipped because of UTF-8 (reported at the end).
my (@utf8locale, %utf8skip);
| 325 | |
# Return, in sorted order, every character with code 0..255 that matches \w.
sub getalnum_ {
    my @word_chars = grep { /\w/ } map { chr($_) } 0 .. 255;
    return sort @word_chars;
}
| 329 | |
# trylocale(NAME): remember NAME in @Locale if setlocale() accepts it.
# Note that a successful call leaves the process in that locale.
sub trylocale {
    my ($candidate) = @_;
    push @Locale, $candidate if setlocale(LC_ALL, $candidate);
}
| 336 | |
# decode_encodings(SPEC): expand the space-separated encodings field of a
# locale table record into a list of encoding suffixes to try.
#
#   - a purely numeric entry N yields "ISO8859-N" and "iso8859N" (plus
#     "roman8" when N is 1);
#   - any other entry yields itself and itself with ".UTF-8" appended;
#   - on OS/390 three IBM code pages are always appended.
#
# SPEC may be undef or empty (a record such as "Korean:ko:kr:" loses its
# trailing empty field when split on ':'); that is treated as "no
# encodings" instead of triggering an uninitialized-value warning.
sub decode_encodings {
    my $spec = shift;
    $spec = '' unless defined $spec;

    my @enc;

    foreach (split(/ /, $spec)) {
        if (/^(\d+)$/) {
            push @enc, "ISO8859-$1";
            push @enc, "iso8859$1";     # HP
            if ($1 eq '1') {
                push @enc, "roman8";    # HP
            }
        } else {
            push @enc, $_;
            push @enc, "$_.UTF-8";
        }
    }
    if ($^O eq 'os390') {
        push @enc, qw(IBM-037 IBM-819 IBM-1047);
    }

    return @enc;
}
| 358 | |
# Probe the standard names first, then the common spellings of the
# ISO 8859 / Latin-N locales for N = 0..15.
trylocale("C");
trylocale("POSIX");
for my $n (0 .. 15) {
    for my $prefix (qw(ISO8859- iso8859 iso8859- iso_8859_ isolatin isolatin- iso_latin_)) {
        trylocale("$prefix$n");
    }
}
| 370 | |
| 371 | # Sanitize the environment so that we can run the external 'locale' |
| 372 | # program without the taint mode getting grumpy. |
| 373 | |
| 374 | # $ENV{PATH} is special in VMS. |
# $ENV{PATH} is special in VMS: only drop it elsewhere, or on VMS when
# the build configuration reports d_setenv.
if ($^O ne 'VMS' || $Config{d_setenv}) {
    delete $ENV{PATH};
}

# Other subversive stuff.
delete $ENV{$_} for qw(IFS CDPATH ENV BASH_ENV);
| 379 | |
# Discover candidate locales, using the first source that is available:
#   1. the output of "/usr/bin/locale -a";
#   2. on VMS, the entries of the SYS$I18N_LOCALE directory;
#   3. brute force over the $locales table (names, language_country pairs
#      and every encoding suffix from decode_encodings).
# Every candidate is handed to trylocale().
if (-x "/usr/bin/locale" && open(LOCALES, "/usr/bin/locale -a 2>/dev/null|")) {
    while (<LOCALES>) {
        chomp;
        trylocale($_);
    }
    close(LOCALES);
} elsif ($^O eq 'VMS' && defined($ENV{'SYS$I18N_LOCALE'}) && -d 'SYS$I18N_LOCALE') {
    # The SYS$I18N_LOCALE logical name search list was not present on
    # VAX VMS V5.5-12, but was on AXP && VAX VMS V6.2 as well as later versions.
    if (opendir(LOCALES, "SYS\$I18N_LOCALE:")) {
        # Test definedness, not truth, so that an entry named "0" does
        # not end the scan early.
        while (defined(my $entry = readdir(LOCALES))) {
            chomp $entry;
            trylocale($entry);
        }
        # Directory handles must be closed with closedir(), not close().
        closedir(LOCALES);
    }
} else {

    # This is going to be slow.

    foreach my $locale (split(/\n/, $locales)) {
        # A negative limit keeps trailing empty fields, so a record such
        # as "Korean:ko:kr:" yields an empty encodings field, not undef.
        my ($locale_name, $language_codes, $country_codes, $encodings) =
            split(/:/, $locale, -1);
        $encodings = '' unless defined $encodings;
        my @enc = decode_encodings($encodings);
        foreach my $loc (split(/ /, $locale_name)) {
            trylocale($loc);
            foreach my $enc (@enc) {
                trylocale("$loc.$enc");
            }
            # Retry with the all-lowercase spelling of the name.
            $loc = lc $loc;
            foreach my $enc (@enc) {
                trylocale("$loc.$enc");
            }
        }
        foreach my $lang (split(/ /, $language_codes)) {
            trylocale($lang);
            foreach my $country (split(/ /, $country_codes)) {
                # lang_country, as written in the table ...
                my $lc = "${lang}_${country}";
                trylocale($lc);
                foreach my $enc (@enc) {
                    trylocale("$lc.$enc");
                }
                # ... and with the country code uppercased.
                my $lC = "${lang}_\U${country}";
                trylocale($lC);
                foreach my $enc (@enc) {
                    trylocale("$lC.$enc");
                }
            }
        }
    }
}
| 430 | |
# Scanning leaves the process in whatever locale was accepted last;
# go back to "C" before sorting and reporting the list.
setlocale(LC_ALL, "C");

@Locale = sort @Locale;

debug "# Locales = ", join(" ", @Locale), "\n";

# %Problem{test}{locale} - set when a test failed for a locale
# %Okay{test}            - list of locales for which a test passed
my (%Problem, %Okay, %Testing);

# Characters that are alphabetic in the tested locale but not by default.
my (@Neoalpha, %Neoalpha);
| 442 | |
# tryneoalpha(LOCALE, TESTNO, PASSED): record the outcome of test TESTNO
# for LOCALE -- a pass is appended to @{$Okay{TESTNO}}, a failure is
# flagged in %Problem and reported through debug().
sub tryneoalpha {
    my ($locale_name, $test_no, $passed) = @_;
    if ($passed) {
        push @{ $Okay{$test_no} }, $locale_name;
    } else {
        $Problem{$test_no}{$locale_name} = 1;
        debug "# failed $test_no with locale '$locale_name'\n";
    }
}
| 452 | |
# Tests 99..117: run once for every locale found by the scan.  Results are
# only recorded here (via tryneoalpha); the "ok"/"not ok" lines are printed
# after the loop.
foreach $Locale (@Locale) {
    debug "# Locale = $Locale\n";

    unless (setlocale(LC_ALL, $Locale)) {
        foreach (99..103) {
            $Problem{$_}{$Locale} = -1;
        }
        next;
    }

    # Collect the word characters only after switching locale, so that
    # @Alnum_ describes $Locale and not the previously tested locale.
    @Alnum_ = getalnum_();
    debug "# w = ", join("",@Alnum_), "\n";

    # Sieve the uppercase and the lowercase.

    my %UPPER = ();
    my %lower = ();
    my %BoThCaSe = ();
    for (@Alnum_) {
        if (/[^\d_]/) { # skip digits and the _
            if (uc($_) eq $_) {
                $UPPER{$_} = $_;
            }
            if (lc($_) eq $_) {
                $lower{$_} = $_;
            }
        }
    }
    # Characters unchanged by both uc and lc are neither upper nor lower.
    foreach (keys %UPPER) {
        $BoThCaSe{$_}++ if exists $lower{$_};
    }
    foreach (keys %lower) {
        $BoThCaSe{$_}++ if exists $UPPER{$_};
    }
    foreach (keys %BoThCaSe) {
        delete $UPPER{$_};
        delete $lower{$_};
    }

    debug "# UPPER = ", join("", sort keys %UPPER ), "\n";
    debug "# lower = ", join("", sort keys %lower ), "\n";
    debug "# BoThCaSe = ", join("", sort keys %BoThCaSe), "\n";

    # Find the alphabets that are not alphabets in the default locale.

    {
        no locale;

        @Neoalpha = ();
        for (keys %UPPER, keys %lower) {
            push(@Neoalpha, $_) if (/\W/);
            $Neoalpha{$_} = $_;
        }
    }

    @Neoalpha = sort @Neoalpha;

    debug "# Neoalpha = ", join("",@Neoalpha), "\n";

    if (@Neoalpha == 0) {
        # If we have no Neoalphas the remaining tests are no-ops.
        debug "# no Neoalpha, skipping tests 99..102 for locale '$Locale'\n";
        foreach (99..102) {
            push @{$Okay{$_}}, $Locale;
        }
    } else {

        # Test \w.

        my $word = join('', @Neoalpha);

        if ($Locale =~ /utf-?8/i) {
            debug "# unknown whether locale and Unicode have the same \\w, skipping test 99 for locale '$Locale'\n";
            push @{$Okay{99}}, $Locale;
        } else {
            if ($word =~ /^(\w+)$/) {
                tryneoalpha($Locale, 99, 1);
            } else {
                tryneoalpha($Locale, 99, 0);
            }
        }

        # Cross-check the whole 8-bit character set.

        for (map { chr } 0..255) {
            tryneoalpha($Locale, 100,
                        (/\w/ xor /\W/) ||
                        (/\d/ xor /\D/) ||
                        (/\s/ xor /\S/));
        }

        # Test for read-only scalars' locale vs non-locale comparisons.

        {
            no locale;
            $a = "qwerty";
            {
                use locale;
                tryneoalpha($Locale, 101, ($a cmp "qwerty") == 0);
            }
        }

        # Test 102: consistency of the string comparison operators on
        # overlapping slices of the sorted word characters.
        {
            my ($from, $to, $lesser, $greater,
                @test, %test, $test, $yes, $no, $sign);

            for (0..9) {
                # Select a slice.
                $from = int(($_*@Alnum_)/10);
                $to = $from + int(@Alnum_/10);
                $to = $#Alnum_ if ($to > $#Alnum_);
                $lesser = join('', @Alnum_[$from..$to]);
                # Select a slice one character on.
                $from++; $to++;
                $to = $#Alnum_ if ($to > $#Alnum_);
                $greater = join('', @Alnum_[$from..$to]);
                ($yes, $no, $sign) = ($lesser lt $greater
                                      ? (" ", "not ", 1)
                                      : ("not ", " ", -1));
                # all these tests should FAIL (return 0).
                # Exact lt or gt cannot be tested because
                # in some locales, say, eacute and E may test equal.
                @test =
                    (
                     $no.' ($lesser le $greater)', # 1
                     'not ($lesser ne $greater)', # 2
                     ' ($lesser eq $greater)', # 3
                     $yes.' ($lesser ge $greater)', # 4
                     $yes.' ($lesser ge $greater)', # 5
                     $yes.' ($greater le $lesser )', # 7
                     'not ($greater ne $lesser )', # 8
                     ' ($greater eq $lesser )', # 9
                     $no.' ($greater ge $lesser )', # 10
                     'not (($lesser cmp $greater) == -($sign))' # 11
                     );
                # Parenthesised (0) makes "x" repeat a list; a bare 0
                # would build the single string "000..." instead.
                @test{@test} = (0) x @test;
                $test = 0;
                for my $ti (@test) {
                    $test{$ti} = eval $ti;
                    $test ||= $test{$ti}
                }
                tryneoalpha($Locale, 102, $test == 0);
                if ($test) {
                    debug "# lesser = '$lesser'\n";
                    debug "# greater = '$greater'\n";
                    debug "# lesser cmp greater = ",
                          $lesser cmp $greater, "\n";
                    debug "# greater cmp lesser = ",
                          $greater cmp $lesser, "\n";
                    debug "# (greater) from = $from, to = $to\n";
                    for my $ti (@test) {
                        debugf("# %-40s %-4s", $ti,
                               $test{$ti} ? 'FAIL' : 'ok');
                        if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) {
                            debugf("(%s == %4d)", $1, eval $1);
                        }
                        debug "\n#";
                    }

                    last;
                }
            }
        }
    }

    use locale;

    my ($x, $y) = (1.23, 1.23);

    $a = "$x";
    printf ''; # printf used to reset locale to "C"
    $b = "$y";

    debug "# 103..107: a = $a, b = $b, Locale = $Locale\n";

    tryneoalpha($Locale, 103, $a eq $b);

    my $c = "$x";
    my $z = sprintf ''; # sprintf used to reset locale to "C"
    my $d = "$y";

    debug "# 104..107: c = $c, d = $d, Locale = $Locale\n";

    tryneoalpha($Locale, 104, $c eq $d);

    {
        use warnings;
        my $w = 0;
        # Count (and echo) every warning raised inside this block;
        # test 115 requires that there were none.
        local $SIG{__WARN__} =
            sub {
                print "# @_\n";
                $w++;
            };

        # The == (among other ops) used to warn for locales
        # that had something else than "." as the radix character.

        tryneoalpha($Locale, 105, $c == 1.23);

        tryneoalpha($Locale, 106, $c == $x);

        tryneoalpha($Locale, 107, $c == $d);

        {
            # no locale; # XXX did this ever work correctly?

            my $e = "$x";

            debug "# 108..110: e = $e, Locale = $Locale\n";

            tryneoalpha($Locale, 108, $e == 1.23);

            tryneoalpha($Locale, 109, $e == $x);

            tryneoalpha($Locale, 110, $e == $c);
        }

        my $f = "1.23";
        my $g = 2.34;

        debug "# 111..115: f = $f, g = $g, locale = $Locale\n";

        tryneoalpha($Locale, 111, $f == 1.23);

        tryneoalpha($Locale, 112, $f == $x);

        tryneoalpha($Locale, 113, $f == $c);

        tryneoalpha($Locale, 114, abs(($f + $g) - 3.57) < 0.01);

        tryneoalpha($Locale, 115, $w == 0);
    }

    # Does taking lc separately differ from taking
    # the lc "in-line"?  (This was the bug 19990704.002, change #3568.)
    # The bug was in the caching of the 'o'-magic.
    {
        use locale;

        sub lcA {
            my $lc0 = lc $_[0];
            my $lc1 = lc $_[1];
            return $lc0 cmp $lc1;
        }

        sub lcB {
            return lc($_[0]) cmp lc($_[1]);
        }

        my $x = "ab";
        my $y = "aa";
        my $z = "AB";

        tryneoalpha($Locale, 116,
                    lcA($x, $y) == 1 && lcB($x, $y) == 1 ||
                    lcA($x, $z) == 0 && lcB($x, $z) == 0);
    }

    # Does lc of an UPPER (if different from the UPPER) match
    # case-insensitively the UPPER, and does the UPPER match
    # case-insensitively the lc of the UPPER.  And vice versa.
    {
        use locale;
        no utf8;
        my $re = qr/[\[\(\{\*\+\?\|\^\$\\]/;

        # Characters for which the two /i matches disagree.
        my @f = ();
        foreach my $x (keys %UPPER) {
            my $y = lc $x;
            next unless uc $y eq $x;
            print "# UPPER $x lc $y ",
            $x =~ /$y/i ? 1 : 0, " ",
            $y =~ /$x/i ? 1 : 0, "\n" if 0;
            # If $x and $y contain regular expression characters
            # AND THEY lowercase (/i) to regular expression characters,
            # regcomp() will be mightily confused.  No, the \Q doesn't
            # help here (maybe regex engine internal lowercasing
            # is done after the \Q?)  An example of this happening is
            # the bg_BG (Bulgarian) locale under EBCDIC (OS/390 USS):
            # the chr(173) (the "[") is the lowercase of the chr(235).
            # Similarly losing EBCDIC locales include cs_cz, cs_CZ,
            # el_gr, el_GR, en_us.IBM-037 (!), en_US.IBM-037 (!),
            # et_ee, et_EE, hr_hr, hr_HR, hu_hu, hu_HU, lt_LT,
            # mk_mk, mk_MK, nl_nl.IBM-037, nl_NL.IBM-037,
            # pl_pl, pl_PL, ro_ro, ro_RO, ru_ru, ru_RU,
            # sk_sk, sk_SK, sl_si, sl_SI, tr_tr, tr_TR.
            if ($x =~ $re || $y =~ $re) {
                print "# Regex characters in '$x' or '$y', skipping test 117 for locale '$Locale'\n";
                next;
            }
            # With utf8 both will fail since the locale concept
            # of upper/lower does not work well in Unicode.
            push @f, $x unless $x =~ /$y/i == $y =~ /$x/i;
        }

        # The lowercase pass and the recording of test 117 run once per
        # locale, after (not inside) the uppercase pass, so the result is
        # recorded exactly once even when %UPPER is empty.
        foreach my $x (keys %lower) {
            my $y = uc $x;
            next unless lc $y eq $x;
            print "# lower $x uc $y ",
            $x =~ /$y/i ? 1 : 0, " ",
            $y =~ /$x/i ? 1 : 0, "\n" if 0;
            if ($x =~ $re || $y =~ $re) { # See above.
                print "# Regex characters in '$x' or '$y', skipping test 117 for locale '$Locale'\n";
                next;
            }
            # With utf8 both will fail since the locale concept
            # of upper/lower does not work well in Unicode.
            push @f, $x unless $x =~ /$y/i == $y =~ /$x/i;
        }
        tryneoalpha($Locale, 117, @f == 0);
        if (@f) {
            print "# failed 117 locale '$Locale' characters @f\n"
        }
    }
}
| 767 | |
| 768 | # Recount the errors. |
| 769 | |
# Print the result line for every setlocale-dependent test.  A test fails
# when any locale flagged a problem for it or when no locale passed it.
for my $test_no (last_without_setlocale() + 1 .. $last) {
    my $failed = $Problem{$test_no}
              || !defined $Okay{$test_no}
              || !@{ $Okay{$test_no} };
    if ($failed) {
        if ($test_no == 102) {
            print "# The failure of test 102 is not necessarily fatal.\n",
                  "# It usually indicates a problem in the environment,\n",
                  "# not in Perl itself.\n";
        }
        print "not ";
    }
    print "ok $test_no\n";
}
| 781 | |
| 782 | # Give final advice. |
| 783 | |
# Set once at least one test has reported a problem locale.
my $didwarn = 0;

# For every failed test, list the locales it failed in and ask for a report.
for my $test_no (99 .. $last) {
    next unless $Problem{$test_no};

    my @bad    = sort keys %{ $Problem{$test_no} };
    my $single = @bad == 1;
    my $list   = join(" ", @bad);
    # Wrap the list into comment lines of roughly 50-60 characters.
    $list =~ s/(.{50,60}) /$1\n#\t/g;
    print
        "#\n",
        "# The locale ", ($single ? "definition" : "definitions"), "\n#\n",
        "#\t", $list, "\n#\n",
        "# on your system may have errors because the locale test $test_no\n",
        "# failed in ", ($single ? "that locale" : "those locales"),
        ".\n";
    print <<EOW;
#
# If your users are not using these locales you are safe for the moment,
# but please report this failure first to perlbug\@perl.com using the
# perlbug script (as described in the INSTALL file) so that the exact
# details of the failures can be sorted out first and then your operating
# system supplier can be alerted about these anomalies.
#
EOW
    $didwarn = 1;
}
| 810 | |
| 811 | # Tell which locales were okay and which were not. |
| 812 | |
# When anything failed, summarise on STDERR which locales were clean and
# which were not.
if ($didwarn) {
    my (@clean, @broken);

    # NOTE(review): only tests 102..$last are consulted here although the
    # advice above covers 99..$last -- confirm whether that is intended.
    for my $name (@Locale) {
        my $failures = grep { $Problem{$_}{$name} } 102 .. $last;
        if ($failures == 0) {
            push @clean, $name;
        } else {
            push @broken, $name;
        }
    }

    if (@clean) {
        my $list = join(" ", @clean);
        $list =~ s/(.{50,60}) /$1\n#\t/g;

        warn "# The following locales\n#\n"
           . "#\t" . $list . "\n#\n"
           . "# tested okay.\n#\n";
    } else {
        warn "# None of your locales were fully okay.\n";
    }

    if (@broken) {
        my $list = join(" ", @broken);
        $list =~ s/(.{50,60}) /$1\n#\t/g;

        warn "# The following locales\n#\n"
           . "#\t" . $list . "\n#\n"
           . "# had problems.\n#\n";
    } else {
        warn "# None of your locales were broken.\n";
    }

    if (@utf8locale) {
        my $list = join(" ", @utf8locale);
        $list =~ s/(.{50,60}) /$1\n#\t/g;

        warn "#\n# The following locales\n#\n"
           . "#\t" . $list . "\n#\n"
           . "# were skipped for the tests "
           . join(" ", sort {$a<=>$b} keys %utf8skip) . "\n"
           . "# because UTF-8 and locales do not work together in Perl.\n#\n";
    }
}
| 860 | |
# Number of the last test when the setlocale() tests can run.  Must be
# called as &last because "last" is also a Perl keyword.
sub last {
    return 117;
}
| 862 | |
| 863 | # eof |