lib/locale.t

   1 #!./perl -wT
   2
   3 # This tests plain 'use locale' and adorned 'use locale ":not_characters"'
   4 # Because these pragmas are compile time, and I (khw) am trying to test
   5 # without using 'eval' as much as possible, which might cloud the issue,  the
   6 # crucial parts of the code are duplicated in a block for each pragma.
   7
   8 # To make a TODO test, add the string 'TODO' to its %test_names value
   9
  10 my $is_ebcdic = ord("A") == 193;
  11
  12 no warnings 'locale';  # We test even weird locales; and do some scary things
  13                        # in ok locales
  14
  15 binmode STDOUT, ':utf8';
  16 binmode STDERR, ':utf8';
  17
  18 BEGIN {
  19     chdir 't' if -d 't';
  20     @INC = '../lib';
  21     unshift @INC, '.';
  22     require './loc_tools.pl';
  23     unless (locales_enabled('LC_CTYPE')) {
  24         print "1..0\n";
  25         exit;
  26     }
  27     $| = 1;
  28     require Config; import Config;
  29 }
  30
  31 use strict;
  32 use feature 'fc', 'postderef';
  33
  34 # =1 adds debugging output; =2 increases the verbosity somewhat
  35 my $debug = $ENV{PERL_DEBUG_FULL_TEST} // 0;
  36
  37 # Certain tests have been shown to be problematical for a few locales.  Don't
  38 # fail them unless at least this percentage of the tested locales fail.
  39 # On AIX machines, many locales call a no-break space a graphic.
  40 # (There aren't 1000 locales currently in existence, so 99.9 works)
  41 my $acceptable_failure_percentage = ($^O =~ / ^ ( AIX ) $ /ix)
  42                                      ? 99.9
  43                                      : 5;
  44
  45 # The list of test numbers of the problematic tests.
  46 my %problematical_tests;
  47
  48 # If any %problematical_tests fails in one of these locales, it is
  49 # considered a TODO.
  50 my %known_bad_locales = (
  51                           irix => qr/ ^ (?: cs | hu | sk ) $/x,
  52                           darwin => qr/ ^ lt_LT.ISO8859 /ix,
  53                           os390 => qr/ ^ italian /ix,
  54                         );
  55
  56 use Dumpvalue;
  57
  58 my $dumper = Dumpvalue->new(
  59                             tick => qq{"},
  60                             quoteHighBit => 0,
  61                             unctrl => "quote"
  62                            );
  63 sub debug {
  64   return unless $debug;
  65   my($mess) = join "", '# ', @_;
  66   chomp $mess;
  67   print STDERR $dumper->stringify($mess,1), "\n";
  68 }
  69
  70 sub debug_more {
  71   return unless $debug > 1;
  72   return debug(@_);
  73 }
  74
  75 sub debugf {
  76     printf STDERR @_ if $debug;
  77 }
  78
  79 $a = 'abc %9';
  80
  81 my $test_num = 0;
  82
  83 sub ok {
  84     my ($result, $message) = @_;
  85     $message = "" unless defined $message;
  86
  87     print 'not ' unless ($result);
  88     print "ok " . ++$test_num;
  89     print " $message";
  90     print "\n";
  91     return ($result) ? 1 : 0;
  92 }
  93
  94 # First we'll do a lot of taint checking for locales.
  95 # This is the easiest to test, actually, as any locale,
  96 # even the default locale will taint under 'use locale'.
  97
  98 sub is_tainted { # hello, camel two.
  99     no warnings 'uninitialized' ;
 100     my $dummy;
 101     local $@;
 102     not eval { $dummy = join("", @_), kill 0; 1 }
 103 }
 104
 105 sub check_taint ($;$) {
 106     my $message_tail = $_[1] // "";
 107
 108     # Extra blanks are so aligns with taint_not output
 109     $message_tail = ":     $message_tail" if $message_tail;
 110     ok is_tainted($_[0]), "verify that is tainted$message_tail";
 111 }
 112
 113 sub check_taint_not ($;$) {
 114     my $message_tail = $_[1] // "";
 115     $message_tail = ":  $message_tail" if $message_tail;
 116     ok((not is_tainted($_[0])), "verify that isn't tainted$message_tail");
 117 }
 118
 119 foreach my $category (qw(ALL COLLATE CTYPE MESSAGES MONETARY NUMERIC TIME)) {
 120     my $short_result = locales_enabled($category);
 121     ok ($short_result == 0 || $short_result == 1,
 122         "Verify locales_enabled('$category') returns 0 or 1");
 123     debug("locales_enabled('$category') returned '$short_result'");
 124     my $long_result = locales_enabled("LC_$category");
 125     if (! ok ($long_result == $short_result,
 126               "   and locales_enabled('LC_$category') returns "
 127             . "the same value")
 128     ) {
 129         debug("locales_enabled('LC_$category') returned $long_result");
 130     }
 131 }
 132
 133 "\tb\t" =~ /^m?(\s)(.*)\1$/;
 134 check_taint_not   $&, "not tainted outside 'use locale'";
 135 ;
 136
 137 use locale;     # engage locale and therefore locale taint.
 138
 139 # BE SURE TO COPY ANYTHING YOU ADD to these tests to the block below for
 140 # ":notcharacters"
 141
 142 check_taint_not   $a, '$a';
 143
 144 check_taint       uc($a), 'uc($a)';
 145 check_taint       "\U$a", '"\U$a"';
 146 check_taint       ucfirst($a), 'ucfirst($a)';
 147 check_taint       "\u$a", '"\u$a"';
 148 check_taint       lc($a), 'lc($a)';
 149 check_taint       fc($a), 'fc($a)';
 150 check_taint       "\L$a", '"\L$a"';
 151 check_taint       "\F$a", '"\F$a"';
 152 check_taint       lcfirst($a), 'lcfirst($a)';
 153 check_taint       "\l$a", '"\l$a"';
 154
 155 check_taint_not  sprintf('%e', 123.456), "sprintf('%e', 123.456)";
 156 check_taint_not  sprintf('%f', 123.456), "sprintf('%f', 123.456)";
 157 check_taint_not  sprintf('%g', 123.456), "sprintf('%g', 123.456)";
 158 check_taint_not  sprintf('%d', 123.456), "sprintf('%d', 123.456)";
 159 check_taint_not  sprintf('%x', 123.456), "sprintf('%x', 123.456)";
 160
 161 $_ = $a;        # untaint $_
 162
 163 $_ = uc($a);    # taint $_
 164
 165 check_taint      $_, '$_ = uc($a)';
 166
 167 /(\w)/; # taint $&, $`, $', $+, $1.
 168 check_taint      $&, "\$& from /(\\w)/";
 169 check_taint      $`, "\t\$`";
 170 check_taint      $', "\t\$'";
 171 check_taint      $+, "\t\$+";
 172 check_taint      $1, "\t\$1";
 173 check_taint_not  $2, "\t\$2";
 174
 175 /(.)/;  # untaint $&, $`, $', $+, $1.
 176 check_taint_not  $&, "\$& from /(.)/";
 177 check_taint_not  $`, "\t\$`";
 178 check_taint_not  $', "\t\$'";
 179 check_taint_not  $+, "\t\$+";
 180 check_taint_not  $1, "\t\$1";
 181 check_taint_not  $2, "\t\$2";
 182
 183 /(\W)/; # taint $&, $`, $', $+, $1.
 184 check_taint      $&, "\$& from /(\\W)/";
 185 check_taint      $`, "\t\$`";
 186 check_taint      $', "\t\$'";
 187 check_taint      $+, "\t\$+";
 188 check_taint      $1, "\t\$1";
 189 check_taint_not  $2, "\t\$2";
 190
 191 /(.)/;  # untaint $&, $`, $', $+, $1.
 192 check_taint_not  $&, "\$& from /(.)/";
 193 check_taint_not  $`, "\t\$`";
 194 check_taint_not  $', "\t\$'";
 195 check_taint_not  $+, "\t\$+";
 196 check_taint_not  $1, "\t\$1";
 197 check_taint_not  $2, "\t\$2";
 198
 199 /(\s)/; # taint $&, $`, $', $+, $1.
 200 check_taint      $&, "\$& from /(\\s)/";
 201 check_taint      $`, "\t\$`";
 202 check_taint      $', "\t\$'";
 203 check_taint      $+, "\t\$+";
 204 check_taint      $1, "\t\$1";
 205 check_taint_not  $2, "\t\$2";
 206
 207 /(.)/;  # untaint $&, $`, $', $+, $1.
 208 check_taint_not  $&, "\$& from /(.)/";
 209
 210 /(\S)/; # taint $&, $`, $', $+, $1.
 211 check_taint      $&, "\$& from /(\\S)/";
 212 check_taint      $`, "\t\$`";
 213 check_taint      $', "\t\$'";
 214 check_taint      $+, "\t\$+";
 215 check_taint      $1, "\t\$1";
 216 check_taint_not  $2, "\t\$2";
 217
 218 /(.)/;  # untaint $&, $`, $', $+, $1.
 219 check_taint_not  $&, "\$& from /(.)/";
 220
 221 "0" =~ /(\d)/;  # taint $&, $`, $', $+, $1.
 222 check_taint      $&, "\$& from /(\\d)/";
 223 check_taint      $`, "\t\$`";
 224 check_taint      $', "\t\$'";
 225 check_taint      $+, "\t\$+";
 226 check_taint      $1, "\t\$1";
 227 check_taint_not  $2, "\t\$2";
 228
 229 /(.)/;  # untaint $&, $`, $', $+, $1.
 230 check_taint_not  $&, "\$& from /(.)/";
 231
 232 /(\D)/; # taint $&, $`, $', $+, $1.
 233 check_taint      $&, "\$& from /(\\D)/";
 234 check_taint      $`, "\t\$`";
 235 check_taint      $', "\t\$'";
 236 check_taint      $+, "\t\$+";
 237 check_taint      $1, "\t\$1";
 238 check_taint_not  $2, "\t\$2";
 239
 240 /(.)/;  # untaint $&, $`, $', $+, $1.
 241 check_taint_not  $&, "\$& from /(.)/";
 242
 243 /([[:alnum:]])/;        # taint $&, $`, $', $+, $1.
 244 check_taint      $&, "\$& from /([[:alnum:]])/";
 245 check_taint      $`, "\t\$`";
 246 check_taint      $', "\t\$'";
 247 check_taint      $+, "\t\$+";
 248 check_taint      $1, "\t\$1";
 249 check_taint_not  $2, "\t\$2";
 250
 251 /(.)/;  # untaint $&, $`, $', $+, $1.
 252 check_taint_not  $&, "\$& from /(.)/";
 253
 254 /([[:^alnum:]])/;       # taint $&, $`, $', $+, $1.
 255 check_taint      $&, "\$& from /([[:^alnum:]])/";
 256 check_taint      $`, "\t\$`";
 257 check_taint      $', "\t\$'";
 258 check_taint      $+, "\t\$+";
 259 check_taint      $1, "\t\$1";
 260 check_taint_not  $2, "\t\$2";
 261
 262 "a" =~ /(a)|(\w)/;      # taint $&, $`, $', $+, $1.
 263 check_taint      $&, "\$& from /(a)|(\\w)/";
 264 check_taint      $`, "\t\$`";
 265 check_taint      $', "\t\$'";
 266 check_taint      $+, "\t\$+";
 267 check_taint      $1, "\t\$1";
 268 ok($1 eq 'a', ("\t" x 5) . "\$1 is 'a'");
 269 ok(! defined $2, ("\t" x 5) . "\$2 is undefined");
 270 check_taint_not  $2, "\t\$2";
 271 check_taint_not  $3, "\t\$3";
 272
 273 /(.)/;  # untaint $&, $`, $', $+, $1.
 274 check_taint_not  $&, "\$& from /(.)/";
 275
 276 "\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i;    # no tainting because no locale dependence
 277 check_taint_not      $&, "\$& from /(\\N{CYRILLIC CAPITAL LETTER A})/i";
 278 check_taint_not      $`, "\t\$`";
 279 check_taint_not      $', "\t\$'";
 280 check_taint_not      $+, "\t\$+";
 281 check_taint_not      $1, "\t\$1";
 282 ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\t\$1 is 'small cyrillic a'");
 283 check_taint_not      $2, "\t\$2";
 284
 285 /(.)/;  # untaint $&, $`, $', $+, $1.
 286 check_taint_not  $&, "\$& from /./";
 287
 288 "(\N{KELVIN SIGN})" =~ /(\N{KELVIN SIGN})/i;    # taints because depends on locale
 289 check_taint      $&, "\$& from /(\\N{KELVIN SIGN})/i";
 290 check_taint      $`, "\t\$`";
 291 check_taint      $', "\t\$'";
 292 check_taint      $+, "\t\$+";
 293 check_taint      $1, "\t\$1";
 294 check_taint_not      $2, "\t\$2";
 295
 296 /(.)/;  # untaint $&, $`, $', $+, $1.
 297 check_taint_not  $&, "\$& from /(.)/";
 298
 299 "a:" =~ /(.)\b(.)/;     # taint $&, $`, $', $+, $1.
 300 check_taint      $&, "\$& from /(.)\\b(.)/";
 301 check_taint      $`, "\t\$`";
 302 check_taint      $', "\t\$'";
 303 check_taint      $+, "\t\$+";
 304 check_taint      $1, "\t\$1";
 305 check_taint      $2, "\t\$2";
 306 check_taint_not  $3, "\t\$3";
 307
 308 /(.)/;  # untaint $&, $`, $', $+, $1.
 309 check_taint_not  $&, "\$& from /./";
 310
 311 "aa" =~ /(.)\B(.)/;     # taint $&, $`, $', $+, $1.
 312 check_taint      $&, "\$& from /(.)\\B(.)/";
 313 check_taint      $`, "\t\$`";
 314 check_taint      $', "\t\$'";
 315 check_taint      $+, "\t\$+";
 316 check_taint      $1, "\t\$1";
 317 check_taint      $2, "\t\$2";
 318 check_taint_not  $3, "\t\$3";
 319
 320 /(.)/;  # untaint $&, $`, $', $+, $1.
 321 check_taint_not  $&, "\$& from /./";
 322
 323 "aaa" =~ /(.).(\1)/i;   # notaint because not locale dependent
 324 check_taint_not      $&, "\$ & from /(.).(\\1)/";
 325 check_taint_not      $`, "\t\$`";
 326 check_taint_not      $', "\t\$'";
 327 check_taint_not      $+, "\t\$+";
 328 check_taint_not      $1, "\t\$1";
 329 check_taint_not      $2, "\t\$2";
 330 check_taint_not      $3, "\t\$3";
 331
 332 /(.)/;  # untaint $&, $`, $', $+, $1.
 333 check_taint_not  $&, "\$ & from /./";
 334
 335 $_ = $a;        # untaint $_
 336
 337 check_taint_not  $_, 'untainting $_ works';
 338
 339 /(b)/;          # this must not taint
 340 check_taint_not  $&, "\$ & from /(b)/";
 341 check_taint_not  $`, "\t\$`";
 342 check_taint_not  $', "\t\$'";
 343 check_taint_not  $+, "\t\$+";
 344 check_taint_not  $1, "\t\$1";
 345 check_taint_not  $2, "\t\$2";
 346
 347 $_ = $a;        # untaint $_
 348
 349 check_taint_not  $_, 'untainting $_ works';
 350
 351 $b = uc($a);    # taint $b
 352 s/(.+)/$b/;     # this must taint only the $_
 353
 354 check_taint      $_, '$_ (wasn\'t tainted) from s/(.+)/$b/ where $b is tainted';
 355 check_taint_not  $&, "\t\$&";
 356 check_taint_not  $`, "\t\$`";
 357 check_taint_not  $', "\t\$'";
 358 check_taint_not  $+, "\t\$+";
 359 check_taint_not  $1, "\t\$1";
 360 check_taint_not  $2, "\t\$2";
 361
 362 $_ = $a;        # untaint $_
 363
 364 s/(.+)/b/;      # this must not taint
 365 check_taint_not  $_, '$_ (wasn\'t tainted) from s/(.+)/b/';
 366 check_taint_not  $&, "\t\$&";
 367 check_taint_not  $`, "\t\$`";
 368 check_taint_not  $', "\t\$'";
 369 check_taint_not  $+, "\t\$+";
 370 check_taint_not  $1, "\t\$1";
 371 check_taint_not  $2, "\t\$2";
 372
 373 $b = $a;        # untaint $b
 374
 375 ($b = $a) =~ s/\w/$&/;
 376 check_taint      $b, '$b from ($b = $a) =~ s/\w/$&/';   # $b should be tainted.
 377 check_taint_not  $a, '$a from ($b = $a) =~ s/\w/$&/';   # $a should be not.
 378
 379 $_ = $a;        # untaint $_
 380
 381 s/(\w)/\l$1/;   # this must taint
 382 check_taint      $_, '$_ (wasn\'t tainted) from s/(\w)/\l$1/,'; # this must taint
 383 check_taint      $&, "\t\$&";
 384 check_taint      $`, "\t\$`";
 385 check_taint      $', "\t\$'";
 386 check_taint      $+, "\t\$+";
 387 check_taint      $1, "\t\$1";
 388 check_taint_not  $2, "\t\$2";
 389
 390 $_ = $a;        # untaint $_
 391
 392 s/(\w)/\L$1/;   # this must taint
 393 check_taint      $_, '$_ (wasn\'t tainted) from s/(\w)/\L$1/,';
 394 check_taint      $&, "\t\$&";
 395 check_taint      $`, "\t\$`";
 396 check_taint      $', "\t\$'";
 397 check_taint      $+, "\t\$+";
 398 check_taint      $1, "\t\$1";
 399 check_taint_not  $2, "\t\$2";
 400
 401 $_ = $a;        # untaint $_
 402
 403 s/(\w)/\u$1/;   # this must taint
 404 check_taint      $_, '$_ (wasn\'t tainted) from s/(\w)/\u$1/';
 405 check_taint      $&, "\t\$&";
 406 check_taint      $`, "\t\$`";
 407 check_taint      $', "\t\$'";
 408 check_taint      $+, "\t\$+";
 409 check_taint      $1, "\t\$1";
 410 check_taint_not  $2, "\t\$2";
 411
 412 $_ = $a;        # untaint $_
 413
 414 s/(\w)/\U$1/;   # this must taint
 415 check_taint      $_, '$_ (wasn\'t tainted) from s/(\w)/\U$1/';
 416 check_taint      $&, "\t\$&";
 417 check_taint      $`, "\t\$`";
 418 check_taint      $', "\t\$'";
 419 check_taint      $+, "\t\$+";
 420 check_taint      $1, "\t\$1";
 421 check_taint_not  $2, "\t\$2";
 422
 423 # After all this tainting $a should be cool.
 424
 425 check_taint_not  $a, '$a still not tainted';
 426
 427 "a" =~ /([a-z])/;
 428 check_taint_not $1, '"a" =~ /([a-z])/';
 429 "foo.bar_baz" =~ /^(.*)[._](.*?)$/;  # Bug 120675
 430 check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/';
 431
 432 # BE SURE TO COPY ANYTHING YOU ADD to the block below
 433
 434 {   # This is just the previous tests copied here with a different
 435     # compile-time pragma.
 436
 437     use locale ':not_characters'; # engage restricted locale with different
 438                                   # tainting rules
 439     check_taint_not   $a, '$a';
 440
 441     check_taint_not   uc($a), 'uc($a)';
 442     check_taint_not   "\U$a", '"\U$a"';
 443     check_taint_not   ucfirst($a), 'ucfirst($a)';
 444     check_taint_not   "\u$a", '"\u$a"';
 445     check_taint_not   lc($a), 'lc($a)';
 446     check_taint_not   fc($a), 'fc($a)';
 447     check_taint_not   "\L$a", '"\L$a"';
 448     check_taint_not   "\F$a", '"\F$a"';
 449     check_taint_not   lcfirst($a), 'lcfirst($a)';
 450     check_taint_not   "\l$a", '"\l$a"';
 451
 452     check_taint_not  sprintf('%e', 123.456), "sprintf('%e', 123.456)";
 453     check_taint_not  sprintf('%f', 123.456), "sprintf('%f', 123.456)";
 454     check_taint_not  sprintf('%g', 123.456), "sprintf('%g', 123.456)";
 455     check_taint_not  sprintf('%d', 123.456), "sprintf('%d', 123.456)";
 456     check_taint_not  sprintf('%x', 123.456), "sprintf('%x', 123.456)";
 457
 458     $_ = $a;    # untaint $_
 459
 460     $_ = uc($a);
 461
 462     check_taint_not  $_, '$_ = uc($a)';
 463
 464     /(\w)/;
 465     check_taint_not  $&, "\$& from /(\\w)/";
 466     check_taint_not  $`, "\t\$`";
 467     check_taint_not  $', "\t\$'";
 468     check_taint_not  $+, "\t\$+";
 469     check_taint_not  $1, "\t\$1";
 470     check_taint_not  $2, "\t\$2";
 471
 472     /(.)/;      # untaint $&, $`, $', $+, $1.
 473     check_taint_not  $&, "\$& from /(.)/";
 474     check_taint_not  $`, "\t\$`";
 475     check_taint_not  $', "\t\$'";
 476     check_taint_not  $+, "\t\$+";
 477     check_taint_not  $1, "\t\$1";
 478     check_taint_not  $2, "\t\$2";
 479
 480     /(\W)/;
 481     check_taint_not  $&, "\$& from /(\\W)/";
 482     check_taint_not  $`, "\t\$`";
 483     check_taint_not  $', "\t\$'";
 484     check_taint_not  $+, "\t\$+";
 485     check_taint_not  $1, "\t\$1";
 486     check_taint_not  $2, "\t\$2";
 487
 488     /(.)/;      # untaint $&, $`, $', $+, $1.
 489     check_taint_not  $&, "\$& from /(.)/";
 490     check_taint_not  $`, "\t\$`";
 491     check_taint_not  $', "\t\$'";
 492     check_taint_not  $+, "\t\$+";
 493     check_taint_not  $1, "\t\$1";
 494     check_taint_not  $2, "\t\$2";
 495
 496     /(\s)/;
 497     check_taint_not  $&, "\$& from /(\\s)/";
 498     check_taint_not  $`, "\t\$`";
 499     check_taint_not  $', "\t\$'";
 500     check_taint_not  $+, "\t\$+";
 501     check_taint_not  $1, "\t\$1";
 502     check_taint_not  $2, "\t\$2";
 503
 504     /(.)/;      # untaint $&, $`, $', $+, $1.
 505     check_taint_not  $&, "\$& from /(.)/";
 506
 507     /(\S)/;
 508     check_taint_not  $&, "\$& from /(\\S)/";
 509     check_taint_not  $`, "\t\$`";
 510     check_taint_not  $', "\t\$'";
 511     check_taint_not  $+, "\t\$+";
 512     check_taint_not  $1, "\t\$1";
 513     check_taint_not  $2, "\t\$2";
 514
 515     /(.)/;      # untaint $&, $`, $', $+, $1.
 516     check_taint_not  $&, "\$& from /(.)/";
 517
 518     "0" =~ /(\d)/;
 519     check_taint_not  $&, "\$& from /(\\d)/";
 520     check_taint_not  $`, "\t\$`";
 521     check_taint_not  $', "\t\$'";
 522     check_taint_not  $+, "\t\$+";
 523     check_taint_not  $1, "\t\$1";
 524     check_taint_not  $2, "\t\$2";
 525
 526     /(.)/;      # untaint $&, $`, $', $+, $1.
 527     check_taint_not  $&, "\$& from /(.)/";
 528
 529     /(\D)/;
 530     check_taint_not  $&, "\$& from /(\\D)/";
 531     check_taint_not  $`, "\t\$`";
 532     check_taint_not  $', "\t\$'";
 533     check_taint_not  $+, "\t\$+";
 534     check_taint_not  $1, "\t\$1";
 535     check_taint_not  $2, "\t\$2";
 536
 537     /(.)/;      # untaint $&, $`, $', $+, $1.
 538     check_taint_not  $&, "\$& from /(.)/";
 539
 540     /([[:alnum:]])/;
 541     check_taint_not  $&, "\$& from /([[:alnum:]])/";
 542     check_taint_not  $`, "\t\$`";
 543     check_taint_not  $', "\t\$'";
 544     check_taint_not  $+, "\t\$+";
 545     check_taint_not  $1, "\t\$1";
 546     check_taint_not  $2, "\t\$2";
 547
 548     /(.)/;      # untaint $&, $`, $', $+, $1.
 549     check_taint_not  $&, "\$& from /(.)/";
 550
 551     /([[:^alnum:]])/;
 552     check_taint_not  $&, "\$& from /([[:^alnum:]])/";
 553     check_taint_not  $`, "\t\$`";
 554     check_taint_not  $', "\t\$'";
 555     check_taint_not  $+, "\t\$+";
 556     check_taint_not  $1, "\t\$1";
 557     check_taint_not  $2, "\t\$2";
 558
 559     "a" =~ /(a)|(\w)/;
 560     check_taint_not  $&, "\$& from /(a)|(\\w)/";
 561     check_taint_not  $`, "\t\$`";
 562     check_taint_not  $', "\t\$'";
 563     check_taint_not  $+, "\t\$+";
 564     check_taint_not  $1, "\t\$1";
 565     ok($1 eq 'a', ("\t" x 5) . "\$1 is 'a'");
 566     ok(! defined $2, ("\t" x 5) . "\$2 is undefined");
 567     check_taint_not  $2, "\t\$2";
 568     check_taint_not  $3, "\t\$3";
 569
 570     /(.)/;      # untaint $&, $`, $', $+, $1.
 571     check_taint_not  $&, "\$& from /(.)/";
 572
 573     "\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i;
 574     check_taint_not      $&, "\$& from /(\\N{CYRILLIC CAPITAL LETTER A})/i";
 575     check_taint_not      $`, "\t\$`";
 576     check_taint_not      $', "\t\$'";
 577     check_taint_not      $+, "\t\$+";
 578     check_taint_not      $1, "\t\$1";
 579     ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\t\$1 is 'small cyrillic a'");
 580     check_taint_not      $2, "\t\$2";
 581
 582     /(.)/;      # untaint $&, $`, $', $+, $1.
 583     check_taint_not  $&, "\$& from /./";
 584
 585     "(\N{KELVIN SIGN})" =~ /(\N{KELVIN SIGN})/i;
 586     check_taint_not  $&, "\$& from /(\\N{KELVIN SIGN})/i";
 587     check_taint_not  $`, "\t\$`";
 588     check_taint_not  $', "\t\$'";
 589     check_taint_not  $+, "\t\$+";
 590     check_taint_not  $1, "\t\$1";
 591     check_taint_not      $2, "\t\$2";
 592
 593     /(.)/;      # untaint $&, $`, $', $+, $1.
 594     check_taint_not  $&, "\$& from /(.)/";
 595
 596     "a:" =~ /(.)\b(.)/;
 597     check_taint_not  $&, "\$& from /(.)\\b(.)/";
 598     check_taint_not  $`, "\t\$`";
 599     check_taint_not  $', "\t\$'";
 600     check_taint_not  $+, "\t\$+";
 601     check_taint_not  $1, "\t\$1";
 602     check_taint_not  $2, "\t\$2";
 603     check_taint_not  $3, "\t\$3";
 604
 605     /(.)/;      # untaint $&, $`, $', $+, $1.
 606     check_taint_not  $&, "\$& from /./";
 607
 608     "aa" =~ /(.)\B(.)/;
 609     check_taint_not  $&, "\$& from /(.)\\B(.)/";
 610     check_taint_not  $`, "\t\$`";
 611     check_taint_not  $', "\t\$'";
 612     check_taint_not  $+, "\t\$+";
 613     check_taint_not  $1, "\t\$1";
 614     check_taint_not  $2, "\t\$2";
 615     check_taint_not  $3, "\t\$3";
 616
 617     /(.)/;      # untaint $&, $`, $', $+, $1.
 618     check_taint_not  $&, "\$& from /./";
 619
 620     "aaa" =~ /(.).(\1)/i;       # notaint because not locale dependent
 621     check_taint_not      $&, "\$ & from /(.).(\\1)/";
 622     check_taint_not      $`, "\t\$`";
 623     check_taint_not      $', "\t\$'";
 624     check_taint_not      $+, "\t\$+";
 625     check_taint_not      $1, "\t\$1";
 626     check_taint_not      $2, "\t\$2";
 627     check_taint_not      $3, "\t\$3";
 628
 629     /(.)/;      # untaint $&, $`, $', $+, $1.
 630     check_taint_not  $&, "\$ & from /./";
 631
 632     $_ = $a;    # untaint $_
 633
 634     check_taint_not  $_, 'untainting $_ works';
 635
 636     /(b)/;
 637     check_taint_not  $&, "\$ & from /(b)/";
 638     check_taint_not  $`, "\t\$`";
 639     check_taint_not  $', "\t\$'";
 640     check_taint_not  $+, "\t\$+";
 641     check_taint_not  $1, "\t\$1";
 642     check_taint_not  $2, "\t\$2";
 643
 644     $_ = $a;    # untaint $_
 645
 646     check_taint_not  $_, 'untainting $_ works';
 647
 648     s/(.+)/b/;
 649     check_taint_not  $_, '$_ (wasn\'t tainted) from s/(.+)/b/';
 650     check_taint_not  $&, "\t\$&";
 651     check_taint_not  $`, "\t\$`";
 652     check_taint_not  $', "\t\$'";
 653     check_taint_not  $+, "\t\$+";
 654     check_taint_not  $1, "\t\$1";
 655     check_taint_not  $2, "\t\$2";
 656
 657     $b = $a;    # untaint $b
 658
 659     ($b = $a) =~ s/\w/$&/;
 660     check_taint_not     $b, '$b from ($b = $a) =~ s/\w/$&/';
 661     check_taint_not  $a, '$a from ($b = $a) =~ s/\w/$&/';
 662
 663     $_ = $a;    # untaint $_
 664
 665     s/(\w)/\l$1/;
 666     check_taint_not     $_, '$_ (wasn\'t tainted) from s/(\w)/\l$1/,';  # this must taint
 667     check_taint_not     $&, "\t\$&";
 668     check_taint_not     $`, "\t\$`";
 669     check_taint_not     $', "\t\$'";
 670     check_taint_not     $+, "\t\$+";
 671     check_taint_not     $1, "\t\$1";
 672     check_taint_not  $2, "\t\$2";
 673
 674     $_ = $a;    # untaint $_
 675
 676     s/(\w)/\L$1/;
 677     check_taint_not     $_, '$_ (wasn\'t tainted) from s/(\w)/\L$1/,';
 678     check_taint_not     $&, "\t\$&";
 679     check_taint_not     $`, "\t\$`";
 680     check_taint_not     $', "\t\$'";
 681     check_taint_not     $+, "\t\$+";
 682     check_taint_not     $1, "\t\$1";
 683     check_taint_not  $2, "\t\$2";
 684
 685     $_ = $a;    # untaint $_
 686
 687     s/(\w)/\u$1/;
 688     check_taint_not     $_, '$_ (wasn\'t tainted) from s/(\w)/\u$1/';
 689     check_taint_not     $&, "\t\$&";
 690     check_taint_not     $`, "\t\$`";
 691     check_taint_not     $', "\t\$'";
 692     check_taint_not     $+, "\t\$+";
 693     check_taint_not     $1, "\t\$1";
 694     check_taint_not  $2, "\t\$2";
 695
 696     $_ = $a;    # untaint $_
 697
 698     s/(\w)/\U$1/;
 699     check_taint_not     $_, '$_ (wasn\'t tainted) from s/(\w)/\U$1/';
 700     check_taint_not     $&, "\t\$&";
 701     check_taint_not     $`, "\t\$`";
 702     check_taint_not     $', "\t\$'";
 703     check_taint_not     $+, "\t\$+";
 704     check_taint_not     $1, "\t\$1";
 705     check_taint_not  $2, "\t\$2";
 706
 707     # After all this tainting $a should be cool.
 708
 709     check_taint_not  $a, '$a still not tainted';
 710
 711     "a" =~ /([a-z])/;
 712     check_taint_not $1, '"a" =~ /([a-z])/';
 713     "foo.bar_baz" =~ /^(.*)[._](.*?)$/;  # Bug 120675
 714     check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/';
 715
 716 }
 717
 718 # Here are in scope of 'use locale'
 719
 720 # I think we've seen quite enough of taint.
 721 # Let us do some *real* locale work now,
 722 # unless setlocale() is missing (i.e. minitest).
 723
 724 # The test number before our first setlocale()
 725 my $final_without_setlocale = $test_num;
 726
 727 # Find locales.
 728
 729 debug "Scanning for locales...\n";
 730
 731 require POSIX; import POSIX ':locale_h';
 732
 733 my @Locale = find_locales([ &POSIX::LC_CTYPE, &POSIX::LC_NUMERIC, &POSIX::LC_ALL ]);
 734
 735 debug "Locales =\n";
 736 for ( @Locale ) {
 737     debug "$_\n";
 738 }
 739
 740 unless (@Locale) {
 741     print "1..$test_num\n";
 742     exit;
 743 }
 744
 745
 746 setlocale(&POSIX::LC_ALL, "C");
 747
 748 my %posixes;
 749
 750 my %Problem;
 751 my %Okay;
 752 my %Known_bad_locale;   # Failed test for a locale known to be bad
 753 my %Testing;
 754 my @Added_alpha;   # Alphas that aren't in the C locale.
 755 my %test_names;
 756
 757 sub disp_chars {
 758     # This returns a display string denoting the input parameter @_, each
 759     # entry of which is a single character in the range 0-255.  The first part
 760     # of the output is a string of the characters in @_ that are ASCII
 761     # graphics, and hence unambiguously displayable.  They are given by code
 762     # point order.  The second part is the remaining code points, the ordinals
 763     # of which are each displayed as 2-digit hex.  Blanks are inserted so as
 764     # to keep anything from the first part looking like a 2-digit hex number.
 765
 766     no locale;
 767     my @chars = sort { ord $a <=> ord $b } @_;
 768     my $output = "";
 769     my $range_start;
 770     my $start_class;
 771     push @chars, chr(258);  # This sentinel simplifies the loop termination
 772                             # logic
 773     foreach my $i (0 .. @chars - 1) {
 774         my $char = $chars[$i];
 775         my $range_end;
 776         my $class;
 777
 778         # We avoid using [:posix:] classes, as these are being tested in this
 779         # file.  Each equivalence class below is for things that can appear in
 780         # a range; those that can't be in a range have class -1.  0 for those
 781         # which should be output in hex; and >0 for the other ranges
 782         if ($char =~ /[A-Z]/) {
 783             $class = 2;
 784         }
 785         elsif ($char =~ /[a-z]/) {
 786             $class = 3;
 787         }
 788         elsif ($char =~ /[0-9]/) {
 789             $class = 4;
 790         }
 791         # Uncomment to get literal punctuation displayed instead of hex
 792         #elsif ($char =~ /[[\]!"#\$\%&\'()*+,.\/:\\;<=>?\@\^_`{|}~-]/) {
 793         #    $class = -1;    # Punct never appears in a range
 794         #}
 795         else {
 796             $class = 0;     # Output in hex
 797         }
 798
 799         if (! defined $range_start) {
 800             if ($class < 0) {
 801                 $output .= " " . $char;
 802             }
 803             else {
 804                 $range_start = ord $char;
 805                 $start_class = $class;
 806             }
 807         } # A range ends if not consecutive, or the class-type changes
 808         elsif (ord $char != ($range_end = ord($chars[$i-1])) + 1
 809               || $class != $start_class)
 810         {
 811
 812             # Here, the current character is not in the range.  This means the
 813             # previous character must have been.  Output the range up through
 814             # that one.
 815             my $range_length = $range_end - $range_start + 1;
 816             if ($start_class > 0) {
 817                 $output .= " " . chr($range_start);
 818                 $output .= "-" . chr($range_end) if $range_length > 1;
 819             }
 820             else {
 821                 $output .= sprintf(" %02X", $range_start);
 822                 $output .= sprintf("-%02X", $range_end) if $range_length > 1;
 823             }
 824
 825             # Handle the new current character, as potentially beginning a new
 826             # range
 827             undef $range_start;
 828             redo;
 829         }
 830     }
 831
 832     $output =~ s/^ //;
 833     return $output;
 834 }
 835
 836 sub disp_str ($) {
 837     my $string = shift;
 838
 839     # Displays the string unambiguously.  ASCII printables are always output
 840     # as-is, though perhaps separated by blanks from other characters.  If
 841     # entirely printable ASCII, just returns the string.  Otherwise if valid
 842     # UTF-8 it uses the character names for non-printable-ASCII.  Otherwise it
 843     # outputs hex for each non-ASCII-printable byte.
 844
 845     return $string if $string =~ / ^ [[:print:]]* $/xa;
 846
 847     my $result = "";
 848     my $prev_was_punct = 1; # Beginning is considered punct
 849     if (utf8::valid($string) && utf8::is_utf8($string)) {
 850         use charnames ();
 851         foreach my $char (split "", $string) {
 852
 853             # Keep punctuation adjacent to other characters; otherwise
 854             # separate them with a blank
 855             if ($char =~ /[[:punct:]]/a) {
 856                 $result .= $char;
 857                 $prev_was_punct = 1;
 858             }
 859             elsif ($char =~ /[[:print:]]/a) {
 860                 $result .= "  " unless $prev_was_punct;
 861                 $result .= $char;
 862                 $prev_was_punct = 0;
 863             }
 864             else {
 865                 $result .= "  " unless $prev_was_punct;
 866                 my $name = charnames::viacode(ord $char);
 867                 $result .= (defined $name) ? $name : ':unknown:';
 868                 $prev_was_punct = 0;
 869             }
 870         }
 871     }
 872     else {
 873         use bytes;
 874         foreach my $char (split "", $string) {
 875             if ($char =~ /[[:punct:]]/a) {
 876                 $result .= $char;
 877                 $prev_was_punct = 1;
 878             }
 879             elsif ($char =~ /[[:print:]]/a) {
 880                 $result .= " " unless $prev_was_punct;
 881                 $result .= $char;
 882                 $prev_was_punct = 0;
 883             }
 884             else {
 885                 $result .= " " unless $prev_was_punct;
 886                 $result .= sprintf("%02X", ord $char);
 887                 $prev_was_punct = 0;
 888             }
 889         }
 890     }
 891
 892     return $result;
 893 }
 894
 895 sub report_result {
 896     my ($Locale, $i, $pass_fail, $message) = @_;
 897     $message //= "";
 898     $message = "  ($message)" if $message;
 899     if ($pass_fail) {
 900         push @{$Okay{$i}}, $Locale;
 901     }
 902     else {
 903         $Known_bad_locale{$i}{$Locale} = 1 if exists $known_bad_locales{$^O}
 904                                          && $Locale =~ $known_bad_locales{$^O};
 905         $Problem{$i}{$Locale} = 1;
 906         debug "failed $i ($test_names{$i}) with locale '$Locale'$message\n";
 907     }
 908 }
 909
 910 sub report_multi_result {
 911     my ($Locale, $i, $results_ref) = @_;
 912
 913     # $results_ref points to an array, each element of which is a character that was
 914     # in error for this test numbered '$i'.  If empty, the test passed
 915
 916     my $message = "";
 917     if (@$results_ref) {
 918         $message = join " ", "for", disp_chars(@$results_ref);
 919     }
 920     report_result($Locale, $i, @$results_ref == 0, $message);
 921 }
 922
 923 my $first_locales_test_number = $final_without_setlocale + 1;
 924 my $locales_test_number;
 925 my $not_necessarily_a_problem_test_number;
 926 my $first_casing_test_number;
 927 my %setlocale_failed;   # List of locales that setlocale() didn't work on
 928
 929 foreach my $Locale (@Locale) {
 930     $locales_test_number = $first_locales_test_number - 1;
 931     debug "\n";
 932     debug "Locale = $Locale\n";
 933
 934     unless (setlocale(&POSIX::LC_ALL, $Locale)) {
 935         $setlocale_failed{$Locale} = $Locale;
 936         next;
 937     }
 938
 939     # We test UTF-8 locales only under ':not_characters';  It is easier to
 940     # test them in other test files than here.  Non- UTF-8 locales are tested
 941     # only under plain 'use locale', as otherwise we would have to convert
 942     # everything in them to Unicode.
 943
 944     my %UPPER = ();     # All alpha X for which uc(X) == X and lc(X) != X
 945     my %lower = ();     # All alpha X for which lc(X) == X and uc(X) != X
 946     my %BoThCaSe = ();  # All alpha X for which uc(X) == lc(X) == X
 947
 948     my $is_utf8_locale = is_locale_utf8($Locale);
 949
 950     debug "is utf8 locale? = $is_utf8_locale\n";
 951
 952     debug "radix = " . disp_str(localeconv()->{decimal_point}) . "\n";
 953
 954     if (! $is_utf8_locale) {
 955         use locale;
 956         @{$posixes{'word'}} = grep /\w/, map { chr } 0..255;
 957         @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255;
 958         @{$posixes{'space'}} = grep /\s/, map { chr } 0..255;
 959         @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255;
 960         @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255;
 961         @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255;
 962         @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255;
 963         @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255;
 964         @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255;
 965         @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255;
 966         @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255;
 967         @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255;
 968         @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255;
 969         @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255;
 970         @{$posixes{'cased'}} = grep /[[:upper:]]/i, map {chr } 0..255;
 971
 972         # Sieve the uppercase and the lowercase.
 973
 974         for (@{$posixes{'word'}}) {
 975             if (/[^\d_]/) { # skip digits and the _
 976                 if (uc($_) eq $_) {
 977                     $UPPER{$_} = $_;
 978                 }
 979                 if (lc($_) eq $_) {
 980                     $lower{$_} = $_;
 981                 }
 982             }
 983         }
 984     }
 985     else {
 986         use locale ':not_characters';
 987         @{$posixes{'word'}} = grep /\w/, map { chr } 0..255;
 988         @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255;
 989         @{$posixes{'space'}} = grep /\s/, map { chr } 0..255;
 990         @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255;
 991         @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255;
 992         @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255;
 993         @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255;
 994         @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255;
 995         @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255;
 996         @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255;
 997         @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255;
 998         @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255;
 999         @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255;
1000         @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255;
1001         @{$posixes{'cased'}} = grep /[[:upper:]]/i, map {chr } 0..255;
1002         for (@{$posixes{'word'}}) {
1003             if (/[^\d_]/) { # skip digits and the _
1004                 if (uc($_) eq $_) {
1005                     $UPPER{$_} = $_;
1006                 }
1007                 if (lc($_) eq $_) {
1008                     $lower{$_} = $_;
1009                 }
1010             }
1011         }
1012     }
1013
1014     # Ordered, where possible,  in groups of "this is a subset of the next
1015     # one"
1016     debug ":upper:  = ", disp_chars(@{$posixes{'upper'}}), "\n";
1017     debug ":lower:  = ", disp_chars(@{$posixes{'lower'}}), "\n";
1018     debug ":cased:  = ", disp_chars(@{$posixes{'cased'}}), "\n";
1019     debug ":alpha:  = ", disp_chars(@{$posixes{'alpha'}}), "\n";
1020     debug ":alnum:  = ", disp_chars(@{$posixes{'alnum'}}), "\n";
1021     debug " w       = ", disp_chars(@{$posixes{'word'}}), "\n";
1022     debug ":graph:  = ", disp_chars(@{$posixes{'graph'}}), "\n";
1023     debug ":print:  = ", disp_chars(@{$posixes{'print'}}), "\n";
1024     debug " d       = ", disp_chars(@{$posixes{'digit'}}), "\n";
1025     debug ":xdigit: = ", disp_chars(@{$posixes{'xdigit'}}), "\n";
1026     debug ":blank:  = ", disp_chars(@{$posixes{'blank'}}), "\n";
1027     debug " s       = ", disp_chars(@{$posixes{'space'}}), "\n";
1028     debug ":punct:  = ", disp_chars(@{$posixes{'punct'}}), "\n";
1029     debug ":cntrl:  = ", disp_chars(@{$posixes{'cntrl'}}), "\n";
1030     debug ":ascii:  = ", disp_chars(@{$posixes{'ascii'}}), "\n";
1031
1032     foreach (keys %UPPER) {
1033
1034         $BoThCaSe{$_}++ if exists $lower{$_};
1035     }
1036     foreach (keys %lower) {
1037         $BoThCaSe{$_}++ if exists $UPPER{$_};
1038     }
1039     foreach (keys %BoThCaSe) {
1040         delete $UPPER{$_};
1041         delete $lower{$_};
1042     }
1043
1044     my %Unassigned;
1045     foreach my $ord ( 0 .. 255 ) {
1046         $Unassigned{chr $ord} = 1;
1047     }
1048     foreach my $class (keys %posixes) {
1049         foreach my $char (@{$posixes{$class}}) {
1050             delete $Unassigned{$char};
1051         }
1052     }
1053
1054     debug "UPPER    = ", disp_chars(sort { ord $a <=> ord $b } keys %UPPER), "\n";
1055     debug "lower    = ", disp_chars(sort { ord $a <=> ord $b } keys %lower), "\n";
1056     debug "BoThCaSe = ", disp_chars(sort { ord $a <=> ord $b } keys %BoThCaSe), "\n";
1057     debug "Unassigned = ", disp_chars(sort { ord $a <=> ord $b } keys %Unassigned), "\n";
1058
1059     my @failures;
1060     my @fold_failures;
1061     foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) {
1062         my $ok;
1063         my $fold_ok;
1064         if ($is_utf8_locale) {
1065             use locale ':not_characters';
1066             $ok = $x =~ /[[:upper:]]/;
1067             $fold_ok = $x =~ /[[:lower:]]/i;
1068         }
1069         else {
1070             use locale;
1071             $ok = $x =~ /[[:upper:]]/;
1072             $fold_ok = $x =~ /[[:lower:]]/i;
1073         }
1074         push @failures, $x unless $ok;
1075         push @fold_failures, $x unless $fold_ok;
1076     }
1077     $locales_test_number++;
1078     $first_casing_test_number = $locales_test_number;
1079     $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/ matches all alpha X for which uc(X) == X and lc(X) != X';
1080     report_multi_result($Locale, $locales_test_number, \@failures);
1081
1082     $locales_test_number++;
1083
1084     $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i matches all alpha X for which uc(X) == X and lc(X) != X';
1085     report_multi_result($Locale, $locales_test_number, \@fold_failures);
1086
1087     undef @failures;
1088     undef @fold_failures;
1089
1090     foreach my $x (sort { ord $a <=> ord $b } keys %lower) {
1091         my $ok;
1092         my $fold_ok;
1093         if ($is_utf8_locale) {
1094             use locale ':not_characters';
1095             $ok = $x =~ /[[:lower:]]/;
1096             $fold_ok = $x =~ /[[:upper:]]/i;
1097         }
1098         else {
1099             use locale;
1100             $ok = $x =~ /[[:lower:]]/;
1101             $fold_ok = $x =~ /[[:upper:]]/i;
1102         }
1103         push @failures, $x unless $ok;
1104         push @fold_failures, $x unless $fold_ok;
1105     }
1106
1107     $locales_test_number++;
1108     $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/ matches all alpha X for which lc(X) == X and uc(X) != X';
1109     report_multi_result($Locale, $locales_test_number, \@failures);
1110
1111     $locales_test_number++;
1112     $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/i matches all alpha X for which lc(X) == X and uc(X) != X';
1113     report_multi_result($Locale, $locales_test_number, \@fold_failures);
1114
1115     {   # Find the alphabetic characters that are not considered alphabetics
1116         # in the default (C) locale.
1117
1118         no locale;
1119
1120         @Added_alpha = ();
1121         for (keys %UPPER, keys %lower, keys %BoThCaSe) {
1122             push(@Added_alpha, $_) if (/\W/);
1123         }
1124     }
1125
1126     @Added_alpha = sort { ord $a <=> ord $b } @Added_alpha;
1127
1128     debug "Added_alpha = ", disp_chars(@Added_alpha), "\n";
1129
1130     # Cross-check the whole 8-bit character set.
1131
1132     ++$locales_test_number;
1133     my @f;
1134     $test_names{$locales_test_number} = 'Verify that \w and [:word:] are identical';
1135     for (map { chr } 0..255) {
1136         if ($is_utf8_locale) {
1137             use locale ':not_characters';
1138             push @f, $_ unless /[[:word:]]/ == /\w/;
1139         }
1140         else {
1141             push @f, $_ unless /[[:word:]]/ == /\w/;
1142         }
1143     }
1144     report_multi_result($Locale, $locales_test_number, \@f);
1145
1146     ++$locales_test_number;
1147     undef @f;
1148     $test_names{$locales_test_number} = 'Verify that \d and [:digit:] are identical';
1149     for (map { chr } 0..255) {
1150         if ($is_utf8_locale) {
1151             use locale ':not_characters';
1152             push @f, $_ unless /[[:digit:]]/ == /\d/;
1153         }
1154         else {
1155             push @f, $_ unless /[[:digit:]]/ == /\d/;
1156         }
1157     }
1158     report_multi_result($Locale, $locales_test_number, \@f);
1159
1160     ++$locales_test_number;
1161     undef @f;
1162     $test_names{$locales_test_number} = 'Verify that \s and [:space:] are identical';
1163     for (map { chr } 0..255) {
1164         if ($is_utf8_locale) {
1165             use locale ':not_characters';
1166             push @f, $_ unless /[[:space:]]/ == /\s/;
1167         }
1168         else {
1169             push @f, $_ unless /[[:space:]]/ == /\s/;
1170         }
1171     }
1172     report_multi_result($Locale, $locales_test_number, \@f);
1173
1174     ++$locales_test_number;
1175     undef @f;
1176     $test_names{$locales_test_number} = 'Verify that [:posix:] and [:^posix:] are mutually exclusive';
1177     for (map { chr } 0..255) {
1178         if ($is_utf8_locale) {
1179             use locale ':not_characters';
1180             push @f, $_ unless   (/[[:alpha:]]/ xor /[[:^alpha:]]/)   ||
1181                     (/[[:alnum:]]/ xor /[[:^alnum:]]/)   ||
1182                     (/[[:ascii:]]/ xor /[[:^ascii:]]/)   ||
1183                     (/[[:blank:]]/ xor /[[:^blank:]]/)   ||
1184                     (/[[:cntrl:]]/ xor /[[:^cntrl:]]/)   ||
1185                     (/[[:digit:]]/ xor /[[:^digit:]]/)   ||
1186                     (/[[:graph:]]/ xor /[[:^graph:]]/)   ||
1187                     (/[[:lower:]]/ xor /[[:^lower:]]/)   ||
1188                     (/[[:print:]]/ xor /[[:^print:]]/)   ||
1189                     (/[[:space:]]/ xor /[[:^space:]]/)   ||
1190                     (/[[:upper:]]/ xor /[[:^upper:]]/)   ||
1191                     (/[[:word:]]/  xor /[[:^word:]]/)    ||
1192                     (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) ||
1193
1194                     # effectively is what [:cased:] would be if it existed.
1195                     (/[[:upper:]]/i xor /[[:^upper:]]/i);
1196         }
1197         else {
1198             push @f, $_ unless   (/[[:alpha:]]/ xor /[[:^alpha:]]/)   ||
1199                     (/[[:alnum:]]/ xor /[[:^alnum:]]/)   ||
1200                     (/[[:ascii:]]/ xor /[[:^ascii:]]/)   ||
1201                     (/[[:blank:]]/ xor /[[:^blank:]]/)   ||
1202                     (/[[:cntrl:]]/ xor /[[:^cntrl:]]/)   ||
1203                     (/[[:digit:]]/ xor /[[:^digit:]]/)   ||
1204                     (/[[:graph:]]/ xor /[[:^graph:]]/)   ||
1205                     (/[[:lower:]]/ xor /[[:^lower:]]/)   ||
1206                     (/[[:print:]]/ xor /[[:^print:]]/)   ||
1207                     (/[[:space:]]/ xor /[[:^space:]]/)   ||
1208                     (/[[:upper:]]/ xor /[[:^upper:]]/)   ||
1209                     (/[[:word:]]/  xor /[[:^word:]]/)    ||
1210                     (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) ||
1211                     (/[[:upper:]]/i xor /[[:^upper:]]/i);
1212         }
1213     }
1214     report_multi_result($Locale, $locales_test_number, \@f);
1215
1216     # The rules for the relationships are given in:
1217     # http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
1218
1219
1220     ++$locales_test_number;
1221     undef @f;
1222     $test_names{$locales_test_number} = 'Verify that [:lower:] contains at least a-z';
1223     for ('a' .. 'z') {
1224         if ($is_utf8_locale) {
1225             use locale ':not_characters';
1226             push @f, $_  unless /[[:lower:]]/;
1227         }
1228         else {
1229             push @f, $_  unless /[[:lower:]]/;
1230         }
1231     }
1232     report_multi_result($Locale, $locales_test_number, \@f);
1233
1234     ++$locales_test_number;
1235     undef @f;
1236     $test_names{$locales_test_number} = 'Verify that [:lower:] is a subset of [:alpha:]';
1237     for (map { chr } 0..255) {
1238         if ($is_utf8_locale) {
1239             use locale ':not_characters';
1240             push @f, $_  if /[[:lower:]]/ and ! /[[:alpha:]]/;
1241         }
1242         else {
1243             push @f, $_  if /[[:lower:]]/ and ! /[[:alpha:]]/;
1244         }
1245     }
1246     report_multi_result($Locale, $locales_test_number, \@f);
1247
1248     ++$locales_test_number;
1249     undef @f;
1250     $test_names{$locales_test_number} = 'Verify that [:upper:] contains at least A-Z';
1251     for ('A' .. 'Z') {
1252         if ($is_utf8_locale) {
1253             use locale ':not_characters';
1254             push @f, $_  unless /[[:upper:]]/;
1255         }
1256         else {
1257             push @f, $_  unless /[[:upper:]]/;
1258         }
1259     }
1260     report_multi_result($Locale, $locales_test_number, \@f);
1261
1262     ++$locales_test_number;
1263     undef @f;
1264     $test_names{$locales_test_number} = 'Verify that [:upper:] is a subset of [:alpha:]';
1265     for (map { chr } 0..255) {
1266         if ($is_utf8_locale) {
1267             use locale ':not_characters';
1268             push @f, $_  if /[[:upper:]]/ and ! /[[:alpha:]]/;
1269         }
1270         else {
1271             push @f, $_ if /[[:upper:]]/  and ! /[[:alpha:]]/;
1272         }
1273     }
1274     report_multi_result($Locale, $locales_test_number, \@f);
1275
1276     ++$locales_test_number;
1277     undef @f;
1278     $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i is a subset of [:alpha:]';
1279     for (map { chr } 0..255) {
1280         if ($is_utf8_locale) {
1281             use locale ':not_characters';
1282             push @f, $_ if /[[:lower:]]/i  and ! /[[:alpha:]]/;
1283         }
1284         else {
1285             push @f, $_ if /[[:lower:]]/i  and ! /[[:alpha:]]/;
1286         }
1287     }
1288     report_multi_result($Locale, $locales_test_number, \@f);
1289
1290     ++$locales_test_number;
1291     undef @f;
1292     $test_names{$locales_test_number} = 'Verify that [:alpha:] is a subset of [:alnum:]';
1293     for (map { chr } 0..255) {
1294         if ($is_utf8_locale) {
1295             use locale ':not_characters';
1296             push @f, $_ if /[[:alpha:]]/  and ! /[[:alnum:]]/;
1297         }
1298         else {
1299             push @f, $_ if /[[:alpha:]]/  and ! /[[:alnum:]]/;
1300         }
1301     }
1302     report_multi_result($Locale, $locales_test_number, \@f);
1303
1304     ++$locales_test_number;
1305     undef @f;
1306     $test_names{$locales_test_number} = 'Verify that [:digit:] contains at least 0-9';
1307     for ('0' .. '9') {
1308         if ($is_utf8_locale) {
1309             use locale ':not_characters';
1310             push @f, $_  unless /[[:digit:]]/;
1311         }
1312         else {
1313             push @f, $_  unless /[[:digit:]]/;
1314         }
1315     }
1316     report_multi_result($Locale, $locales_test_number, \@f);
1317
1318     ++$locales_test_number;
1319     undef @f;
1320     $test_names{$locales_test_number} = 'Verify that [:digit:] is a subset of [:alnum:]';
1321     for (map { chr } 0..255) {
1322         if ($is_utf8_locale) {
1323             use locale ':not_characters';
1324             push @f, $_ if /[[:digit:]]/  and ! /[[:alnum:]]/;
1325         }
1326         else {
1327             push @f, $_ if /[[:digit:]]/  and ! /[[:alnum:]]/;
1328         }
1329     }
1330     report_multi_result($Locale, $locales_test_number, \@f);
1331
1332     ++$locales_test_number;
1333     undef @f;
1334     $test_names{$locales_test_number} = 'Verify that [:digit:] matches either 10 or 20 code points';
1335     report_result($Locale, $locales_test_number, @{$posixes{'digit'}} == 10 || @{$posixes{'digit'}} == 20);
1336
1337     ++$locales_test_number;
1338     undef @f;
1339     $test_names{$locales_test_number} = 'Verify that if there is a second set of digits in [:digit:], they are consecutive';
1340     if (@{$posixes{'digit'}} == 20) {
1341         my $previous_ord;
1342         for (map { chr } 0..255) {
1343             next unless /[[:digit:]]/;
1344             next if /[0-9]/;
1345             if (defined $previous_ord) {
1346                 if ($is_utf8_locale) {
1347                     use locale ':not_characters';
1348                     push @f, $_ if ord $_ != $previous_ord + 1;
1349                 }
1350                 else {
1351                     push @f, $_ if ord $_ != $previous_ord + 1;
1352                 }
1353             }
1354             $previous_ord = ord $_;
1355         }
1356     }
1357     report_multi_result($Locale, $locales_test_number, \@f);
1358
1359     ++$locales_test_number;
1360     undef @f;
1361     my @xdigit_digits;  # :digit: & :xdigit:
1362     $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains one or two blocks of 10 consecutive [:digit:] chars';
1363     for (map { chr } 0..255) {
1364         if ($is_utf8_locale) {
1365             use locale ':not_characters';
1366             # For utf8 locales, we actually use a stricter test: that :digit:
1367             # is a subset of :xdigit:, as we know that only 0-9 should match
1368             push @f, $_ if /[[:digit:]]/ and ! /[[:xdigit:]]/;
1369         }
1370         else {
1371             push @xdigit_digits, $_ if /[[:digit:]]/ and /[[:xdigit:]]/;
1372         }
1373     }
1374     if (! $is_utf8_locale) {
1375
1376         # For non-utf8 locales, @xdigit_digits is a list of the characters
1377         # that are both :xdigit: and :digit:.  Because :digit: is stored in
1378         # increasing code point order (unless the tests above failed),
1379         # @xdigit_digits is as well.  There should be exactly 10 or
1380         # 20 of these.
1381         if (@xdigit_digits != 10 && @xdigit_digits != 20) {
1382             @f = @xdigit_digits;
1383         }
1384         else {
1385
1386             # Look for contiguity in the series, adding any wrong ones to @f
1387             my @temp = @xdigit_digits;
1388             while (@temp > 1) {
1389                 push @f, $temp[1] if ($temp[0] != $temp[1] - 1)
1390
1391                                      # Skip this test for the 0th character of
1392                                      # the second block of 10, as it won't be
1393                                      # contiguous with the previous block
1394                                      && (! defined $xdigit_digits[10]
1395                                          || $temp[1] != $xdigit_digits[10]);
1396                 shift @temp;
1397             }
1398         }
1399     }
1400
1401     report_multi_result($Locale, $locales_test_number, \@f);
1402
1403     ++$locales_test_number;
1404     undef @f;
1405     $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains at least A-F, a-f';
1406     for ('A' .. 'F', 'a' .. 'f') {
1407         if ($is_utf8_locale) {
1408             use locale ':not_characters';
1409             push @f, $_  unless /[[:xdigit:]]/;
1410         }
1411         else {
1412             push @f, $_  unless /[[:xdigit:]]/;
1413         }
1414     }
1415     report_multi_result($Locale, $locales_test_number, \@f);
1416
1417     ++$locales_test_number;
1418     undef @f;
1419     $test_names{$locales_test_number} = 'Verify that any additional members of [:xdigit:], are in groups of 6 consecutive code points';
1420     my $previous_ord;
1421     my $count = 0;
1422     for my $chr (map { chr } 0..255) {
1423         next unless $chr =~ /[[:xdigit:]]/;
1424         if ($is_utf8_locale) {
1425             next if $chr =~ /[[:digit:]]/;
1426         }
1427         else {
1428             next if grep { $chr eq $_ } @xdigit_digits;
1429         }
1430         next if $chr =~ /[A-Fa-f]/;
1431         if (defined $previous_ord) {
1432             if ($is_utf8_locale) {
1433                 use locale ':not_characters';
1434                 push @f, $chr if ord $chr != $previous_ord + 1;
1435             }
1436             else {
1437                 push @f, $chr if ord $chr != $previous_ord + 1;
1438             }
1439         }
1440         $count++;
1441         if ($count == 6) {
1442             undef $previous_ord;
1443         }
1444         else {
1445             $previous_ord = ord $chr;
1446         }
1447     }
1448     report_multi_result($Locale, $locales_test_number, \@f);
1449
1450     ++$locales_test_number;
1451     undef @f;
1452     $test_names{$locales_test_number} = 'Verify that [:xdigit:] is a subset of [:graph:]';
1453     for (map { chr } 0..255) {
1454         if ($is_utf8_locale) {
1455             use locale ':not_characters';
1456             push @f, $_ if /[[:xdigit:]]/  and ! /[[:graph:]]/;
1457         }
1458         else {
1459             push @f, $_ if /[[:xdigit:]]/  and ! /[[:graph:]]/;
1460         }
1461     }
1462     report_multi_result($Locale, $locales_test_number, \@f);
1463
1464     # Note that xdigit doesn't have to be a subset of alnum
1465
1466     ++$locales_test_number;
1467     undef @f;
1468     $test_names{$locales_test_number} = 'Verify that [:punct:] is a subset of [:graph:]';
1469     for (map { chr } 0..255) {
1470         if ($is_utf8_locale) {
1471             use locale ':not_characters';
1472             push @f, $_ if /[[:punct:]]/  and ! /[[:graph:]]/;
1473         }
1474         else {
1475             push @f, $_ if /[[:punct:]]/  and ! /[[:graph:]]/;
1476         }
1477     }
1478     report_multi_result($Locale, $locales_test_number, \@f);
1479
1480     ++$locales_test_number;
1481     undef @f;
1482     $test_names{$locales_test_number} = 'Verify that the space character is not in [:graph:]';
1483     if ($is_utf8_locale) {
1484         use locale ':not_characters';
1485         push @f, " " if " " =~ /[[:graph:]]/;
1486     }
1487     else {
1488         push @f, " " if " " =~ /[[:graph:]]/;
1489     }
1490     report_multi_result($Locale, $locales_test_number, \@f);
1491
1492     ++$locales_test_number;
1493     undef @f;
1494     $test_names{$locales_test_number} = 'Verify that [:space:] contains at least [\f\n\r\t\cK ]';
1495     for (' ', "\f", "\n", "\r", "\t", "\cK") {
1496         if ($is_utf8_locale) {
1497             use locale ':not_characters';
1498             push @f, $_  unless /[[:space:]]/;
1499         }
1500         else {
1501             push @f, $_  unless /[[:space:]]/;
1502         }
1503     }
1504     report_multi_result($Locale, $locales_test_number, \@f);
1505
1506     ++$locales_test_number;
1507     undef @f;
1508     $test_names{$locales_test_number} = 'Verify that [:blank:] contains at least [\t ]';
1509     for (' ', "\t") {
1510         if ($is_utf8_locale) {
1511             use locale ':not_characters';
1512             push @f, $_  unless /[[:blank:]]/;
1513         }
1514         else {
1515             push @f, $_  unless /[[:blank:]]/;
1516         }
1517     }
1518     report_multi_result($Locale, $locales_test_number, \@f);
1519
1520     ++$locales_test_number;
1521     undef @f;
1522     $test_names{$locales_test_number} = 'Verify that [:blank:] is a subset of [:space:]';
1523     for (map { chr } 0..255) {
1524         if ($is_utf8_locale) {
1525             use locale ':not_characters';
1526             push @f, $_ if /[[:blank:]]/  and ! /[[:space:]]/;
1527         }
1528         else {
1529             push @f, $_ if /[[:blank:]]/  and ! /[[:space:]]/;
1530         }
1531     }
1532     report_multi_result($Locale, $locales_test_number, \@f);
1533
1534     ++$locales_test_number;
1535     undef @f;
1536     $test_names{$locales_test_number} = 'Verify that [:graph:] is a subset of [:print:]';
1537     for (map { chr } 0..255) {
1538         if ($is_utf8_locale) {
1539             use locale ':not_characters';
1540             push @f, $_ if /[[:graph:]]/  and ! /[[:print:]]/;
1541         }
1542         else {
1543             push @f, $_ if /[[:graph:]]/  and ! /[[:print:]]/;
1544         }
1545     }
1546     report_multi_result($Locale, $locales_test_number, \@f);
1547
1548     ++$locales_test_number;
1549     undef @f;
1550     $test_names{$locales_test_number} = 'Verify that the space character is in [:print:]';
1551     if ($is_utf8_locale) {
1552         use locale ':not_characters';
1553         push @f, " " if " " !~ /[[:print:]]/;
1554     }
1555     else {
1556         push @f, " " if " " !~ /[[:print:]]/;
1557     }
1558     report_multi_result($Locale, $locales_test_number, \@f);
1559
1560     ++$locales_test_number;
1561     undef @f;
1562     $test_names{$locales_test_number} = 'Verify that isn\'t both [:cntrl:] and [:print:]';
1563     for (map { chr } 0..255) {
1564         if ($is_utf8_locale) {
1565             use locale ':not_characters';
1566             push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/);
1567         }
1568         else {
1569             push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/);
1570         }
1571     }
1572     report_multi_result($Locale, $locales_test_number, \@f);
1573
1574     ++$locales_test_number;
1575     undef @f;
1576     $test_names{$locales_test_number} = 'Verify that isn\'t both [:alpha:] and [:digit:]';
1577     for (map { chr } 0..255) {
1578         if ($is_utf8_locale) {
1579             use locale ':not_characters';
1580             push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/;
1581         }
1582         else {
1583             push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/;
1584         }
1585     }
1586     report_multi_result($Locale, $locales_test_number, \@f);
1587
1588     ++$locales_test_number;
1589     undef @f;
1590     $test_names{$locales_test_number} = 'Verify that isn\'t both [:alnum:] and [:punct:]';
1591     for (map { chr } 0..255) {
1592         if ($is_utf8_locale) {
1593             use locale ':not_characters';
1594             push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/;
1595         }
1596         else {
1597             push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/;
1598         }
1599     }
1600     report_multi_result($Locale, $locales_test_number, \@f);
1601
1602     ++$locales_test_number;
1603     undef @f;
1604     $test_names{$locales_test_number} = 'Verify that isn\'t both [:xdigit:] and [:punct:]';
1605     for (map { chr } 0..255) {
1606         if ($is_utf8_locale) {
1607             use locale ':not_characters';
1608             push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/);
1609         }
1610         else {
1611             push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/);
1612         }
1613     }
1614     report_multi_result($Locale, $locales_test_number, \@f);
1615
1616     ++$locales_test_number;
1617     undef @f;
1618     $test_names{$locales_test_number} = 'Verify that isn\'t both [:graph:] and [:space:]';
1619     for (map { chr } 0..255) {
1620         if ($is_utf8_locale) {
1621             use locale ':not_characters';
1622             push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/);
1623         }
1624         else {
1625             push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/);
1626         }
1627     }
1628     report_multi_result($Locale, $locales_test_number, \@f);
1629
1630     foreach ($first_casing_test_number..$locales_test_number) {
1631         $problematical_tests{$_} = 1;
1632     }
1633
1634
1635     # Test for read-only scalars' locale vs non-locale comparisons.
1636
1637     {
1638         no locale;
1639         my $ok;
1640         $a = "qwerty";
1641         if ($is_utf8_locale) {
1642             use locale ':not_characters';
1643             $ok = ($a cmp "qwerty") == 0;
1644         }
1645         else {
1646             use locale;
1647             $ok = ($a cmp "qwerty") == 0;
1648         }
1649         report_result($Locale, ++$locales_test_number, $ok);
1650         $test_names{$locales_test_number} = 'Verify that cmp works with a read-only scalar; no- vs locale';
1651     }
1652
1653     {
1654         my ($from, $to, $lesser, $greater,
1655             @test, %test, $test, $yes, $no, $sign);
1656
1657         ++$locales_test_number;
1658         $test_names{$locales_test_number} = 'Verify that "le", "ne", etc work';
1659         $not_necessarily_a_problem_test_number = $locales_test_number;
1660         for (0..9) {
1661             # Select a slice.
1662             $from = int(($_*@{$posixes{'word'}})/10);
1663             $to = $from + int(@{$posixes{'word'}}/10);
1664             $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}});
1665             $lesser  = join('', @{$posixes{'word'}}[$from..$to]);
1666             # Select a slice one character on.
1667             $from++; $to++;
1668             $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}});
1669             $greater = join('', @{$posixes{'word'}}[$from..$to]);
1670             if ($is_utf8_locale) {
1671                 use locale ':not_characters';
1672                 ($yes, $no, $sign) = ($lesser lt $greater
1673                                     ? ("    ", "not ", 1)
1674                                     : ("not ", "    ", -1));
1675             }
1676             else {
1677                 use locale;
1678                 ($yes, $no, $sign) = ($lesser lt $greater
1679                                     ? ("    ", "not ", 1)
1680                                     : ("not ", "    ", -1));
1681             }
1682             # all these tests should FAIL (return 0).  Exact lt or gt cannot
1683             # be tested because in some locales, say, eacute and E may test
1684             # equal.
1685             @test =
1686                 (
1687                     $no.'    ($lesser  le $greater)',  # 1
1688                     'not      ($lesser  ne $greater)', # 2
1689                     '         ($lesser  eq $greater)', # 3
1690                     $yes.'    ($lesser  ge $greater)', # 4
1691                     $yes.'    ($lesser  ge $greater)', # 5
1692                     $yes.'    ($greater le $lesser )', # 7
1693                     'not      ($greater ne $lesser )', # 8
1694                     '         ($greater eq $lesser )', # 9
1695                     $no.'     ($greater ge $lesser )', # 10
1696                     'not (($lesser cmp $greater) == -($sign))' # 11
1697                     );
1698             @test{@test} = 0 x @test;
1699             $test = 0;
1700             for my $ti (@test) {
1701                 if ($is_utf8_locale) {
1702                     use locale ':not_characters';
1703                     $test{$ti} = eval $ti;
1704                 }
1705                 else {
1706                     # Already in 'use locale';
1707                     $test{$ti} = eval $ti;
1708                 }
1709                 $test ||= $test{$ti}
1710             }
1711             report_result($Locale, $locales_test_number, $test == 0);
1712             if ($test) {
1713                 debug "lesser  = '$lesser'\n";
1714                 debug "greater = '$greater'\n";
1715                 debug "lesser cmp greater = ",
1716                         $lesser cmp $greater, "\n";
1717                 debug "greater cmp lesser = ",
1718                         $greater cmp $lesser, "\n";
1719                 debug "(greater) from = $from, to = $to\n";
1720                 for my $ti (@test) {
1721                     debugf("# %-40s %-4s", $ti,
1722                             $test{$ti} ? 'FAIL' : 'ok');
1723                     if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) {
1724                         debugf("(%s == %4d)", $1, eval $1);
1725                     }
1726                     debugf("\n#");
1727                 }
1728
1729                 last;
1730             }
1731         }
1732     }
1733
1734     my $ok1;
1735     my $ok2;
1736     my $ok3;
1737     my $ok4;
1738     my $ok5;
1739     my $ok6;
1740     my $ok7;
1741     my $ok8;
1742     my $ok9;
1743     my $ok10;
1744     my $ok11;
1745     my $ok12;
1746     my $ok13;
1747     my $ok14;
1748     my $ok14_5;
1749     my $ok15;
1750     my $ok16;
1751     my $ok17;
1752     my $ok18;
1753     my $ok19;
1754     my $ok20;
1755     my $ok21;
1756
1757     my $c;
1758     my $d;
1759     my $e;
1760     my $f;
1761     my $g;
1762     my $h;
1763     my $i;
1764     my $j;
1765
1766     if (! $is_utf8_locale) {
1767         use locale;
1768
1769         my ($x, $y) = (1.23, 1.23);
1770
1771         $a = "$x";
1772         printf ''; # printf used to reset locale to "C"
1773         $b = "$y";
1774         $ok1 = $a eq $b;
1775
1776         $c = "$x";
1777         my $z = sprintf ''; # sprintf used to reset locale to "C"
1778         $d = "$y";
1779         $ok2 = $c eq $d;
1780         {
1781
1782             use warnings;
1783             my $w = 0;
1784             local $SIG{__WARN__} =
1785                 sub {
1786                     print "# @_\n";
1787                     $w++;
1788                 };
1789
1790             # The == (among other ops) used to warn for locales
1791             # that had something else than "." as the radix character.
1792
1793             $ok3 = $c == 1.23;
1794             $ok4 = $c == $x;
1795             $ok5 = $c == $d;
1796             {
1797                 no locale;
1798
1799                 $e = "$x";
1800
1801                 $ok6 = $e == 1.23;
1802                 $ok7 = $e == $x;
1803                 $ok8 = $e == $c;
1804             }
1805
1806             $f = "1.23";
1807             $g = 2.34;
1808             $h = 1.5;
1809             $i = 1.25;
1810             $j = "$h:$i";
1811
1812             $ok9 = $f == 1.23;
1813             $ok10 = $f == $x;
1814             $ok11 = $f == $c;
1815             $ok12 = abs(($f + $g) - 3.57) < 0.01;
1816             $ok13 = $w == 0;
1817             $ok14 = $ok14_5 = $ok15 = $ok16 = 1;  # Skip for non-utf8 locales
1818         }
1819         {
1820             no locale;
1821             $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i);
1822         }
1823         $ok18 = $j eq sprintf("%g:%g", $h, $i);
1824     }
1825     else {
1826         use locale ':not_characters';
1827
1828         my ($x, $y) = (1.23, 1.23);
1829         $a = "$x";
1830         printf ''; # printf used to reset locale to "C"
1831         $b = "$y";
1832         $ok1 = $a eq $b;
1833
1834         $c = "$x";
1835         my $z = sprintf ''; # sprintf used to reset locale to "C"
1836         $d = "$y";
1837         $ok2 = $c eq $d;
1838         {
1839             use warnings;
1840             my $w = 0;
1841             local $SIG{__WARN__} =
1842                 sub {
1843                     print "# @_\n";
1844                     $w++;
1845                 };
1846             $ok3 = $c == 1.23;
1847             $ok4 = $c == $x;
1848             $ok5 = $c == $d;
1849             {
1850                 no locale;
1851                 $e = "$x";
1852
1853                 $ok6 = $e == 1.23;
1854                 $ok7 = $e == $x;
1855                 $ok8 = $e == $c;
1856             }
1857
1858             $f = "1.23";
1859             $g = 2.34;
1860             $h = 1.5;
1861             $i = 1.25;
1862             $j = "$h:$i";
1863
1864             $ok9 = $f == 1.23;
1865             $ok10 = $f == $x;
1866             $ok11 = $f == $c;
1867             $ok12 = abs(($f + $g) - 3.57) < 0.01;
1868             $ok13 = $w == 0;
1869
1870             # Look for non-ASCII error messages, and verify that the first
1871             # such is in UTF-8 (the others almost certainly will be like the
1872             # first).  This is only done if the current locale has LC_MESSAGES
1873             $ok14 = 1;
1874             $ok14_5 = 1;
1875             if (   locales_enabled('LC_MESSAGES')
1876                 && setlocale(&POSIX::LC_MESSAGES, $Locale))
1877             {
1878                 foreach my $err (keys %!) {
1879                     use Errno;
1880                     $! = eval "&Errno::$err";   # Convert to strerror() output
1881                     my $strerror = "$!";
1882                     if ("$strerror" =~ /\P{ASCII}/) {
1883                         $ok14 = utf8::is_utf8($strerror);
1884                         no locale;
1885                         $ok14_5 = "$!" !~ /\P{ASCII}/;
1886                         last;
1887                     }
1888                 }
1889             }
1890
1891             # Similarly, we verify that a non-ASCII radix is in UTF-8.  This
1892             # also catches if there is a disparity between sprintf and
1893             # stringification.
1894
1895             my $string_g = "$g";
1896             my $sprintf_g = sprintf("%g", $g);
1897
1898             $ok15 = $string_g =~ / ^ \p{ASCII}+ $ /x || utf8::is_utf8($string_g);
1899             $ok16 = $sprintf_g eq $string_g;
1900         }
1901         {
1902             no locale;
1903             $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i);
1904         }
1905         $ok18 = $j eq sprintf("%g:%g", $h, $i);
1906     }
1907
1908     $ok19 = $ok20 = 1;
1909     if (setlocale(&POSIX::LC_TIME, $Locale)) { # These tests aren't affected by
1910                                                # :not_characters
1911         my @times = CORE::localtime();
1912
1913         use locale;
1914         $ok19 = POSIX::strftime("%p", @times) ne "%p"; # [perl #119425]
1915         my $date = POSIX::strftime("'%A'  '%B'  '%Z'  '%p'", @times);
1916         debug("'Day' 'Month' 'TZ' 'am/pm' = ", disp_str($date));
1917
1918         # If there is any non-ascii, it better be UTF-8 in a UTF-8 locale, and
1919         # not UTF-8 if the locale isn't UTF-8.
1920         $ok20 = $date =~ / ^ \p{ASCII}+ $ /x
1921                 || $is_utf8_locale == utf8::is_utf8($date);
1922     }
1923
1924     $ok21 = 1;
1925     if (locales_enabled('LC_MESSAGES')) {
1926         foreach my $err (keys %!) {
1927             no locale;
1928             use Errno;
1929             $! = eval "&Errno::$err";   # Convert to strerror() output
1930             my $strerror = "$!";
1931             if ("$strerror" =~ /\P{ASCII}/) {
1932                 $ok21 = 0;
1933                 last;
1934             }
1935         }
1936     }
1937
1938     report_result($Locale, ++$locales_test_number, $ok1);
1939     $test_names{$locales_test_number} = 'Verify that an intervening printf doesn\'t change assignment results';
1940     my $first_a_test = $locales_test_number;
1941
1942     debug "$first_a_test..$locales_test_number: \$a = $a, \$b = $b, Locale = $Locale\n";
1943
1944     report_result($Locale, ++$locales_test_number, $ok2);
1945     $test_names{$locales_test_number} = 'Verify that an intervening sprintf doesn\'t change assignment results';
1946
1947     my $first_c_test = $locales_test_number;
1948
1949     $test_names{++$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a constant';
1950     if ($Config{usequadmath}) {
1951         print "# Skip: no locale radix with usequadmath ($test_names{$locales_test_number})\n";
1952         report_result($Locale, $locales_test_number, 1);
1953     } else {
1954         report_result($Locale, $locales_test_number, $ok3);
1955         $problematical_tests{$locales_test_number} = 1;
1956     }
1957
1958     $test_names{++$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar';
1959     if ($Config{usequadmath}) {
1960         print "# Skip: no locale radix with usequadmath ($test_names{$locales_test_number})\n";
1961         report_result($Locale, $locales_test_number, 1);
1962     } else {
1963         report_result($Locale, $locales_test_number, $ok4);
1964         $problematical_tests{$locales_test_number} = 1;
1965     }
1966
1967     report_result($Locale, ++$locales_test_number, $ok5);
1968     $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar and an intervening sprintf';
1969     $problematical_tests{$locales_test_number} = 1;
1970
1971     debug "$first_c_test..$locales_test_number: \$c = $c, \$d = $d, Locale = $Locale\n";
1972
1973     report_result($Locale, ++$locales_test_number, $ok6);
1974     $test_names{$locales_test_number} = 'Verify that can assign stringified under inner no-locale block';
1975     my $first_e_test = $locales_test_number;
1976
1977     report_result($Locale, ++$locales_test_number, $ok7);
1978     $test_names{$locales_test_number} = 'Verify that "==" with a scalar still works in inner no locale';
1979
1980     $test_names{++$locales_test_number} = 'Verify that "==" with a scalar and an intervening sprintf still works in inner no locale';
1981     if ($Config{usequadmath}) {
1982         print "# Skip: no locale radix with usequadmath ($test_names{$locales_test_number})\n";
1983         report_result($Locale, $locales_test_number, 1);
1984     } else {
1985         report_result($Locale, $locales_test_number, $ok8);
1986         $problematical_tests{$locales_test_number} = 1;
1987     }
1988
1989     debug "$first_e_test..$locales_test_number: \$e = $e, no locale\n";
1990
1991     report_result($Locale, ++$locales_test_number, $ok9);
1992     $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a constant';
1993     $problematical_tests{$locales_test_number} = 1;
1994     my $first_f_test = $locales_test_number;
1995
1996     report_result($Locale, ++$locales_test_number, $ok10);
1997     $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar';
1998     $problematical_tests{$locales_test_number} = 1;
1999
2000     $test_names{++$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar and an intervening sprintf';
2001     if ($Config{usequadmath}) {
2002         print "# Skip: no locale radix with usequadmath ($test_names{$locales_test_number})\n";
2003         report_result($Locale, $locales_test_number, 1);
2004     } else {
2005         report_result($Locale, $locales_test_number, $ok11);
2006         $problematical_tests{$locales_test_number} = 1;
2007     }
2008
2009     report_result($Locale, ++$locales_test_number, $ok12);
2010     $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix can participate in an addition and function call as numeric';
2011     $problematical_tests{$locales_test_number} = 1;
2012
2013     report_result($Locale, ++$locales_test_number, $ok13);
2014     $test_names{$locales_test_number} = 'Verify that don\'t get warning under "==" even if radix is not a dot';
2015     $problematical_tests{$locales_test_number} = 1;
2016
2017     report_result($Locale, ++$locales_test_number, $ok14);
2018     $test_names{$locales_test_number} = 'Verify that non-ASCII UTF-8 error messages are in UTF-8';
2019
2020     report_result($Locale, ++$locales_test_number, $ok14_5);
2021     $test_names{$locales_test_number} = '... and are ASCII outside "use locale"';
2022
2023     report_result($Locale, ++$locales_test_number, $ok15);
2024     $test_names{$locales_test_number} = 'Verify that a number with a UTF-8 radix has a UTF-8 stringification';
2025
2026     report_result($Locale, ++$locales_test_number, $ok16);
2027     $test_names{$locales_test_number} = 'Verify that a sprintf of a number with a UTF-8 radix yields UTF-8';
2028
2029     report_result($Locale, ++$locales_test_number, $ok17);
2030     $test_names{$locales_test_number} = 'Verify that a sprintf of a number outside locale scope uses a dot radix';
2031
2032     report_result($Locale, ++$locales_test_number, $ok18);
2033     $test_names{$locales_test_number} = 'Verify that a sprintf of a number back within locale scope uses locale radix';
2034
2035     report_result($Locale, ++$locales_test_number, $ok19);
2036     $test_names{$locales_test_number} = 'Verify that strftime doesn\'t return "%p" in locales where %p is empty';
2037
2038     report_result($Locale, ++$locales_test_number, $ok20);
2039     $test_names{$locales_test_number} = 'Verify that strftime returns date with UTF-8 flag appropriately set';
2040     $problematical_tests{$locales_test_number} = 1;   # This is broken in
2041                                                       # OS X 10.9.3
2042
2043     report_result($Locale, ++$locales_test_number, $ok21);
2044     $test_names{$locales_test_number} = '"$!" is ASCII only outside of locale scope';
2045
2046     debug "$first_f_test..$locales_test_number: \$f = $f, \$g = $g, back to locale = $Locale\n";
2047
2048     # Does taking lc separately differ from taking
2049     # the lc "in-line"?  (This was the bug 19990704.002, change #3568.)
2050     # The bug was in the caching of the 'o'-magic.
2051     if (! $is_utf8_locale) {
2052         use locale;
2053
2054         sub lcA {
2055             my $lc0 = lc $_[0];
2056             my $lc1 = lc $_[1];
2057             return $lc0 cmp $lc1;
2058         }
2059
2060         sub lcB {
2061             return lc($_[0]) cmp lc($_[1]);
2062         }
2063
2064         my $x = "ab";
2065         my $y = "aa";
2066         my $z = "AB";
2067
2068         report_result($Locale, ++$locales_test_number,
2069                     lcA($x, $y) == 1 && lcB($x, $y) == 1 ||
2070                     lcA($x, $z) == 0 && lcB($x, $z) == 0);
2071     }
2072     else {
2073         use locale ':not_characters';
2074
2075         sub lcC {
2076             my $lc0 = lc $_[0];
2077             my $lc1 = lc $_[1];
2078             return $lc0 cmp $lc1;
2079         }
2080
2081         sub lcD {
2082             return lc($_[0]) cmp lc($_[1]);
2083         }
2084
2085         my $x = "ab";
2086         my $y = "aa";
2087         my $z = "AB";
2088
2089         report_result($Locale, ++$locales_test_number,
2090                     lcC($x, $y) == 1 && lcD($x, $y) == 1 ||
2091                     lcC($x, $z) == 0 && lcD($x, $z) == 0);
2092     }
2093     $test_names{$locales_test_number} = 'Verify "lc(foo) cmp lc(bar)" is the same as using intermediaries for the cmp';
2094
2095     # Does lc of an UPPER (if different from the UPPER) match
2096     # case-insensitively the UPPER, and does the UPPER match
2097     # case-insensitively the lc of the UPPER.  And vice versa.
2098     {
2099         use locale;
2100         no utf8;
2101         my $re = qr/[\[\(\{\*\+\?\|\^\$\\]/;
2102
2103         my @f = ();
2104         ++$locales_test_number;
2105         $test_names{$locales_test_number} = 'Verify case insensitive matching works';
2106         foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) {
2107             if (! $is_utf8_locale) {
2108                 my $y = lc $x;
2109                 next unless uc $y eq $x;
2110                 debug_more( "UPPER=", disp_chars(($x)),
2111                             "; lc=", disp_chars(($y)), "; ",
2112                             "; fc=", disp_chars((fc $x)), "; ",
2113                             disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
2114                             $x =~ /\Q$y/i ? 1 : 0,
2115                             "; ",
2116                             disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
2117                             $y =~ /\Q$x/i ? 1 : 0,
2118                             "\n");
2119                 #
2120                 # If $x and $y contain regular expression characters
2121                 # AND THEY lowercase (/i) to regular expression characters,
2122                 # regcomp() will be mightily confused.  No, the \Q doesn't
2123                 # help here (maybe regex engine internal lowercasing
2124                 # is done after the \Q?)  An example of this happening is
2125                 # the bg_BG (Bulgarian) locale under EBCDIC (OS/390 USS):
2126                 # the chr(173) (the "[") is the lowercase of the chr(235).
2127                 #
2128                 # Similarly losing EBCDIC locales include cs_cz, cs_CZ,
2129                 # el_gr, el_GR, en_us.IBM-037 (!), en_US.IBM-037 (!),
2130                 # et_ee, et_EE, hr_hr, hr_HR, hu_hu, hu_HU, lt_LT,
2131                 # mk_mk, mk_MK, nl_nl.IBM-037, nl_NL.IBM-037,
2132                 # pl_pl, pl_PL, ro_ro, ro_RO, ru_ru, ru_RU,
2133                 # sk_sk, sk_SK, sl_si, sl_SI, tr_tr, tr_TR.
2134                 #
2135                 # Similar things can happen even under (bastardised)
2136                 # non-EBCDIC locales: in many European countries before the
2137                 # advent of ISO 8859-x nationally customised versions of
2138                 # ISO 646 were devised, reusing certain punctuation
2139                 # characters for modified characters needed by the
2140                 # country/language.  For example, the "|" might have
2141                 # stood for U+00F6 or LATIN SMALL LETTER O WITH DIAERESIS.
2142                 #
2143                 if ($x =~ $re || $y =~ $re) {
2144                     print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n";
2145                     next;
2146                 }
2147                 push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
2148
2149                 # fc is not a locale concept, so Perl uses lc for it.
2150                 push @f, $x unless lc $x eq fc $x;
2151             }
2152             else {
2153                 use locale ':not_characters';
2154                 my $y = lc $x;
2155                 next unless uc $y eq $x;
2156                 debug_more( "UPPER=", disp_chars(($x)),
2157                             "; lc=", disp_chars(($y)), "; ",
2158                             "; fc=", disp_chars((fc $x)), "; ",
2159                             disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
2160                             $x =~ /\Q$y/i ? 1 : 0,
2161                             "; ",
2162                             disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
2163                             $y =~ /\Q$x/i ? 1 : 0,
2164                             "\n");
2165
2166                 push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
2167
2168                 # The places where Unicode's lc is different from fc are
2169                 # skipped here by virtue of the 'next unless uc...' line above
2170                 push @f, $x unless lc $x eq fc $x;
2171             }
2172         }
2173
2174         foreach my $x (sort { ord $a <=> ord $b } keys %lower) {
2175             if (! $is_utf8_locale) {
2176                 my $y = uc $x;
2177                 next unless lc $y eq $x;
2178                 debug_more( "lower=", disp_chars(($x)),
2179                             "; uc=", disp_chars(($y)), "; ",
2180                             "; fc=", disp_chars((fc $x)), "; ",
2181                             disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
2182                             $x =~ /\Q$y/i ? 1 : 0,
2183                             "; ",
2184                             disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
2185                             $y =~ /\Q$x/i ? 1 : 0,
2186                             "\n");
2187                 if ($x =~ $re || $y =~ $re) { # See above.
2188                     print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n";
2189                     next;
2190                 }
2191                 push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
2192
2193                 push @f, $x unless lc $x eq fc $x;
2194             }
2195             else {
2196                 use locale ':not_characters';
2197                 my $y = uc $x;
2198                 next unless lc $y eq $x;
2199                 debug_more( "lower=", disp_chars(($x)),
2200                             "; uc=", disp_chars(($y)), "; ",
2201                             "; fc=", disp_chars((fc $x)), "; ",
2202                             disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
2203                             $x =~ /\Q$y/i ? 1 : 0,
2204                             "; ",
2205                             disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
2206                             $y =~ /\Q$x/i ? 1 : 0,
2207                             "\n");
2208                 push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
2209
2210                 push @f, $x unless lc $x eq fc $x;
2211             }
2212         }
2213         report_multi_result($Locale, $locales_test_number, \@f);
2214         $problematical_tests{$locales_test_number} = 1;
2215     }
2216
2217     # [perl #109318]
2218     {
2219         my @f = ();
2220         ++$locales_test_number;
2221         $test_names{$locales_test_number} = 'Verify atof with locale radix and negative exponent';
2222         $problematical_tests{$locales_test_number} = 1;
2223
2224         my $radix = POSIX::localeconv()->{decimal_point};
2225         my @nums = (
2226              "3.14e+9",  "3${radix}14e+9",  "3.14e-9",  "3${radix}14e-9",
2227             "-3.14e+9", "-3${radix}14e+9", "-3.14e-9", "-3${radix}14e-9",
2228         );
2229
2230         if (! $is_utf8_locale) {
2231             use locale;
2232             for my $num (@nums) {
2233                 push @f, $num
2234                     unless sprintf("%g", $num) =~ /3.+14/;
2235             }
2236         }
2237         else {
2238             use locale ':not_characters';
2239             for my $num (@nums) {
2240                 push @f, $num
2241                     unless sprintf("%g", $num) =~ /3.+14/;
2242             }
2243         }
2244
2245         if ($Config{usequadmath}) {
2246             print "# Skip: no locale radix with usequadmath ($Locale)\n";
2247             report_result($Locale, $locales_test_number, 1);
2248         } else {
2249             report_result($Locale, $locales_test_number, @f == 0);
2250             if (@f) {
2251                 print "# failed $locales_test_number locale '$Locale' numbers @f\n"
2252             }
2253         }
2254     }
2255 }
2256
2257 my $final_locales_test_number = $locales_test_number;
2258
2259 # Recount the errors.
2260
2261 TEST_NUM:
2262 foreach $test_num ($first_locales_test_number..$final_locales_test_number) {
2263     if (%setlocale_failed) {
2264         print "not ";
2265     }
2266     elsif ($Problem{$test_num}
2267            || ! defined $Okay{$test_num}
2268            || ! @{$Okay{$test_num}})
2269     {
2270         if (defined $not_necessarily_a_problem_test_number
2271             && $test_num == $not_necessarily_a_problem_test_number)
2272         {
2273             print "# The failure of test $not_necessarily_a_problem_test_number is not necessarily fatal.\n";
2274             print "# It usually indicates a problem in the environment,\n";
2275             print "# not in Perl itself.\n";
2276         }
2277
2278         # If there are any locales that pass this test, or are known-bad, it
2279         # may be that there are enough passes that we TODO the failure.
2280         if (($Okay{$test_num} || $Known_bad_locale{$test_num})
2281             && grep { $_ == $test_num } keys %problematical_tests)
2282         {
2283             no warnings 'experimental::postderef';
2284
2285             # Don't count the known-bad failures when calculating the
2286             # percentage that fail.
2287             my $known_failures = (exists $Known_bad_locale{$test_num})
2288                                   ? scalar(keys $Known_bad_locale{$test_num}->%*)
2289                                   : 0;
2290             my $adjusted_failures = scalar(keys $Problem{$test_num}->%*)
2291                                     - $known_failures;
2292
2293             # Specially handle failures where only known-bad locales fail.
2294             # This makes the diagnositics clearer.
2295             if ($adjusted_failures <= 0) {
2296                 print "not ok $test_num $test_names{$test_num} # TODO fails only on ",
2297                                                                 "known bad locales: ",
2298                       join " ", keys $Known_bad_locale{$test_num}->%*, "\n";
2299                 next TEST_NUM;
2300             }
2301
2302             # Round to nearest .1%
2303             my $percent_fail = (int(.5 + (1000 * $adjusted_failures
2304                                           / scalar(@Locale))))
2305                                / 10;
2306             if ($percent_fail < $acceptable_failure_percentage) {
2307                 if (! $debug) {
2308                     $test_names{$test_num} .= 'TODO';
2309                     print "# ", 100 - $percent_fail, "% of locales not known to be problematic on this platform\n";
2310                     print "# pass the following test, so it is likely that the failures\n";
2311                     print "# are errors in the locale definitions.  The test is marked TODO, as the\n";
2312                     print "# problem is not likely to be Perl's\n";
2313                 }
2314             }
2315             if ($debug) {
2316                 print "# $percent_fail% of locales (",
2317                       scalar(keys $Problem{$test_num}->%*),
2318                       " of ",
2319                       scalar(@Locale),
2320                       ") fail the above test (TODO cut-off is ",
2321                       $acceptable_failure_percentage,
2322                       "%)\n";
2323             }
2324         }
2325         print "#\n";
2326         if ($debug) {
2327             print "# The code points that had this failure are given above.  Look for lines\n";
2328             print "# that match 'failed $test_num'\n";
2329         }
2330         else {
2331             print "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n";
2332             print "# Then look at that output for lines that match 'failed $test_num'\n";
2333         }
2334         print "not ";
2335     }
2336     print "ok $test_num";
2337     if (defined $test_names{$test_num}) {
2338         # If TODO is in the test name, make it thus
2339         my $todo = $test_names{$test_num} =~ s/TODO\s*//;
2340         print " $test_names{$test_num}";
2341         print " # TODO" if $todo;
2342     }
2343     print "\n";
2344 }
2345
2346 $test_num = $final_locales_test_number;
2347
2348 unless ( $^O =~ m!^(dragonfly|openbsd|bitrig|mirbsd)$! ) {
2349     # perl #115808
2350     use warnings;
2351     my $warned = 0;
2352     local $SIG{__WARN__} = sub {
2353         $warned = $_[0] =~ /uninitialized/;
2354     };
2355     my $z = "y" . setlocale(&POSIX::LC_ALL, "xyzzy");
2356     ok($warned, "variable set to setlocale(BAD LOCALE) is considered uninitialized");
2357 }
2358
2359 # Test that tainting and case changing works on utf8 strings.  These tests are
2360 # placed last to avoid disturbing the hard-coded test numbers that existed at
2361 # the time these were added above this in this file.
2362 # This also tests that locale overrides unicode_strings in the same scope for
2363 # non-utf8 strings.
2364 setlocale(&POSIX::LC_ALL, "C");
2365 {
2366     use locale;
2367     use feature 'unicode_strings';
2368
2369     foreach my $function ("uc", "ucfirst", "lc", "lcfirst", "fc") {
2370         my @list;   # List of code points to test for $function
2371
2372         # Used to calculate the changed case for ASCII characters by using the
2373         # ord, instead of using one of the functions under test.
2374         my $ascii_case_change_delta;
2375         my $above_latin1_case_change_delta; # Same for the specific ords > 255
2376                                             # that we use
2377
2378         # We test an ASCII character, which should change case;
2379         # a Latin1 character, which shouldn't change case under this C locale,
2380         # an above-Latin1 character that when the case is changed would cross
2381         #   the 255/256 boundary, so doesn't change case
2382         #   (the \x{149} is one of these, but changes into 2 characters, the
2383         #   first one of which doesn't cross the boundary.
2384         # the final one in each list is an above-Latin1 character whose case
2385         #   does change.  The code below uses its position in its list as a
2386         #   marker to indicate that it, unlike the other code points above
2387         #   ASCII, has a successful case change
2388         #
2389         # All casing operations under locale (but not :not_characters) should
2390         # taint
2391         if ($function =~ /^u/) {
2392             @list = ("", "a",
2393                      chr(utf8::unicode_to_native(0xe0)),
2394                      chr(utf8::unicode_to_native(0xff)),
2395                      "\x{fb00}", "\x{149}", "\x{101}");
2396             $ascii_case_change_delta = ($is_ebcdic) ? +64 : -32;
2397             $above_latin1_case_change_delta = -1;
2398         }
2399         else {
2400             @list = ("", "A",
2401                      chr(utf8::unicode_to_native(0xC0)),
2402                      "\x{17F}", "\x{100}");
2403             $ascii_case_change_delta = ($is_ebcdic) ? -64 : +32;
2404             $above_latin1_case_change_delta = +1;
2405         }
2406         foreach my $is_utf8_locale (0 .. 1) {
2407             foreach my $j (0 .. $#list) {
2408                 my $char = $list[$j];
2409
2410                 for my $encoded_in_utf8 (0 .. 1) {
2411                     my $should_be;
2412                     my $changed;
2413                     if (! $is_utf8_locale) {
2414                         no warnings 'locale';
2415                         $should_be = ($j == $#list)
2416                             ? chr(ord($char) + $above_latin1_case_change_delta)
2417                             : (length $char == 0 || utf8::native_to_unicode(ord($char)) > 127)
2418                               ? $char
2419                               : chr(ord($char) + $ascii_case_change_delta);
2420
2421                         # This monstrosity is in order to avoid using an eval,
2422                         # which might perturb the results
2423                         $changed = ($function eq "uc")
2424                                     ? uc($char)
2425                                     : ($function eq "ucfirst")
2426                                       ? ucfirst($char)
2427                                       : ($function eq "lc")
2428                                         ? lc($char)
2429                                         : ($function eq "lcfirst")
2430                                           ? lcfirst($char)
2431                                           : ($function eq "fc")
2432                                             ? fc($char)
2433                                             : die("Unexpected function \"$function\"");
2434                     }
2435                     else {
2436                         {
2437                             no locale;
2438
2439                             # For utf8-locales the case changing functions
2440                             # should work just like they do outside of locale.
2441                             # Can use eval here because not testing it when
2442                             # not in locale.
2443                             $should_be = eval "$function('$char')";
2444                             die "Unexpected eval error $@ from 'eval \"$function('$char')\"'" if  $@;
2445
2446                         }
2447                         use locale ':not_characters';
2448                         $changed = ($function eq "uc")
2449                                     ? uc($char)
2450                                     : ($function eq "ucfirst")
2451                                       ? ucfirst($char)
2452                                       : ($function eq "lc")
2453                                         ? lc($char)
2454                                         : ($function eq "lcfirst")
2455                                           ? lcfirst($char)
2456                                           : ($function eq "fc")
2457                                             ? fc($char)
2458                                             : die("Unexpected function \"$function\"");
2459                     }
2460                     ok($changed eq $should_be,
2461                         "$function(\"$char\") in C locale "
2462                         . (($is_utf8_locale)
2463                             ? "(use locale ':not_characters'"
2464                             : "(use locale")
2465                         . (($encoded_in_utf8)
2466                             ? "; encoded in utf8)"
2467                             : "; not encoded in utf8)")
2468                         . " should be \"$should_be\", got \"$changed\"");
2469
2470                     # Tainting shouldn't happen for use locale :not_character
2471                     # (a utf8 locale)
2472                     (! $is_utf8_locale)
2473                     ? check_taint($changed)
2474                     : check_taint_not($changed);
2475
2476                     # Use UTF-8 next time through the loop
2477                     utf8::upgrade($char);
2478                 }
2479             }
2480         }
2481     }
2482 }
2483
2484 # Give final advice.
2485
2486 my $didwarn = 0;
2487
2488 foreach ($first_locales_test_number..$final_locales_test_number) {
2489     if ($Problem{$_}) {
2490         my @f = sort keys %{ $Problem{$_} };
2491
2492         # Don't list the failures caused by known-bad locales.
2493         if (exists $known_bad_locales{$^O}) {
2494             @f = grep { $_ !~ $known_bad_locales{$^O} } @f;
2495             next unless @f;
2496         }
2497         my $f = join(" ", @f);
2498         $f =~ s/(.{50,60}) /$1\n#\t/g;
2499         print
2500             "#\n",
2501             "# The locale ", (@f == 1 ? "definition" : "definitions"), "\n#\n",
2502             "#\t", $f, "\n#\n",
2503             "# on your system may have errors because the locale test $_\n",
2504             "# \"$test_names{$_}\"\n",
2505             "# failed in ", (@f == 1 ? "that locale" : "those locales"),
2506             ".\n";
2507         print <<EOW;
2508 #
2509 # If your users are not using these locales you are safe for the moment,
2510 # but please report this failure first to perlbug\@perl.com using the
2511 # perlbug script (as described in the INSTALL file) so that the exact
2512 # details of the failures can be sorted out first and then your operating
2513 # system supplier can be alerted about these anomalies.
2514 #
2515 EOW
2516         $didwarn = 1;
2517     }
2518 }
2519
2520 # Tell which locales were okay and which were not.
2521
2522 if ($didwarn) {
2523     my (@s, @F);
2524
2525     foreach my $l (@Locale) {
2526         my $p = 0;
2527         if ($setlocale_failed{$l}) {
2528             $p++;
2529         }
2530         else {
2531             foreach my $t
2532                         ($first_locales_test_number..$final_locales_test_number)
2533             {
2534                 $p++ if $Problem{$t}{$l};
2535             }
2536         }
2537         push @s, $l if $p == 0;
2538         push @F, $l unless $p == 0;
2539     }
2540
2541     if (@s) {
2542         my $s = join(" ", @s);
2543         $s =~ s/(.{50,60}) /$1\n#\t/g;
2544
2545         print
2546             "# The following locales\n#\n",
2547             "#\t", $s, "\n#\n",
2548             "# tested okay.\n#\n",
2549     } else {
2550         print "# None of your locales were fully okay.\n";
2551     }
2552
2553     if (@F) {
2554         my $F = join(" ", @F);
2555         $F =~ s/(.{50,60}) /$1\n#\t/g;
2556
2557         my $details = "";
2558         unless ($debug) {
2559             $details = "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n";
2560         }
2561         elsif ($debug == 1) {
2562             $details = "# For even more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=2.\n";
2563         }
2564
2565         print
2566           "# The following locales\n#\n",
2567           "#\t", $F, "\n#\n",
2568           "# had problems.\n#\n",
2569           $details;
2570     } else {
2571         print "# None of your locales were broken.\n";
2572     }
2573 }
2574
2575 if (exists $known_bad_locales{$^O} && ! %Known_bad_locale) {
2576     $test_num++;
2577     print "ok $test_num $^O no longer has known bad locales # TODO\n";
2578 }
2579
2580 print "1..$test_num\n";
2581
2582 # eof