t/lib/warnings/utf8

   1
   2   utf8.c AOK
   3
   4      [utf8_to_uvchr_buf]
   5      Malformed UTF-8 character
   6         my $a = ord "\x80" ;
   7
   8      Malformed UTF-8 character
   9         my $a = ord "\xf080" ;
  10      <<<<<< this warning can't be easily triggered from perl anymore
  11
  12      [utf16_to_utf8]
  13      Malformed UTF-16 surrogate
  14      <<<<<< Add a test when something actually calls utf16_to_utf8
  15
  16 __END__
  17 # utf8.c [utf8_to_uvchr_buf] -W
  18 # NAME Malformed under 'use utf8' in double-quoted string
  19 BEGIN {
  20     if (ord('A') == 193) {
  21         print "SKIPPED\n# ebcdic platforms generates different Malformed UTF-8 warnings.";
  22         exit 0;
  23     }
  24 }
  25 use utf8 ;
  26 no warnings;    # Malformed is a fatal error, so gets output anyway.
  27 my $a = "snøstorm" ;
  28 EXPECT
  29 Malformed UTF-8 character: \xf8\x73\x74\x6f\x72 (unexpected non-continuation byte 0x73, immediately after start byte 0xf8; need 5 bytes, got 1) at - line 10.
  30 Malformed UTF-8 character (fatal) at - line 10.
  31 ########
  32 # NAME Malformed under 'use utf8' in single-quoted string
  33 BEGIN {
  34     if (ord('A') == 193) {
  35         print "SKIPPED\n# ebcdic platforms generates different Malformed UTF-8 warnings.";
  36         exit 0;
  37     }
  38 }
  39 use utf8 ;
  40 no warnings;    # Malformed is a fatal error, so gets output anyway.
  41 my $a = 'snøstorm' ;
  42 EXPECT
  43 Malformed UTF-8 character: \xf8\x73\x74\x6f\x72 (unexpected non-continuation byte 0x73, immediately after start byte 0xf8; need 5 bytes, got 1) at - line 9.
  44 Malformed UTF-8 character (fatal) at - line 9.
  45 ########
  46 use warnings 'utf8';
  47 my $d7ff  = uc(chr(0xD7FF));
  48 my $d800  = uc(chr(0xD800));
  49 my $dfff  = uc(chr(0xDFFF));
  50 my $e000  = uc(chr(0xE000));
  51 my $feff  = uc(chr(0xFEFF));
  52 my $fffd  = uc(chr(0xFFFD));
  53 my $fffe  = uc(chr(0xFFFE));
  54 my $ffff  = uc(chr(0xFFFF));
  55 my $hex4  = uc(chr(0x10000));
  56 my $hex5  = uc(chr(0x100000));
  57 my $maxm1 = uc(chr(0x10FFFE));
  58 my $max   = uc(chr(0x10FFFF));
  59 my $nonUnicode =  uc(chr(0x110000));
  60 no warnings 'utf8';
  61 my $d7ff  = uc(chr(0xD7FF));
  62 my $d800  = uc(chr(0xD800));
  63 my $dfff  = uc(chr(0xDFFF));
  64 my $e000  = uc(chr(0xE000));
  65 my $feff  = uc(chr(0xFEFF));
  66 my $fffd  = uc(chr(0xFFFD));
  67 my $fffe  = uc(chr(0xFFFE));
  68 my $ffff  = uc(chr(0xFFFF));
  69 my $hex4  = uc(chr(0x10000));
  70 my $hex5  = uc(chr(0x100000));
  71 my $maxm1 = uc(chr(0x10FFFE));
  72 my $max   = uc(chr(0x10FFFF));
  73 my $nonUnicode =  uc(chr(0x110000));
  74 EXPECT
  75 Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 3.
  76 Operation "uc" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
  77 Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 14.
  78 ########
  79 use warnings 'utf8';
  80 my $d800  = uc(chr(0xD800));
  81 my $nonUnicode =  uc(chr(0x110000));
  82 no warnings 'surrogate';
  83 my $d800  = uc(chr(0xD800));
  84 my $nonUnicode =  uc(chr(0x110000));
  85 EXPECT
  86 Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2.
  87 Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3.
  88 Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 6.
  89 ########
  90 use warnings 'utf8';
  91 my $d800  = uc(chr(0xD800));
  92 my $nonUnicode =  uc(chr(0x110000));
  93 no warnings 'non_unicode';
  94 my $d800  = uc(chr(0xD800));
  95 my $nonUnicode =  uc(chr(0x110000));
  96 EXPECT
  97 Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2.
  98 Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3.
  99 Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 5.
 100 ########
 101 use warnings 'utf8';
 102 no warnings 'deprecated'; # This is above IV_MAX on 32 bit machines
 103 my $big_nonUnicode = uc(chr(0x8000_0000));   # the one warning listed under EXPECT (line 3)
 104 no warnings 'non_unicode';
 105 my $big_nonUnicode = uc(chr(0x8000_0000));   # non_unicode is off here: no second warning expected
 106 EXPECT
 107 Operation "uc" returns its argument for non-Unicode code point 0x80000000 at - line 3.
 108 ########
 109 use warnings 'utf8';
 110 my $d7ff  = lc pack("U", 0xD7FF);
 111 my $d800  = lc pack("U", 0xD800);
 112 my $dfff  = lc pack("U", 0xDFFF);
 113 my $e000  = lc pack("U", 0xE000);
 114 my $feff  = lc pack("U", 0xFEFF);
 115 my $fffd  = lc pack("U", 0xFFFD);
 116 my $fffe  = lc pack("U", 0xFFFE);
 117 my $ffff  = lc pack("U", 0xFFFF);
 118 my $hex4  = lc pack("U", 0x10000);
 119 my $hex5  = lc pack("U", 0x100000);
 120 my $maxm1 = lc pack("U", 0x10FFFE);
 121 my $max   = lc pack("U", 0x10FFFF);
 122 my $nonUnicode =  lc(pack("U", 0x110000));
 123 no warnings 'utf8';
 124 my $d7ff  = lc pack("U", 0xD7FF);
 125 my $d800  = lc pack("U", 0xD800);
 126 my $dfff  = lc pack("U", 0xDFFF);
 127 my $e000  = lc pack("U", 0xE000);
 128 my $feff  = lc pack("U", 0xFEFF);
 129 my $fffd  = lc pack("U", 0xFFFD);
 130 my $fffe  = lc pack("U", 0xFFFE);
 131 my $ffff  = lc pack("U", 0xFFFF);
 132 my $hex4  = lc pack("U", 0x10000);
 133 my $hex5  = lc pack("U", 0x100000);
 134 my $maxm1 = lc pack("U", 0x10FFFE);
 135 my $max   = lc pack("U", 0x10FFFF);
 136 my $nonUnicode =  lc(pack("U", 0x110000));
 137 EXPECT
 138 Operation "lc" returns its argument for UTF-16 surrogate U+D800 at - line 3.
 139 Operation "lc" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
 140 Operation "lc" returns its argument for non-Unicode code point 0x110000 at - line 14.
 141 ########
 142 use warnings 'utf8';
 143 my $d7ff  = ucfirst "\x{D7FF}";
 144 my $d800  = ucfirst "\x{D800}";
 145 my $dfff  = ucfirst "\x{DFFF}";
 146 my $e000  = ucfirst "\x{E000}";
 147 my $feff  = ucfirst "\x{FEFF}";
 148 my $fffd  = ucfirst "\x{FFFD}";
 149 my $fffe  = ucfirst "\x{FFFE}";
 150 my $ffff  = ucfirst "\x{FFFF}";
 151 my $hex4  = ucfirst "\x{10000}";
 152 my $hex5  = ucfirst "\x{100000}";
 153 my $maxm1 = ucfirst "\x{10FFFE}";
 154 my $max   = ucfirst "\x{10FFFF}";
 155 my $nonUnicode =  ucfirst "\x{110000}";
 156 no warnings 'utf8';
 157 my $d7ff  = ucfirst "\x{D7FF}";
 158 my $d800  = ucfirst "\x{D800}";
 159 my $dfff  = ucfirst "\x{DFFF}";
 160 my $e000  = ucfirst "\x{E000}";
 161 my $feff  = ucfirst "\x{FEFF}";
 162 my $fffd  = ucfirst "\x{FFFD}";
 163 my $fffe  = ucfirst "\x{FFFE}";
 164 my $ffff  = ucfirst "\x{FFFF}";
 165 my $hex4  = ucfirst "\x{10000}";
 166 my $hex5  = ucfirst "\x{100000}";
 167 my $maxm1 = ucfirst "\x{10FFFE}";
 168 my $max   = ucfirst "\x{10FFFF}";
 169 my $nonUnicode =  ucfirst "\x{110000}";
 170 EXPECT
 171 Operation "ucfirst" returns its argument for UTF-16 surrogate U+D800 at - line 3.
 172 Operation "ucfirst" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
 173 Operation "ucfirst" returns its argument for non-Unicode code point 0x110000 at - line 14.
 174 ########
 175 # NAME Matching \p{} against above-Unicode
 176 use warnings 'utf8';
 177 chr(0xD7FF) =~ /\p{Any}/;
 178 chr(0xD800) =~ /\p{Any}/;
 179 chr(0xDFFF) =~ /\p{Any}/;
 180 chr(0xE000) =~ /\p{Any}/;
 181 chr(0xFEFF) =~ /\p{Any}/;
 182 chr(0xFFFD) =~ /\p{Any}/;
 183 chr(0xFFFE) =~ /\p{Any}/;
 184 chr(0xFFFF) =~ /\p{Any}/;
 185 chr(0x10000) =~ /\p{Any}/;
 186 chr(0x100000) =~ /\p{Any}/;
 187 chr(0x10FFFE) =~ /\p{Any}/;
 188 chr(0x10FFFF) =~ /\p{Any}/;
 189 chr(0x110000) =~ /[\p{Any}]/;
 190 chr(0x110001) =~ /[\w\p{Any}]/;
 191 chr(0x10FFFF) =~ /\p{All}/;
 192 chr(0x110002) =~ /[\w\p{All}]/;
 193 chr(0x110003) =~ /[\p{XPosixWord}]/;
 194 chr(0x110004) =~ /[\P{XPosixWord}]/;
 195 chr(0x110005) =~ /^[\p{Unassigned}]/;
 196 chr(0x110006) =~ /^[\P{Unassigned}]/;
 197 # Only Unicode properties give non-Unicode warnings, and only those properties
 198 # which do match above Unicode; and not when something else in the class
 199 # matches above Unicode.  Below we test three ways where something outside the
 200 # property may match non-Unicode: a code point above it, a class \S that we
 201 # know at compile time doesn't, and a class \W whose values aren't (at the time
 202 # of this writing) specified at compile time, but which wouldn't match
 203 chr(0x110050) =~ /\w/;
 204 chr(0x110051) =~ /\W/;
 205 chr(0x110052) =~ /\d/;
 206 chr(0x110053) =~ /\D/;
 207 chr(0x110054) =~ /\s/;
 208 chr(0x110055) =~ /\S/;
 209 chr(0x110056) =~ /[[:word:]]/;
 210 chr(0x110057) =~ /[[:^word:]]/;
 211 chr(0x110058) =~ /[[:alnum:]]/;
 212 chr(0x110059) =~ /[[:^alnum:]]/;
 213 chr(0x11005A) =~ /[[:space:]]/;
 214 chr(0x11005B) =~ /[[:^space:]]/;
 215 chr(0x11005C) =~ /[[:digit:]]/;
 216 chr(0x11005D) =~ /[[:^digit:]]/;
 217 chr(0x11005E) =~ /[[:alpha:]]/;
 218 chr(0x11005F) =~ /[[:^alpha:]]/;
 219 chr(0x110060) =~ /[[:ascii:]]/;
 220 chr(0x110061) =~ /[[:^ascii:]]/;
 221 chr(0x110062) =~ /[[:cntrl:]]/;
 222 chr(0x110063) =~ /[[:^cntrl:]]/;
 223 chr(0x110064) =~ /[[:graph:]]/;
 224 chr(0x110065) =~ /[[:^graph:]]/;
 225 chr(0x110066) =~ /[[:lower:]]/;
 226 chr(0x110067) =~ /[[:^lower:]]/;
 227 chr(0x110068) =~ /[[:print:]]/;
 228 chr(0x110069) =~ /[[:^print:]]/;
 229 chr(0x11006A) =~ /[[:punct:]]/;
 230 chr(0x11006B) =~ /[[:^punct:]]/;
 231 chr(0x11006C) =~ /[[:upper:]]/;
 232 chr(0x11006D) =~ /[[:^upper:]]/;
 233 chr(0x11006E) =~ /[[:xdigit:]]/;
 234 chr(0x11006F) =~ /[[:^xdigit:]]/;
 235 chr(0x110070) =~ /[[:blank:]]/;
 236 chr(0x110071) =~ /[[:^blank:]]/;
 237 chr(0x111010) =~ /[\W\p{Unassigned}]/;
 238 chr(0x111011) =~ /[\W\P{Unassigned}]/;
 239 chr(0x112010) =~ /[\S\p{Unassigned}]/;
 240 chr(0x112011) =~ /[\S\P{Unassigned}]/;
 241 chr(0x113010) =~ /[\x{110000}\p{Unassigned}]/;
 242 chr(0x113011) =~ /[\x{110000}\P{Unassigned}]/;
 243 no warnings 'utf8';
 244 chr(0xD7FF) =~ /\p{Any}/;
 245 chr(0xD800) =~ /\p{Any}/;
 246 chr(0xDFFF) =~ /\p{Any}/;
 247 chr(0xE000) =~ /\p{Any}/;
 248 chr(0xFEFF) =~ /\p{Any}/;
 249 chr(0xFFFD) =~ /\p{Any}/;
 250 chr(0xFFFE) =~ /\p{Any}/;
 251 chr(0xFFFF) =~ /\p{Any}/;
 252 chr(0x10000) =~ /\p{Any}/;
 253 chr(0x100000) =~ /\p{Any}/;
 254 chr(0x10FFFE) =~ /\p{Any}/;
 255 chr(0x10FFFF) =~ /\p{Any}/;
 256 chr(0x110000) =~ /[\p{Any}]/;
 257 chr(0x110001) =~ /[\w\p{Any}]/;
 258 chr(0x10FFFF) =~ /\p{All}/;
 259 chr(0x110002) =~ /[\w\p{All}]/;
 260 chr(0x110003) =~ /[\p{XPosixWord}]/;
 261 chr(0x110004) =~ /[\P{XPosixWord}]/;
 262 chr(0x110005) =~ /^[\p{Unassigned}]/;
 263 chr(0x110006) =~ /^[\P{Unassigned}]/;
 264 chr(0x110050) =~ /\w/;
 265 chr(0x110051) =~ /\W/;
 266 chr(0x110052) =~ /\d/;
 267 chr(0x110053) =~ /\D/;
 268 chr(0x110054) =~ /\s/;
 269 chr(0x110055) =~ /\S/;
 270 chr(0x110056) =~ /[[:word:]]/;
 271 chr(0x110057) =~ /[[:^word:]]/;
 272 chr(0x110058) =~ /[[:alnum:]]/;
 273 chr(0x110059) =~ /[[:^alnum:]]/;
 274 chr(0x11005A) =~ /[[:space:]]/;
 275 chr(0x11005B) =~ /[[:^space:]]/;
 276 chr(0x11005C) =~ /[[:digit:]]/;
 277 chr(0x11005D) =~ /[[:^digit:]]/;
 278 chr(0x11005E) =~ /[[:alpha:]]/;
 279 chr(0x11005F) =~ /[[:^alpha:]]/;
 280 chr(0x110060) =~ /[[:ascii:]]/;
 281 chr(0x110061) =~ /[[:^ascii:]]/;
 282 chr(0x110062) =~ /[[:cntrl:]]/;
 283 chr(0x110063) =~ /[[:^cntrl:]]/;
 284 chr(0x110064) =~ /[[:graph:]]/;
 285 chr(0x110065) =~ /[[:^graph:]]/;
 286 chr(0x110066) =~ /[[:lower:]]/;
 287 chr(0x110067) =~ /[[:^lower:]]/;
 288 chr(0x110068) =~ /[[:print:]]/;
 289 chr(0x110069) =~ /[[:^print:]]/;
 290 chr(0x11006A) =~ /[[:punct:]]/;
 291 chr(0x11006B) =~ /[[:^punct:]]/;
 292 chr(0x11006C) =~ /[[:upper:]]/;
 293 chr(0x11006D) =~ /[[:^upper:]]/;
 294 chr(0x11006E) =~ /[[:xdigit:]]/;
 295 chr(0x11006F) =~ /[[:^xdigit:]]/;
 296 chr(0x110070) =~ /[[:blank:]]/;
 297 chr(0x110071) =~ /[[:^blank:]]/;
 298 chr(0x111010) =~ /[\W\p{Unassigned}]/;
 299 chr(0x111011) =~ /[\W\P{Unassigned}]/;
 300 chr(0x112010) =~ /[\S\p{Unassigned}]/;
 301 chr(0x112011) =~ /[\S\P{Unassigned}]/;
 302 chr(0x113010) =~ /[\x{110000}\p{Unassigned}]/;
 303 chr(0x113011) =~ /[\x{110000}\P{Unassigned}]/;
 304 EXPECT
 305 Matched non-Unicode code point 0x110005 against Unicode property; may not be portable at - line 20.
 306 Matched non-Unicode code point 0x110006 against Unicode property; may not be portable at - line 21.
 307 ########
 308 # NAME Matching Unicode property against above-Unicode code point outputs a warning even if optimizer rejects the match (in synthetic start class)
 309 # The warning now has to be made FATAL to guarantee that it is output
 310 use warnings FATAL => 'non_unicode';
 311 "\x{110000}" =~ /b?\p{Space}/;
 312 EXPECT
 313 Matched non-Unicode code point 0x110000 against Unicode property; may not be portable at - line 3.
 314 ########
 315 # NAME Matching POSIX class property against above-Unicode code point doesn't output a warning
 316 use warnings 'non_unicode';
 317 use warnings FATAL => 'non_unicode';
 318 "\x{110000}" =~ /b?[[:space:]]/;
 319 EXPECT
 320 ########
 321 use warnings 'utf8';
 322 chr(0x110000) =~ /\p{Any}/;
 323 ########
 324 # NAME utf8, non_unicode warnings categories work on Matched non-Unicode code point warning
 325 use warnings qw(utf8 non_unicode);
 326 chr(0x110000) =~ /^\p{Unassigned}/;   # the only match EXPECT lists a warning for (line 2)
 327 no warnings 'non_unicode';
 328 chr(0x110001) =~ /\p{Unassigned}/;    # 'non_unicode' disabled: no warning expected
 329 use warnings 'non_unicode';
 330 no warnings 'utf8';                   # issued after the 'use' above; EXPECT shows the next match stays silent
 331 chr(0x110002) =~ /\p{Unassigned}/;
 332 EXPECT
 333 Matched non-Unicode code point 0x110000 against Unicode property; may not be portable at - line 2.
 334 ########
 335 # NAME optimizable regnode should still give non_unicode warnings when fatalized
 336 use warnings 'utf8';
 337 use warnings FATAL => 'non_unicode';
 338 chr(0x110000) =~ /\p{lb=cr}/;
 339 EXPECT
 340 Matched non-Unicode code point 0x110000 against Unicode property; may not be portable at - line 3.
 341 ########
 342 # NAME optimizable regnode should not give non_unicode warnings when warnings are off
 343 no warnings 'non_unicode';
 344 chr(0x110000) =~ /\p{lb=cr}/;
 345 EXPECT
 346 ########
 347 # NAME 'All' matches above-Unicode without any warning
 348 use warnings qw(utf8 non_unicode);
 349 chr(0x110000) =~ /\p{All}/;
 350 EXPECT
 351 ########
 352 require "../test.pl";
 353 use warnings 'utf8';
 354 sub Is_Super { return '!utf8::Any' }   # user-defined property; the leading '!' negates utf8::Any (see perlunicode)
 355 # The extra \x{100} in the class is there to avoid an optimization that avoids
 356 # the problem when the property is the only non-latin1 char in a class
 357 print "\x{1100000}" =~ /^[\p{Is_Super}\x{100}]$/, "\n";
 358 EXPECT
 359 1
 360 ########
 361 require "../test.pl";
 362 use warnings 'utf8';
 363 my $file = tempfile();
 364 open(my $fh, "+>:utf8", $file);
 365 print $fh "\x{D7FF}", "\n";
 366 print $fh "\x{D800}", "\n";
 367 print $fh "\x{D900}", "\n";
 368 print $fh "\x{DA00}", "\n";
 369 print $fh "\x{DB00}", "\n";
 370 print $fh "\x{DC00}", "\n";
 371 print $fh "\x{DD00}", "\n";
 372 print $fh "\x{DE00}", "\n";
 373 print $fh "\x{DF00}", "\n";
 374 print $fh "\x{DFFF}", "\n";
 375 print $fh "\x{E000}", "\n";
 376 print $fh "\x{FDCF}", "\n";
 377 print $fh "\x{FDD0}", "\n";
 378 print $fh "\x{FDD1}", "\n";
 379 print $fh "\x{FDEF}", "\n";
 380 print $fh "\x{FDF0}", "\n";
 381 print $fh "\x{FDFE}", "\n";
 382 print $fh "\x{FDFF}", "\n";
 383 print $fh "\x{FE00}", "\n";
 384 print $fh "\x{FEFF}", "\n";
 385 print $fh "\x{FFFD}", "\n";
 386 print $fh "\x{FFFE}", "\n";
 387 print $fh "\x{FFFF}", "\n";
 388 print $fh "\x{10000}", "\n";
 389 print $fh "\x{1FFFD}", "\n";
 390 print $fh "\x{1FFFE}", "\n";
 391 print $fh "\x{1FFFF}", "\n";
 392 print $fh "\x{20000}", "\n";
 393 print $fh "\x{2FFFD}", "\n";
 394 print $fh "\x{2FFFE}", "\n";
 395 print $fh "\x{2FFFF}", "\n";
 396 print $fh "\x{30000}", "\n";
 397 print $fh "\x{3FFFD}", "\n";
 398 print $fh "\x{3FFFE}", "\n";
 399 print $fh "\x{3FFFF}", "\n";
 400 print $fh "\x{40000}", "\n";
 401 print $fh "\x{4FFFD}", "\n";
 402 print $fh "\x{4FFFE}", "\n";
 403 print $fh "\x{4FFFF}", "\n";
 404 print $fh "\x{50000}", "\n";
 405 print $fh "\x{5FFFD}", "\n";
 406 print $fh "\x{5FFFE}", "\n";
 407 print $fh "\x{5FFFF}", "\n";
 408 print $fh "\x{60000}", "\n";
 409 print $fh "\x{6FFFD}", "\n";
 410 print $fh "\x{6FFFE}", "\n";
 411 print $fh "\x{6FFFF}", "\n";
 412 print $fh "\x{70000}", "\n";
 413 print $fh "\x{7FFFD}", "\n";
 414 print $fh "\x{7FFFE}", "\n";
 415 print $fh "\x{7FFFF}", "\n";
 416 print $fh "\x{80000}", "\n";
 417 print $fh "\x{8FFFD}", "\n";
 418 print $fh "\x{8FFFE}", "\n";
 419 print $fh "\x{8FFFF}", "\n";
 420 print $fh "\x{90000}", "\n";
 421 print $fh "\x{9FFFD}", "\n";
 422 print $fh "\x{9FFFE}", "\n";
 423 print $fh "\x{9FFFF}", "\n";
 424 print $fh "\x{A0000}", "\n";
 425 print $fh "\x{AFFFD}", "\n";
 426 print $fh "\x{AFFFE}", "\n";
 427 print $fh "\x{AFFFF}", "\n";
 428 print $fh "\x{B0000}", "\n";
 429 print $fh "\x{BFFFD}", "\n";
 430 print $fh "\x{BFFFE}", "\n";
 431 print $fh "\x{BFFFF}", "\n";
 432 print $fh "\x{C0000}", "\n";
 433 print $fh "\x{CFFFD}", "\n";
 434 print $fh "\x{CFFFE}", "\n";
 435 print $fh "\x{CFFFF}", "\n";
 436 print $fh "\x{D0000}", "\n";
 437 print $fh "\x{DFFFD}", "\n";
 438 print $fh "\x{DFFFE}", "\n";
 439 print $fh "\x{DFFFF}", "\n";
 440 print $fh "\x{E0000}", "\n";
 441 print $fh "\x{EFFFD}", "\n";
 442 print $fh "\x{EFFFE}", "\n";
 443 print $fh "\x{EFFFF}", "\n";
 444 print $fh "\x{F0000}", "\n";
 445 print $fh "\x{FFFFD}", "\n";
 446 print $fh "\x{FFFFE}", "\n";
 447 print $fh "\x{FFFFF}", "\n";
 448 print $fh "\x{100000}", "\n";
 449 print $fh "\x{10FFFD}", "\n";
 450 print $fh "\x{10FFFE}", "\n";
 451 print $fh "\x{10FFFF}", "\n";
 452 print $fh "\x{110000}", "\n";
 453 print $fh "\x{11FFFD}", "\n";
 454 print $fh "\x{11FFFE}", "\n";
 455 print $fh "\x{11FFFF}", "\n";
 456 print $fh "\x{120000}", "\n";
 457 close $fh;
 458 EXPECT
 459 Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
 460 Unicode surrogate U+D900 is illegal in UTF-8 at - line 7.
 461 Unicode surrogate U+DA00 is illegal in UTF-8 at - line 8.
 462 Unicode surrogate U+DB00 is illegal in UTF-8 at - line 9.
 463 Unicode surrogate U+DC00 is illegal in UTF-8 at - line 10.
 464 Unicode surrogate U+DD00 is illegal in UTF-8 at - line 11.
 465 Unicode surrogate U+DE00 is illegal in UTF-8 at - line 12.
 466 Unicode surrogate U+DF00 is illegal in UTF-8 at - line 13.
 467 Unicode surrogate U+DFFF is illegal in UTF-8 at - line 14.
 468 Unicode non-character U+FDD0 is not recommended for open interchange in print at - line 17.
 469 Unicode non-character U+FDD1 is not recommended for open interchange in print at - line 18.
 470 Unicode non-character U+FDEF is not recommended for open interchange in print at - line 19.
 471 Unicode non-character U+FFFE is not recommended for open interchange in print at - line 26.
 472 Unicode non-character U+FFFF is not recommended for open interchange in print at - line 27.
 473 Unicode non-character U+1FFFE is not recommended for open interchange in print at - line 30.
 474 Unicode non-character U+1FFFF is not recommended for open interchange in print at - line 31.
 475 Unicode non-character U+2FFFE is not recommended for open interchange in print at - line 34.
 476 Unicode non-character U+2FFFF is not recommended for open interchange in print at - line 35.
 477 Unicode non-character U+3FFFE is not recommended for open interchange in print at - line 38.
 478 Unicode non-character U+3FFFF is not recommended for open interchange in print at - line 39.
 479 Unicode non-character U+4FFFE is not recommended for open interchange in print at - line 42.
 480 Unicode non-character U+4FFFF is not recommended for open interchange in print at - line 43.
 481 Unicode non-character U+5FFFE is not recommended for open interchange in print at - line 46.
 482 Unicode non-character U+5FFFF is not recommended for open interchange in print at - line 47.
 483 Unicode non-character U+6FFFE is not recommended for open interchange in print at - line 50.
 484 Unicode non-character U+6FFFF is not recommended for open interchange in print at - line 51.
 485 Unicode non-character U+7FFFE is not recommended for open interchange in print at - line 54.
 486 Unicode non-character U+7FFFF is not recommended for open interchange in print at - line 55.
 487 Unicode non-character U+8FFFE is not recommended for open interchange in print at - line 58.
 488 Unicode non-character U+8FFFF is not recommended for open interchange in print at - line 59.
 489 Unicode non-character U+9FFFE is not recommended for open interchange in print at - line 62.
 490 Unicode non-character U+9FFFF is not recommended for open interchange in print at - line 63.
 491 Unicode non-character U+AFFFE is not recommended for open interchange in print at - line 66.
 492 Unicode non-character U+AFFFF is not recommended for open interchange in print at - line 67.
 493 Unicode non-character U+BFFFE is not recommended for open interchange in print at - line 70.
 494 Unicode non-character U+BFFFF is not recommended for open interchange in print at - line 71.
 495 Unicode non-character U+CFFFE is not recommended for open interchange in print at - line 74.
 496 Unicode non-character U+CFFFF is not recommended for open interchange in print at - line 75.
 497 Unicode non-character U+DFFFE is not recommended for open interchange in print at - line 78.
 498 Unicode non-character U+DFFFF is not recommended for open interchange in print at - line 79.
 499 Unicode non-character U+EFFFE is not recommended for open interchange in print at - line 82.
 500 Unicode non-character U+EFFFF is not recommended for open interchange in print at - line 83.
 501 Unicode non-character U+FFFFE is not recommended for open interchange in print at - line 86.
 502 Unicode non-character U+FFFFF is not recommended for open interchange in print at - line 87.
 503 Unicode non-character U+10FFFE is not recommended for open interchange in print at - line 90.
 504 Unicode non-character U+10FFFF is not recommended for open interchange in print at - line 91.
 505 Code point 0x110000 is not Unicode, may not be portable in print at - line 92.
 506 Code point 0x11FFFD is not Unicode, may not be portable in print at - line 93.
 507 Code point 0x11FFFE is not Unicode, may not be portable in print at - line 94.
 508 Code point 0x11FFFF is not Unicode, may not be portable in print at - line 95.
 509 Code point 0x120000 is not Unicode, may not be portable in print at - line 96.
 510 ########
 511 require "../test.pl";
 512 use warnings 'utf8';
 513 my $file = tempfile();
 514 open(my $fh, "+>:utf8", $file);
 515 print $fh "\x{D800}", "\n";
 516 print $fh "\x{FFFF}", "\n";
 517 print $fh "\x{110000}", "\n";
 518 close $fh;
 519 EXPECT
 520 Unicode surrogate U+D800 is illegal in UTF-8 at - line 5.
 521 Unicode non-character U+FFFF is not recommended for open interchange in print at - line 6.
 522 Code point 0x110000 is not Unicode, may not be portable in print at - line 7.
 523 ########
 524 require "../test.pl";
 525 use warnings 'utf8';
 526 no warnings 'surrogate';
 527 my $file = tempfile();
 528 open(my $fh, "+>:utf8", $file);
 529 print $fh "\x{D800}", "\n";
 530 print $fh "\x{FFFF}", "\n";
 531 print $fh "\x{110000}", "\n";
 532 close $fh;
 533 EXPECT
 534 Unicode non-character U+FFFF is not recommended for open interchange in print at - line 7.
 535 Code point 0x110000 is not Unicode, may not be portable in print at - line 8.
 536 ########
 537 require "../test.pl";
 538 use warnings 'utf8';
 539 no warnings 'nonchar';
 540 my $file = tempfile();
 541 open(my $fh, "+>:utf8", $file);
 542 print $fh "\x{D800}", "\n";
 543 print $fh "\x{FFFF}", "\n";
 544 print $fh "\x{110000}", "\n";
 545 close $fh;
 546 EXPECT
 547 Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
 548 Code point 0x110000 is not Unicode, may not be portable in print at - line 8.
 549 ########
 550 require "../test.pl";
 551 use warnings 'utf8';
 552 no warnings 'non_unicode';
 553 my $file = tempfile();
 554 open(my $fh, "+>:utf8", $file);
 555 print $fh "\x{D800}", "\n";
 556 print $fh "\x{FFFF}", "\n";
 557 print $fh "\x{110000}", "\n";
 558 close $fh;
 559 EXPECT
 560 Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
 561 Unicode non-character U+FFFF is not recommended for open interchange in print at - line 7.
 562 ########
 563 # NAME C<use warnings "nonchar"> works in isolation
 564 require "../test.pl";
 565 use warnings 'nonchar';
 566 my $file = tempfile();
 567 open(my $fh, "+>:utf8", $file);
 568 print $fh "\x{FFFF}", "\n";
 569 close $fh;
 570 EXPECT
 571 Unicode non-character U+FFFF is not recommended for open interchange in print at - line 5.
 572 ########
 573 # NAME C<use warnings "surrogate"> works in isolation
 574 require "../test.pl";
 575 use warnings 'surrogate';
 576 my $file = tempfile();
 577 open(my $fh, "+>:utf8", $file);
 578 print $fh "\x{D800}", "\n";
 579 close $fh;
 580 EXPECT
 581 Unicode surrogate U+D800 is illegal in UTF-8 at - line 5.
 582 ########
 583 # NAME C<use warnings "non_unicode"> works in isolation
 584 require "../test.pl";
 585 use warnings 'non_unicode';
 586 my $file = tempfile();
 587 open(my $fh, "+>:utf8", $file);
 588 print $fh "\x{110000}", "\n";
 589 close $fh;
 590 EXPECT
 591 Code point 0x110000 is not Unicode, may not be portable in print at - line 5.
 592 ########
 593 require "../test.pl";
 594 no warnings 'utf8';
 595 my $file = tempfile();
 596 open(my $fh, "+>:utf8", $file);
 597 print $fh "\x{D7FF}", "\n";
 598 print $fh "\x{D800}", "\n";
 599 print $fh "\x{DFFF}", "\n";
 600 print $fh "\x{E000}", "\n";
 601 print $fh "\x{FDCF}", "\n";
 602 print $fh "\x{FDD0}", "\n";
 603 print $fh "\x{FDEF}", "\n";
 604 print $fh "\x{FDF0}", "\n";
 605 print $fh "\x{FEFF}", "\n";
 606 print $fh "\x{FFFD}", "\n";
 607 print $fh "\x{FFFE}", "\n";
 608 print $fh "\x{FFFF}", "\n";
 609 print $fh "\x{10000}", "\n";
 610 print $fh "\x{1FFFE}", "\n";
 611 print $fh "\x{1FFFF}", "\n";
 612 print $fh "\x{2FFFE}", "\n";
 613 print $fh "\x{2FFFF}", "\n";
 614 print $fh "\x{3FFFE}", "\n";
 615 print $fh "\x{3FFFF}", "\n";
 616 print $fh "\x{4FFFE}", "\n";
 617 print $fh "\x{4FFFF}", "\n";
 618 print $fh "\x{5FFFE}", "\n";
 619 print $fh "\x{5FFFF}", "\n";
 620 print $fh "\x{6FFFE}", "\n";
 621 print $fh "\x{6FFFF}", "\n";
 622 print $fh "\x{7FFFE}", "\n";
 623 print $fh "\x{7FFFF}", "\n";
 624 print $fh "\x{8FFFE}", "\n";
 625 print $fh "\x{8FFFF}", "\n";
 626 print $fh "\x{9FFFE}", "\n";
 627 print $fh "\x{9FFFF}", "\n";
 628 print $fh "\x{AFFFE}", "\n";
 629 print $fh "\x{AFFFF}", "\n";
 630 print $fh "\x{BFFFE}", "\n";
 631 print $fh "\x{BFFFF}", "\n";
 632 print $fh "\x{CFFFE}", "\n";
 633 print $fh "\x{CFFFF}", "\n";
 634 print $fh "\x{DFFFE}", "\n";
 635 print $fh "\x{DFFFF}", "\n";
 636 print $fh "\x{EFFFE}", "\n";
 637 print $fh "\x{EFFFF}", "\n";
 638 print $fh "\x{FFFFE}", "\n";
 639 print $fh "\x{FFFFF}", "\n";
 640 print $fh "\x{100000}", "\n";
 641 print $fh "\x{10FFFE}", "\n";
 642 print $fh "\x{10FFFF}", "\n";
 643 print $fh "\x{110000}", "\n";
 644 close $fh;
 645 EXPECT
 646 ########
 647 # NAME Case change crosses 255/256 under non-UTF8 locale
 648 require '../loc_tools.pl';
 649 unless (locales_enabled('LC_CTYPE')) {
 650     print("SKIPPED\n# locales not available\n"),exit;
 651 }
 652 eval { require POSIX; POSIX->import("locale_h") };
 653 if ($@) {
 654     print("SKIPPED\n# no POSIX\n"),exit;
 655 }
 656 use warnings 'locale';
 657 use feature 'fc';
 658 use locale;
 659 setlocale(&POSIX::LC_CTYPE, "C");
 660 my $a;
 661 $a = lc("\x{178}");
 662 $a = fc("\x{1E9E}");
 663 $a = fc("\x{FB05}");
 664 $a = uc("\x{FB00}");
 665 $a = ucfirst("\x{149}");
 666 $a = lcfirst("\x{178}");
 667 no warnings 'locale';
 668 $a = lc("\x{178}");
 669 $a = fc("\x{1E9E}");
 670 $a = fc("\x{FB05}");
 671 $a = uc("\x{FB00}");
 672 $a = ucfirst("\x{149}");
 673 $a = lcfirst("\x{178}");
 674 EXPECT
 675 Can't do lc("\x{178}") on non-UTF-8 locale; resolved to "\x{178}". at - line 14.
 676 Can't do fc("\x{1E9E}") on non-UTF-8 locale; resolved to "\x{17F}\x{17F}". at - line 15.
 677 Can't do fc("\x{FB05}") on non-UTF-8 locale; resolved to "\x{FB06}". at - line 16.
 678 Can't do uc("\x{FB00}") on non-UTF-8 locale; resolved to "\x{FB00}". at - line 17.
 679 Can't do ucfirst("\x{149}") on non-UTF-8 locale; resolved to "\x{149}". at - line 18.
 680 Can't do lcfirst("\x{178}") on non-UTF-8 locale; resolved to "\x{178}". at - line 19.
 681 ########
 682 # NAME Wide character in non-UTF-8 locale
 683 require '../loc_tools.pl';
 684 unless (locales_enabled('LC_CTYPE')) {
 685     print("SKIPPED\n# locales not available\n"),exit;
 686 }
 687 eval { require POSIX; POSIX->import("locale_h") };
 688 if ($@) {
 689     print("SKIPPED\n# no POSIX\n"),exit;
 690 }
 691 use warnings 'locale';
 692 use feature 'fc';
 693 use locale;
 694 setlocale(&POSIX::LC_CTYPE, "C");
 695 my $a;
 696 $a = lc("\x{100}");
 697 $a = lcfirst("\x{101}");
 698 $a = fc("\x{102}");
 699 $a = uc("\x{103}");
 700 $a = ucfirst("\x{104}");
 701 no warnings 'locale';
 702 $a = lc("\x{100}");
 703 $a = lcfirst("\x{101}");
 704 $a = fc("\x{102}");
 705 $a = uc("\x{103}");
 706 $a = ucfirst("\x{104}");
 707 EXPECT
 708 Wide character (U+100) in lc at - line 14.
 709 Wide character (U+101) in lcfirst at - line 15.
 710 Wide character (U+102) in fc at - line 16.
 711 Wide character (U+103) in uc at - line 17.
 712 Wide character (U+104) in ucfirst at - line 18.
 713 ########
 714 # NAME Wide character in UTF-8 locale
 715 require '../loc_tools.pl';
 716 unless (locales_enabled('LC_CTYPE')) {
 717     print("SKIPPED\n# locales not available\n"),exit;
 718 }
 719 eval { require POSIX; POSIX->import("locale_h") };
 720 if ($@) {
 721     print("SKIPPED\n# no POSIX\n"),exit;
 722 }
 723 my @utf8_locales = find_utf8_ctype_locale();
 724 unless (@utf8_locales) {
 725     print("SKIPPED\n# no UTF-8 locales\n"),exit;
 726 }
 727 use warnings 'locale';
 728 use feature 'fc';
 729 use locale;
 730 setlocale(&POSIX::LC_CTYPE, $utf8_locales[0]);
 731 my $a;
 732 $a = lc("\x{100}");
 733 $a = lcfirst("\x{101}");
 734 $a = fc("\x{102}");
 735 $a = uc("\x{103}");
 736 $a = ucfirst("\x{104}");
 737 EXPECT
 738 ########
 739 # NAME Deprecation of too-large code points
 740 require "../test.pl";
 741 use warnings 'non_unicode';
 742 my $max_cp = ~0 >> 1;
 743 my $max_char = chr $max_cp;
 744 my $to_warn_cp = $max_cp + 1;
 745 my $to_warn_char = chr $to_warn_cp;
 746 $max_char =~ /[\x{110000}\P{Unassigned}]/;
 747 $to_warn_char =~ /[\x{110000}\P{Unassigned}]/;
 748 my $temp = qr/$max_char/;
 749 $temp = qr/$to_warn_char/;
 750 $temp = uc($max_char);
 751 $temp = uc($to_warn_char);
 752 my $file = tempfile();
 753 open(my $fh, "+>:utf8", $file);
 754 print $fh $max_char, "\n";
 755 print $fh $to_warn_char, "\n";
 756 close $fh;
 757 EXPECT
 758 OPTION regex
 759 Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 at - line \d+.
 760 Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 in pattern match \(m//\) at - line \d+.
 761 Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 in regexp compilation at - line \d+.
 762 Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 in regexp compilation at - line \d+.
 763 Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 at - line \d+.
 764 Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 in regexp compilation at - line \d+.
 765 Operation "uc" returns its argument for non-Unicode code point 0x7F+ at - line \d+.
 766 Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 in uc at - line \d+.
 767 Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 at - line \d+.
 768 Operation "uc" returns its argument for non-Unicode code point 0x80+ at - line \d+.
 769 Code point 0x7F+ is not Unicode, may not be portable in print at - line \d+.
 770 Use of code point 0x80+ is deprecated; the permissible max is 0x7F+\. This will be fatal in Perl 5\.28 in print at - line \d+.
 771 ########
 772 # NAME  [perl #127262]
 773 BEGIN{
 774     if (ord('A') == 193) {   # 'A' has ordinal 193 on EBCDIC, where the message text differs
 775         print "SKIPPED\n# ebcdic platforms generates different Malformed UTF-8 warnings.";
 776         exit 0;
 777     }
 778     use Config;
 779     unless ($Config{d_double_style_ieee}) {   # eval'2**400' below presumably needs the IEEE double range
 780         print "SKIPPED\n# non-IEEE fp range.";
 781         exit 0;
 782     }
 783 {};$^H=eval'2**400'}Â
 784 EXPECT
 785 Malformed UTF-8 character: \xc2\x0a (unexpected non-continuation byte 0x0a, immediately after start byte 0xc2; need 2 bytes, got 1) at - line 11.