t/lib/warnings/utf8

   1
   2   utf8.c AOK
   3
   4      [utf8_to_uvchr_buf]
   5      Malformed UTF-8 character
   6         my $a = ord "\x80" ;
   7
   8      Malformed UTF-8 character
   9         my $a = ord "\xf080" ;
  10      <<<<<< this warning can't be easily triggered from perl anymore
  11
  12      [utf16_to_utf8]
  13      Malformed UTF-16 surrogate
  14      <<<<<< Add a test when something actually calls utf16_to_utf8
  15
  16 __END__
  17 # utf8.c [utf8_to_uvchr_buf] -W
  18 BEGIN {
  19     if (ord('A') == 193) {    # taken as the EBCDIC check (see the message below)
  20         print "SKIPPED\n# ebcdic platforms do not generate Malformed UTF-8 warnings.";
  21         exit 0;    # leave before any of the literals below are compiled
  22     }
  23 }
  24 use utf8 ;
  25 my $a = "snøstorm" ;    # EXPECT lists a warning for this line ("- line 9")
  26 {
  27     no warnings 'utf8' ;
  28     my $a = "snøstorm";    # no warning listed in EXPECT for this line
  29     use warnings 'utf8' ;  # re-enabled inside the same block
  30     my $a = "snøstorm";    # EXPECT lists a warning for this line ("- line 14")
  31 }
  32 EXPECT
  33 Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 9.
  34 Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 14.
  35 ########
  36 use warnings 'utf8';    # first pass: utf8 warnings enabled
  37 my $d7ff  = uc(chr(0xD7FF));    # just below the surrogates: silent
  38 my $d800  = uc(chr(0xD800));    # first surrogate: warns
  39 my $dfff  = uc(chr(0xDFFF));    # last surrogate: warns
  40 my $e000  = uc(chr(0xE000));    # just above the surrogates: silent
  41 my $feff  = uc(chr(0xFEFF));
  42 my $fffd  = uc(chr(0xFFFD));
  43 my $fffe  = uc(chr(0xFFFE));    # no warning listed in EXPECT for FFFE/FFFF here
  44 my $ffff  = uc(chr(0xFFFF));
  45 my $hex4  = uc(chr(0x10000));
  46 my $hex5  = uc(chr(0x100000));
  47 my $maxm1 = uc(chr(0x10FFFE));
  48 my $max   = uc(chr(0x10FFFF));
  49 my $nonUnicode =  uc(chr(0x110000));    # above U+10FFFF: warns
  50 no warnings 'utf8';    # second pass: same calls, EXPECT lists nothing for them
  51 my $d7ff  = uc(chr(0xD7FF));
  52 my $d800  = uc(chr(0xD800));
  53 my $dfff  = uc(chr(0xDFFF));
  54 my $e000  = uc(chr(0xE000));
  55 my $feff  = uc(chr(0xFEFF));
  56 my $fffd  = uc(chr(0xFFFD));
  57 my $fffe  = uc(chr(0xFFFE));
  58 my $ffff  = uc(chr(0xFFFF));
  59 my $hex4  = uc(chr(0x10000));
  60 my $hex5  = uc(chr(0x100000));
  61 my $maxm1 = uc(chr(0x10FFFE));
  62 my $max   = uc(chr(0x10FFFF));
  63 my $nonUnicode =  uc(chr(0x110000));
  64 EXPECT
  65 Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 3.
  66 Operation "uc" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
  67 Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 14.
  68 ########
  69 use warnings 'utf8';
  70 my $d800  = uc(chr(0xD800));    # warns: surrogate
  71 my $nonUnicode =  uc(chr(0x110000));    # warns: non-Unicode code point
  72 no warnings 'surrogate';    # only the surrogate subcategory is switched off
  73 my $d800  = uc(chr(0xD800));    # now silent
  74 my $nonUnicode =  uc(chr(0x110000));    # still warns
  75 EXPECT
  76 Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2.
  77 Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3.
  78 Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 6.
  79 ########
  80 use warnings 'utf8';
  81 my $d800  = uc(chr(0xD800));    # warns: surrogate
  82 my $nonUnicode =  uc(chr(0x110000));    # warns: non-Unicode code point
  83 my $big_nonUnicode = uc(chr(0x8000_0000));    # warns: much larger non-Unicode value
  84 no warnings 'non_unicode';    # only the non_unicode subcategory is switched off
  85 my $d800  = uc(chr(0xD800));    # still warns
  86 my $nonUnicode =  uc(chr(0x110000));    # now silent
  87 my $big_nonUnicode = uc(chr(0x8000_0000));    # now silent
  88 EXPECT
  89 Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2.
  90 Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3.
  91 Operation "uc" returns its argument for non-Unicode code point 0x80000000 at - line 4.
  92 Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 6.
  93 ########
  94 use warnings 'utf8';    # first pass: utf8 warnings enabled
  95 my $d7ff  = lc pack("U", 0xD7FF);    # just below the surrogates: silent
  96 my $d800  = lc pack("U", 0xD800);    # first surrogate: warns
  97 my $dfff  = lc pack("U", 0xDFFF);    # last surrogate: warns
  98 my $e000  = lc pack("U", 0xE000);    # just above the surrogates: silent
  99 my $feff  = lc pack("U", 0xFEFF);
 100 my $fffd  = lc pack("U", 0xFFFD);
 101 my $fffe  = lc pack("U", 0xFFFE);
 102 my $ffff  = lc pack("U", 0xFFFF);
 103 my $hex4  = lc pack("U", 0x10000);
 104 my $hex5  = lc pack("U", 0x100000);
 105 my $maxm1 = lc pack("U", 0x10FFFE);
 106 my $max   = lc pack("U", 0x10FFFF);
 107 my $nonUnicode =  lc(pack("U", 0x110000));    # above U+10FFFF: warns
 108 no warnings 'utf8';    # second pass: same calls, EXPECT lists nothing for them
 109 my $d7ff  = lc pack("U", 0xD7FF);
 110 my $d800  = lc pack("U", 0xD800);
 111 my $dfff  = lc pack("U", 0xDFFF);
 112 my $e000  = lc pack("U", 0xE000);
 113 my $feff  = lc pack("U", 0xFEFF);
 114 my $fffd  = lc pack("U", 0xFFFD);
 115 my $fffe  = lc pack("U", 0xFFFE);
 116 my $ffff  = lc pack("U", 0xFFFF);
 117 my $hex4  = lc pack("U", 0x10000);
 118 my $hex5  = lc pack("U", 0x100000);
 119 my $maxm1 = lc pack("U", 0x10FFFE);
 120 my $max   = lc pack("U", 0x10FFFF);
 121 my $nonUnicode =  lc(pack("U", 0x110000));
 122 EXPECT
 123 Operation "lc" returns its argument for UTF-16 surrogate U+D800 at - line 3.
 124 Operation "lc" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
 125 Operation "lc" returns its argument for non-Unicode code point 0x110000 at - line 14.
 126 ########
 127 use warnings 'utf8';    # first pass: utf8 warnings enabled
 128 my $d7ff  = ucfirst "\x{D7FF}";    # just below the surrogates: silent
 129 my $d800  = ucfirst "\x{D800}";    # first surrogate: warns
 130 my $dfff  = ucfirst "\x{DFFF}";    # last surrogate: warns
 131 my $e000  = ucfirst "\x{E000}";    # just above the surrogates: silent
 132 my $feff  = ucfirst "\x{FEFF}";
 133 my $fffd  = ucfirst "\x{FFFD}";
 134 my $fffe  = ucfirst "\x{FFFE}";
 135 my $ffff  = ucfirst "\x{FFFF}";
 136 my $hex4  = ucfirst "\x{10000}";
 137 my $hex5  = ucfirst "\x{100000}";
 138 my $maxm1 = ucfirst "\x{10FFFE}";
 139 my $max   = ucfirst "\x{10FFFF}";
 140 my $nonUnicode =  ucfirst "\x{110000}";    # above U+10FFFF: warns
 141 no warnings 'utf8';    # second pass: same calls, EXPECT lists nothing for them
 142 my $d7ff  = ucfirst "\x{D7FF}";
 143 my $d800  = ucfirst "\x{D800}";
 144 my $dfff  = ucfirst "\x{DFFF}";
 145 my $e000  = ucfirst "\x{E000}";
 146 my $feff  = ucfirst "\x{FEFF}";
 147 my $fffd  = ucfirst "\x{FFFD}";
 148 my $fffe  = ucfirst "\x{FFFE}";
 149 my $ffff  = ucfirst "\x{FFFF}";
 150 my $hex4  = ucfirst "\x{10000}";
 151 my $hex5  = ucfirst "\x{100000}";
 152 my $maxm1 = ucfirst "\x{10FFFE}";
 153 my $max   = ucfirst "\x{10FFFF}";
 154 my $nonUnicode =  ucfirst "\x{110000}";
 155 EXPECT
 156 Operation "ucfirst" returns its argument for UTF-16 surrogate U+D800 at - line 3.
 157 Operation "ucfirst" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
 158 Operation "ucfirst" returns its argument for non-Unicode code point 0x110000 at - line 14.
 159 ########
 160 use warnings 'utf8';    # first pass: utf8 warnings enabled
 161 chr(0xD7FF) =~ /\p{Any}/;
 162 chr(0xD800) =~ /\p{Any}/;    # surrogates are silent in this section
 163 chr(0xDFFF) =~ /\p{Any}/;
 164 chr(0xE000) =~ /\p{Any}/;
 165 chr(0xFEFF) =~ /\p{Any}/;
 166 chr(0xFFFD) =~ /\p{Any}/;
 167 chr(0xFFFE) =~ /\p{Any}/;
 168 chr(0xFFFF) =~ /\p{Any}/;
 169 chr(0x10000) =~ /\p{Any}/;
 170 chr(0x100000) =~ /\p{Any}/;
 171 chr(0x10FFFE) =~ /\p{Any}/;
 172 chr(0x10FFFF) =~ /\p{Any}/;
 173 chr(0x110000) =~ /\p{Any}/;    # the only line EXPECT lists a warning for
 174 no warnings 'utf8';    # second pass: same matches, EXPECT lists nothing for them
 175 chr(0xD7FF) =~ /\p{Any}/;
 176 chr(0xD800) =~ /\p{Any}/;
 177 chr(0xDFFF) =~ /\p{Any}/;
 178 chr(0xE000) =~ /\p{Any}/;
 179 chr(0xFEFF) =~ /\p{Any}/;
 180 chr(0xFFFD) =~ /\p{Any}/;
 181 chr(0xFFFE) =~ /\p{Any}/;
 182 chr(0xFFFF) =~ /\p{Any}/;
 183 chr(0x10000) =~ /\p{Any}/;
 184 chr(0x100000) =~ /\p{Any}/;
 185 chr(0x10FFFE) =~ /\p{Any}/;
 186 chr(0x10FFFF) =~ /\p{Any}/;
 187 chr(0x110000) =~ /\p{Any}/;
 188 EXPECT
 189 Code point 0x110000 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 14.
 190 ########
 191 use warnings 'utf8';
 192 chr(0x110000) =~ /\p{Any}/;    # warns: code point is not Unicode
 193 no warnings 'non_unicode';    # only the non_unicode subcategory is switched off
 194 chr(0x110000) =~ /\p{Any}/;    # now silent
 195 EXPECT
 196 Code point 0x110000 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 2.
 197 ########
 198 require "../test.pl";
 199 use warnings 'utf8';
 200 sub Is_Super { return '!utf8::Any' }    # user-defined property: complement of Any
 201 # The extra \x{100} defeats an optimization that would otherwise sidestep the
 202 # problem under test when the property is the only non-Latin-1 item in the class.
 203 print "\x{1100000}" =~ /^[\p{Is_Super}\x{100}]$/, "\n";    # EXPECT: prints 1, no warning
 204 EXPECT
 205 1
 206 ########
 207 require "../test.pl";    # presumably supplies tempfile() -- confirm
 208 use warnings 'utf8';
 209 my $file = tempfile();
 210 open(my $fh, "+>:utf8", $file);    # read/write handle with the :utf8 layer
 211 print $fh "\x{D7FF}", "\n";    # just below the surrogates: silent
 212 print $fh "\x{D800}", "\n";    # first surrogate: warns
 213 print $fh "\x{DFFF}", "\n";    # last surrogate: warns
 214 print $fh "\x{E000}", "\n";    # just above the surrogates: silent
 215 print $fh "\x{FDCF}", "\n";    # just below U+FDD0: silent
 216 print $fh "\x{FDD0}", "\n";    # non-character: warns
 217 print $fh "\x{FDEF}", "\n";    # non-character: warns
 218 print $fh "\x{FDF0}", "\n";    # just above U+FDEF: silent
 219 print $fh "\x{FEFF}", "\n";
 220 print $fh "\x{FFFD}", "\n";
 221 print $fh "\x{FFFE}", "\n";    # non-character: warns
 222 print $fh "\x{FFFF}", "\n";    # non-character: warns
 223 print $fh "\x{10000}", "\n";    # silent
 224 print $fh "\x{1FFFE}", "\n";    # every xFFFE/xFFFF pair from here on warns as non-character
 225 print $fh "\x{1FFFF}", "\n";
 226 print $fh "\x{2FFFE}", "\n";
 227 print $fh "\x{2FFFF}", "\n";
 228 print $fh "\x{3FFFE}", "\n";
 229 print $fh "\x{3FFFF}", "\n";
 230 print $fh "\x{4FFFE}", "\n";
 231 print $fh "\x{4FFFF}", "\n";
 232 print $fh "\x{5FFFE}", "\n";
 233 print $fh "\x{5FFFF}", "\n";
 234 print $fh "\x{6FFFE}", "\n";
 235 print $fh "\x{6FFFF}", "\n";
 236 print $fh "\x{7FFFE}", "\n";
 237 print $fh "\x{7FFFF}", "\n";
 238 print $fh "\x{8FFFE}", "\n";
 239 print $fh "\x{8FFFF}", "\n";
 240 print $fh "\x{9FFFE}", "\n";
 241 print $fh "\x{9FFFF}", "\n";
 242 print $fh "\x{AFFFE}", "\n";
 243 print $fh "\x{AFFFF}", "\n";
 244 print $fh "\x{BFFFE}", "\n";
 245 print $fh "\x{BFFFF}", "\n";
 246 print $fh "\x{CFFFE}", "\n";
 247 print $fh "\x{CFFFF}", "\n";
 248 print $fh "\x{DFFFE}", "\n";
 249 print $fh "\x{DFFFF}", "\n";
 250 print $fh "\x{EFFFE}", "\n";
 251 print $fh "\x{EFFFF}", "\n";
 252 print $fh "\x{FFFFE}", "\n";
 253 print $fh "\x{FFFFF}", "\n";
 254 print $fh "\x{100000}", "\n";    # silent
 255 print $fh "\x{10FFFE}", "\n";
 256 print $fh "\x{10FFFF}", "\n";
 257 print $fh "\x{110000}", "\n";    # above U+10FFFF: "not Unicode" warning
 258 close $fh;
 259 EXPECT
 260 Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
 261 Unicode surrogate U+DFFF is illegal in UTF-8 at - line 7.
 262 Unicode non-character U+FDD0 is illegal for open interchange at - line 10.
 263 Unicode non-character U+FDEF is illegal for open interchange at - line 11.
 264 Unicode non-character U+FFFE is illegal for open interchange at - line 15.
 265 Unicode non-character U+FFFF is illegal for open interchange at - line 16.
 266 Unicode non-character U+1FFFE is illegal for open interchange at - line 18.
 267 Unicode non-character U+1FFFF is illegal for open interchange at - line 19.
 268 Unicode non-character U+2FFFE is illegal for open interchange at - line 20.
 269 Unicode non-character U+2FFFF is illegal for open interchange at - line 21.
 270 Unicode non-character U+3FFFE is illegal for open interchange at - line 22.
 271 Unicode non-character U+3FFFF is illegal for open interchange at - line 23.
 272 Unicode non-character U+4FFFE is illegal for open interchange at - line 24.
 273 Unicode non-character U+4FFFF is illegal for open interchange at - line 25.
 274 Unicode non-character U+5FFFE is illegal for open interchange at - line 26.
 275 Unicode non-character U+5FFFF is illegal for open interchange at - line 27.
 276 Unicode non-character U+6FFFE is illegal for open interchange at - line 28.
 277 Unicode non-character U+6FFFF is illegal for open interchange at - line 29.
 278 Unicode non-character U+7FFFE is illegal for open interchange at - line 30.
 279 Unicode non-character U+7FFFF is illegal for open interchange at - line 31.
 280 Unicode non-character U+8FFFE is illegal for open interchange at - line 32.
 281 Unicode non-character U+8FFFF is illegal for open interchange at - line 33.
 282 Unicode non-character U+9FFFE is illegal for open interchange at - line 34.
 283 Unicode non-character U+9FFFF is illegal for open interchange at - line 35.
 284 Unicode non-character U+AFFFE is illegal for open interchange at - line 36.
 285 Unicode non-character U+AFFFF is illegal for open interchange at - line 37.
 286 Unicode non-character U+BFFFE is illegal for open interchange at - line 38.
 287 Unicode non-character U+BFFFF is illegal for open interchange at - line 39.
 288 Unicode non-character U+CFFFE is illegal for open interchange at - line 40.
 289 Unicode non-character U+CFFFF is illegal for open interchange at - line 41.
 290 Unicode non-character U+DFFFE is illegal for open interchange at - line 42.
 291 Unicode non-character U+DFFFF is illegal for open interchange at - line 43.
 292 Unicode non-character U+EFFFE is illegal for open interchange at - line 44.
 293 Unicode non-character U+EFFFF is illegal for open interchange at - line 45.
 294 Unicode non-character U+FFFFE is illegal for open interchange at - line 46.
 295 Unicode non-character U+FFFFF is illegal for open interchange at - line 47.
 296 Unicode non-character U+10FFFE is illegal for open interchange at - line 49.
 297 Unicode non-character U+10FFFF is illegal for open interchange at - line 50.
 298 Code point 0x110000 is not Unicode, may not be portable at - line 51.
 299 ########
 300 require "../test.pl";    # presumably supplies tempfile() -- confirm
 301 use warnings 'utf8';    # all three subcategories active
 302 my $file = tempfile();
 303 open(my $fh, "+>:utf8", $file);
 304 print $fh "\x{D800}", "\n";    # warns: surrogate
 305 print $fh "\x{FFFF}", "\n";    # warns: non-character
 306 print $fh "\x{110000}", "\n";    # warns: not Unicode
 307 close $fh;
 308 EXPECT
 309 Unicode surrogate U+D800 is illegal in UTF-8 at - line 5.
 310 Unicode non-character U+FFFF is illegal for open interchange at - line 6.
 311 Code point 0x110000 is not Unicode, may not be portable at - line 7.
 312 ########
 313 require "../test.pl";    # presumably supplies tempfile() -- confirm
 314 use warnings 'utf8';
 315 no warnings 'surrogate';    # only the surrogate subcategory is switched off
 316 my $file = tempfile();
 317 open(my $fh, "+>:utf8", $file);
 318 print $fh "\x{D800}", "\n";    # silent
 319 print $fh "\x{FFFF}", "\n";    # warns: non-character
 320 print $fh "\x{110000}", "\n";    # warns: not Unicode
 321 close $fh;
 322 EXPECT
 323 Unicode non-character U+FFFF is illegal for open interchange at - line 7.
 324 Code point 0x110000 is not Unicode, may not be portable at - line 8.
 325 ########
 326 require "../test.pl";    # presumably supplies tempfile() -- confirm
 327 use warnings 'utf8';
 328 no warnings 'nonchar';    # only the nonchar subcategory is switched off
 329 my $file = tempfile();
 330 open(my $fh, "+>:utf8", $file);
 331 print $fh "\x{D800}", "\n";    # warns: surrogate
 332 print $fh "\x{FFFF}", "\n";    # silent
 333 print $fh "\x{110000}", "\n";    # warns: not Unicode
 334 close $fh;
 335 EXPECT
 336 Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
 337 Code point 0x110000 is not Unicode, may not be portable at - line 8.
 338 ########
 339 require "../test.pl";    # presumably supplies tempfile() -- confirm
 340 use warnings 'utf8';
 341 no warnings 'non_unicode';    # only the non_unicode subcategory is switched off
 342 my $file = tempfile();
 343 open(my $fh, "+>:utf8", $file);
 344 print $fh "\x{D800}", "\n";    # warns: surrogate
 345 print $fh "\x{FFFF}", "\n";    # warns: non-character
 346 print $fh "\x{110000}", "\n";    # silent
 347 close $fh;
 348 EXPECT
 349 Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
 350 Unicode non-character U+FFFF is illegal for open interchange at - line 7.
 351 ########
 352 # NAME C<use warnings "nonchar"> works in isolation
 353 require "../test.pl";
 354 use warnings 'nonchar';    # subcategory enabled on its own, without 'utf8'
 355 my $file = tempfile();
 356 open(my $fh, "+>:utf8", $file);
 357 print $fh "\x{FFFF}", "\n";    # warns; EXPECT reports it as "- line 5"
 358 close $fh;
 359 EXPECT
 360 Unicode non-character U+FFFF is illegal for open interchange at - line 5.
 361 ########
 362 # NAME C<use warnings "surrogate"> works in isolation
 363 require "../test.pl";
 364 use warnings 'surrogate';    # subcategory enabled on its own, without 'utf8'
 365 my $file = tempfile();
 366 open(my $fh, "+>:utf8", $file);
 367 print $fh "\x{D800}", "\n";    # warns; EXPECT reports it as "- line 5"
 368 close $fh;
 369 EXPECT
 370 Unicode surrogate U+D800 is illegal in UTF-8 at - line 5.
 371 ########
 372 # NAME C<use warnings "non_unicode"> works in isolation
 373 require "../test.pl";
 374 use warnings 'non_unicode';    # subcategory enabled on its own, without 'utf8'
 375 my $file = tempfile();
 376 open(my $fh, "+>:utf8", $file);
 377 print $fh "\x{110000}", "\n";    # warns; EXPECT reports it as "- line 5"
 378 close $fh;
 379 EXPECT
 380 Code point 0x110000 is not Unicode, may not be portable at - line 5.
 381 ########
 382 require "../test.pl";
 383 no warnings 'utf8';
 384 my $file = tempfile();
 385 open(my $fh, "+>:utf8", $file);
 386 print $fh "\x{D7FF}", "\n";
 387 print $fh "\x{D800}", "\n";
 388 print $fh "\x{DFFF}", "\n";
 389 print $fh "\x{E000}", "\n";
 390 print $fh "\x{FDCF}", "\n";
 391 print $fh "\x{FDD0}", "\n";
 392 print $fh "\x{FDEF}", "\n";
 393 print $fh "\x{FDF0}", "\n";
 394 print $fh "\x{FEFF}", "\n";
 395 print $fh "\x{FFFD}", "\n";
 396 print $fh "\x{FFFE}", "\n";
 397 print $fh "\x{FFFF}", "\n";
 398 print $fh "\x{10000}", "\n";
 399 print $fh "\x{1FFFE}", "\n";
 400 print $fh "\x{1FFFF}", "\n";
 401 print $fh "\x{2FFFE}", "\n";
 402 print $fh "\x{2FFFF}", "\n";
 403 print $fh "\x{3FFFE}", "\n";
 404 print $fh "\x{3FFFF}", "\n";
 405 print $fh "\x{4FFFE}", "\n";
 406 print $fh "\x{4FFFF}", "\n";
 407 print $fh "\x{5FFFE}", "\n";
 408 print $fh "\x{5FFFF}", "\n";
 409 print $fh "\x{6FFFE}", "\n";
 410 print $fh "\x{6FFFF}", "\n";
 411 print $fh "\x{7FFFE}", "\n";
 412 print $fh "\x{7FFFF}", "\n";
 413 print $fh "\x{8FFFE}", "\n";
 414 print $fh "\x{8FFFF}", "\n";
 415 print $fh "\x{9FFFE}", "\n";
 416 print $fh "\x{9FFFF}", "\n";
 417 print $fh "\x{AFFFE}", "\n";
 418 print $fh "\x{AFFFF}", "\n";
 419 print $fh "\x{BFFFE}", "\n";
 420 print $fh "\x{BFFFF}", "\n";
 421 print $fh "\x{CFFFE}", "\n";
 422 print $fh "\x{CFFFF}", "\n";
 423 print $fh "\x{DFFFE}", "\n";
 424 print $fh "\x{DFFFF}", "\n";
 425 print $fh "\x{EFFFE}", "\n";
 426 print $fh "\x{EFFFF}", "\n";
 427 print $fh "\x{FFFFE}", "\n";
 428 print $fh "\x{FFFFF}", "\n";
 429 print $fh "\x{100000}", "\n";
 430 print $fh "\x{10FFFE}", "\n";
 431 print $fh "\x{10FFFF}", "\n";
 432 print $fh "\x{110000}", "\n";
 433 close $fh;
 434 EXPECT