t/lib/warnings/utf8

   1
   2   utf8.c AOK
   3
   4      [utf8_to_uv]
   5      Malformed UTF-8 character
   6         my $a = ord "\x80" ;
   7
   8      Malformed UTF-8 character
   9         my $a = ord "\xf080" ;
  10      <<<<<< this warning can't be easily triggered from perl anymore
  11
  12      [utf16_to_utf8]
  13      Malformed UTF-16 surrogate
  14      <<<<<< Add a test when something actually calls utf16_to_utf8
  15
  16 __END__
  17 # utf8.c [utf8_to_uv] -W
  18 BEGIN {  # runs at compile time, before the rest of the program is parsed
  19     if (ord('A') == 193) {  # 193 is 'A' in EBCDIC; the message below says such platforms do not emit this warning
  20         print "SKIPPED\n# ebcdic platforms do not generate Malformed UTF-8 warnings.";
  21         exit 0;  # stop here so none of the statements below are reached
  22     }
  23 }
  24 use utf8 ;  # declare that the source text itself is UTF-8
  25 my $a = "snøstorm" ;  # program line 9 (counting the header comment as line 1): first EXPECT entry. NOTE(review): EXPECT names start byte 0xf8 followed by 0x73 ('s'), so the o-slash is presumably stored as one raw 0xF8 byte in the real file - confirm the file encoding
  26 {
  27     no warnings 'utf8' ;  # category off: EXPECT has no entry for the next statement
  28     my $a = "snøstorm";
  29     use warnings 'utf8' ;  # category back on inside the same block
  30     my $a = "snøstorm";  # program line 14: second EXPECT entry
  31 }
  32 EXPECT
  33 Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 9.
  34 Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 14.
  35 ########
  36 use warnings 'utf8';  # pass 1: utf8 warnings on; program lines are counted from this statement
  37 my $d7ff  = uc(chr(0xD7FF));  # no EXPECT entry
  38 my $d800  = uc(chr(0xD800));  # program line 3: surrogate entry in EXPECT
  39 my $dfff  = uc(chr(0xDFFF));  # program line 4: surrogate entry in EXPECT
  40 my $e000  = uc(chr(0xE000));  # no EXPECT entry
  41 my $feff  = uc(chr(0xFEFF));
  42 my $fffd  = uc(chr(0xFFFD));
  43 my $fffe  = uc(chr(0xFFFE));
  44 my $ffff  = uc(chr(0xFFFF));
  45 my $hex4  = uc(chr(0x10000));
  46 my $hex5  = uc(chr(0x100000));
  47 my $maxm1 = uc(chr(0x10FFFE));
  48 my $max   = uc(chr(0x10FFFF));  # no EXPECT entry
  49 my $nonUnicode =  uc(chr(0x110000));  # program line 14: non-Unicode entry in EXPECT
  50 no warnings 'utf8';  # pass 2: same calls with the category off; EXPECT lists nothing after line 14
  51 my $d7ff  = uc(chr(0xD7FF));
  52 my $d800  = uc(chr(0xD800));
  53 my $dfff  = uc(chr(0xDFFF));
  54 my $e000  = uc(chr(0xE000));
  55 my $feff  = uc(chr(0xFEFF));
  56 my $fffd  = uc(chr(0xFFFD));
  57 my $fffe  = uc(chr(0xFFFE));
  58 my $ffff  = uc(chr(0xFFFF));
  59 my $hex4  = uc(chr(0x10000));
  60 my $hex5  = uc(chr(0x100000));
  61 my $maxm1 = uc(chr(0x10FFFE));
  62 my $max   = uc(chr(0x10FFFF));
  63 my $nonUnicode =  uc(chr(0x110000));
  64 EXPECT
  65 Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 3.
  66 Operation "uc" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
  67 Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 14.
  68 ########
  69 use warnings 'utf8';  # all utf8 warnings on; program lines are counted from this statement
  70 my $d800  = uc(chr(0xD800));  # program line 2: surrogate entry in EXPECT
  71 my $nonUnicode =  uc(chr(0x110000));  # program line 3: non-Unicode entry in EXPECT
  72 no warnings 'surrogate';  # switch off only the 'surrogate' sub-category
  73 my $d800  = uc(chr(0xD800));  # program line 5: no EXPECT entry
  74 my $nonUnicode =  uc(chr(0x110000));  # program line 6: still listed in EXPECT
  75 EXPECT
  76 Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2.
  77 Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3.
  78 Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 6.
  79 ########
  80 use warnings 'utf8';  # all utf8 warnings on; program lines are counted from this statement
  81 my $d800  = uc(chr(0xD800));  # program line 2: surrogate entry in EXPECT
  82 my $nonUnicode =  uc(chr(0x110000));  # program line 3: non-Unicode entry in EXPECT
  83 my $big_nonUnicode = uc(chr(0x8000_0000));  # program line 4: non-Unicode entry in EXPECT (0x80000000)
  84 no warnings 'non_unicode';  # switch off only the 'non_unicode' sub-category
  85 my $d800  = uc(chr(0xD800));  # program line 6: surrogate warning still listed in EXPECT
  86 my $nonUnicode =  uc(chr(0x110000));  # no EXPECT entry
  87 my $big_nonUnicode = uc(chr(0x8000_0000));  # no EXPECT entry
  88 EXPECT
  89 Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2.
  90 Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3.
  91 Operation "uc" returns its argument for non-Unicode code point 0x80000000 at - line 4.
  92 Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 6.
  93 ########
  94 use warnings 'utf8';  # pass 1: utf8 warnings on; program lines are counted from this statement
  95 my $d7ff  = lc pack("U", 0xD7FF);  # no EXPECT entry
  96 my $d800  = lc pack("U", 0xD800);  # program line 3: surrogate entry in EXPECT
  97 my $dfff  = lc pack("U", 0xDFFF);  # program line 4: surrogate entry in EXPECT
  98 my $e000  = lc pack("U", 0xE000);  # no EXPECT entry
  99 my $feff  = lc pack("U", 0xFEFF);
 100 my $fffd  = lc pack("U", 0xFFFD);
 101 my $fffe  = lc pack("U", 0xFFFE);
 102 my $ffff  = lc pack("U", 0xFFFF);
 103 my $hex4  = lc pack("U", 0x10000);
 104 my $hex5  = lc pack("U", 0x100000);
 105 my $maxm1 = lc pack("U", 0x10FFFE);
 106 my $max   = lc pack("U", 0x10FFFF);  # no EXPECT entry
 107 my $nonUnicode =  lc(pack("U", 0x110000));  # program line 14: non-Unicode entry in EXPECT
 108 no warnings 'utf8';  # pass 2: same calls with the category off; EXPECT lists nothing after line 14
 109 my $d7ff  = lc pack("U", 0xD7FF);
 110 my $d800  = lc pack("U", 0xD800);
 111 my $dfff  = lc pack("U", 0xDFFF);
 112 my $e000  = lc pack("U", 0xE000);
 113 my $feff  = lc pack("U", 0xFEFF);
 114 my $fffd  = lc pack("U", 0xFFFD);
 115 my $fffe  = lc pack("U", 0xFFFE);
 116 my $ffff  = lc pack("U", 0xFFFF);
 117 my $hex4  = lc pack("U", 0x10000);
 118 my $hex5  = lc pack("U", 0x100000);
 119 my $maxm1 = lc pack("U", 0x10FFFE);
 120 my $max   = lc pack("U", 0x10FFFF);
 121 my $nonUnicode =  lc(pack("U", 0x110000));
 122 EXPECT
 123 Operation "lc" returns its argument for UTF-16 surrogate U+D800 at - line 3.
 124 Operation "lc" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
 125 Operation "lc" returns its argument for non-Unicode code point 0x110000 at - line 14.
 126 ########
 127 use warnings 'utf8';  # pass 1: utf8 warnings on; program lines are counted from this statement
 128 my $d7ff  = ucfirst "\x{D7FF}";  # no EXPECT entry
 129 my $d800  = ucfirst "\x{D800}";  # program line 3: surrogate entry in EXPECT
 130 my $dfff  = ucfirst "\x{DFFF}";  # program line 4: surrogate entry in EXPECT
 131 my $e000  = ucfirst "\x{E000}";  # no EXPECT entry
 132 my $feff  = ucfirst "\x{FEFF}";
 133 my $fffd  = ucfirst "\x{FFFD}";
 134 my $fffe  = ucfirst "\x{FFFE}";
 135 my $ffff  = ucfirst "\x{FFFF}";
 136 my $hex4  = ucfirst "\x{10000}";
 137 my $hex5  = ucfirst "\x{100000}";
 138 my $maxm1 = ucfirst "\x{10FFFE}";
 139 my $max   = ucfirst "\x{10FFFF}";  # no EXPECT entry
 140 my $nonUnicode =  ucfirst "\x{110000}";  # program line 14: non-Unicode entry in EXPECT
 141 no warnings 'utf8';  # pass 2: same calls with the category off; EXPECT lists nothing after line 14
 142 my $d7ff  = ucfirst "\x{D7FF}";
 143 my $d800  = ucfirst "\x{D800}";
 144 my $dfff  = ucfirst "\x{DFFF}";
 145 my $e000  = ucfirst "\x{E000}";
 146 my $feff  = ucfirst "\x{FEFF}";
 147 my $fffd  = ucfirst "\x{FFFD}";
 148 my $fffe  = ucfirst "\x{FFFE}";
 149 my $ffff  = ucfirst "\x{FFFF}";
 150 my $hex4  = ucfirst "\x{10000}";
 151 my $hex5  = ucfirst "\x{100000}";
 152 my $maxm1 = ucfirst "\x{10FFFE}";
 153 my $max   = ucfirst "\x{10FFFF}";
 154 my $nonUnicode =  ucfirst "\x{110000}";
 155 EXPECT
 156 Operation "ucfirst" returns its argument for UTF-16 surrogate U+D800 at - line 3.
 157 Operation "ucfirst" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
 158 Operation "ucfirst" returns its argument for non-Unicode code point 0x110000 at - line 14.
 159 ########
 160 use warnings 'utf8';  # pass 1: utf8 warnings on; program lines are counted from this statement
 161 chr(0xD7FF) =~ /\p{Any}/;  # no EXPECT entry
 162 chr(0xD800) =~ /\p{Any}/;  # no EXPECT entry for this match
 163 chr(0xDFFF) =~ /\p{Any}/;
 164 chr(0xE000) =~ /\p{Any}/;
 165 chr(0xFEFF) =~ /\p{Any}/;
 166 chr(0xFFFD) =~ /\p{Any}/;
 167 chr(0xFFFE) =~ /\p{Any}/;
 168 chr(0xFFFF) =~ /\p{Any}/;
 169 chr(0x10000) =~ /\p{Any}/;
 170 chr(0x100000) =~ /\p{Any}/;
 171 chr(0x10FFFE) =~ /\p{Any}/;
 172 chr(0x10FFFF) =~ /\p{Any}/;  # no EXPECT entry
 173 chr(0x110000) =~ /\p{Any}/;  # program line 14: the single EXPECT entry ("is not Unicode")
 174 no warnings 'utf8';  # pass 2: same matches with the category off; EXPECT lists nothing after line 14
 175 chr(0xD7FF) =~ /\p{Any}/;
 176 chr(0xD800) =~ /\p{Any}/;
 177 chr(0xDFFF) =~ /\p{Any}/;
 178 chr(0xE000) =~ /\p{Any}/;
 179 chr(0xFEFF) =~ /\p{Any}/;
 180 chr(0xFFFD) =~ /\p{Any}/;
 181 chr(0xFFFE) =~ /\p{Any}/;
 182 chr(0xFFFF) =~ /\p{Any}/;
 183 chr(0x10000) =~ /\p{Any}/;
 184 chr(0x100000) =~ /\p{Any}/;
 185 chr(0x10FFFE) =~ /\p{Any}/;
 186 chr(0x10FFFF) =~ /\p{Any}/;
 187 chr(0x110000) =~ /\p{Any}/;
 188 EXPECT
 189 Code point 0x110000 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 14.
 190 ########
 191 use warnings 'utf8';  # all utf8 warnings on; program lines are counted from this statement
 192 chr(0x110000) =~ /\p{Any}/;  # program line 2: the single EXPECT entry
 193 no warnings 'non_unicode';  # switch off only the 'non_unicode' sub-category
 194 chr(0x110000) =~ /\p{Any}/;  # same match again: no EXPECT entry
 195 EXPECT
 196 Code point 0x110000 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 2.
 197 ########
 198 require "../test.pl";  # presumably supplies tempfile() used below - confirm; counted as program line 1
 199 use warnings 'utf8';  # all utf8 warnings on for every print below
 200 my $file = tempfile();
 201 open(my $fh, "+>:utf8", $file);  # read/write handle with the :utf8 layer; the result of open is not checked
 202 print $fh "\x{D7FF}", "\n";  # no EXPECT entry
 203 print $fh "\x{D800}", "\n";  # program line 6: surrogate entry in EXPECT
 204 print $fh "\x{DFFF}", "\n";  # program line 7: surrogate entry in EXPECT
 205 print $fh "\x{E000}", "\n";  # no EXPECT entry
 206 print $fh "\x{FDCF}", "\n";  # no EXPECT entry
 207 print $fh "\x{FDD0}", "\n";  # program line 10: non-character entry in EXPECT
 208 print $fh "\x{FDEF}", "\n";  # program line 11: non-character entry in EXPECT
 209 print $fh "\x{FDF0}", "\n";  # no EXPECT entry
 210 print $fh "\x{FEFF}", "\n";  # no EXPECT entry
 211 print $fh "\x{FFFD}", "\n";  # no EXPECT entry
 212 print $fh "\x{FFFE}", "\n";  # program line 15: non-character entry in EXPECT
 213 print $fh "\x{FFFF}", "\n";  # program line 16: non-character entry in EXPECT
 214 print $fh "\x{10000}", "\n";  # no EXPECT entry
 215 print $fh "\x{1FFFE}", "\n";  # from here through FFFFF (program lines 18-47) every line has a non-character entry in EXPECT
 216 print $fh "\x{1FFFF}", "\n";
 217 print $fh "\x{2FFFE}", "\n";
 218 print $fh "\x{2FFFF}", "\n";
 219 print $fh "\x{3FFFE}", "\n";
 220 print $fh "\x{3FFFF}", "\n";
 221 print $fh "\x{4FFFE}", "\n";
 222 print $fh "\x{4FFFF}", "\n";
 223 print $fh "\x{5FFFE}", "\n";
 224 print $fh "\x{5FFFF}", "\n";
 225 print $fh "\x{6FFFE}", "\n";
 226 print $fh "\x{6FFFF}", "\n";
 227 print $fh "\x{7FFFE}", "\n";
 228 print $fh "\x{7FFFF}", "\n";
 229 print $fh "\x{8FFFE}", "\n";
 230 print $fh "\x{8FFFF}", "\n";
 231 print $fh "\x{9FFFE}", "\n";
 232 print $fh "\x{9FFFF}", "\n";
 233 print $fh "\x{AFFFE}", "\n";
 234 print $fh "\x{AFFFF}", "\n";
 235 print $fh "\x{BFFFE}", "\n";
 236 print $fh "\x{BFFFF}", "\n";
 237 print $fh "\x{CFFFE}", "\n";
 238 print $fh "\x{CFFFF}", "\n";
 239 print $fh "\x{DFFFE}", "\n";
 240 print $fh "\x{DFFFF}", "\n";
 241 print $fh "\x{EFFFE}", "\n";
 242 print $fh "\x{EFFFF}", "\n";
 243 print $fh "\x{FFFFE}", "\n";
 244 print $fh "\x{FFFFF}", "\n";
 245 print $fh "\x{100000}", "\n";  # program line 48: no EXPECT entry
 246 print $fh "\x{10FFFE}", "\n";  # program line 49: non-character entry in EXPECT
 247 print $fh "\x{10FFFF}", "\n";  # program line 50: non-character entry in EXPECT
 248 print $fh "\x{110000}", "\n";  # program line 51: "is not Unicode" entry in EXPECT
 249 close $fh;
 250 EXPECT
 251 Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
 252 Unicode surrogate U+DFFF is illegal in UTF-8 at - line 7.
 253 Unicode non-character U+FDD0 is illegal for open interchange at - line 10.
 254 Unicode non-character U+FDEF is illegal for open interchange at - line 11.
 255 Unicode non-character U+FFFE is illegal for open interchange at - line 15.
 256 Unicode non-character U+FFFF is illegal for open interchange at - line 16.
 257 Unicode non-character U+1FFFE is illegal for open interchange at - line 18.
 258 Unicode non-character U+1FFFF is illegal for open interchange at - line 19.
 259 Unicode non-character U+2FFFE is illegal for open interchange at - line 20.
 260 Unicode non-character U+2FFFF is illegal for open interchange at - line 21.
 261 Unicode non-character U+3FFFE is illegal for open interchange at - line 22.
 262 Unicode non-character U+3FFFF is illegal for open interchange at - line 23.
 263 Unicode non-character U+4FFFE is illegal for open interchange at - line 24.
 264 Unicode non-character U+4FFFF is illegal for open interchange at - line 25.
 265 Unicode non-character U+5FFFE is illegal for open interchange at - line 26.
 266 Unicode non-character U+5FFFF is illegal for open interchange at - line 27.
 267 Unicode non-character U+6FFFE is illegal for open interchange at - line 28.
 268 Unicode non-character U+6FFFF is illegal for open interchange at - line 29.
 269 Unicode non-character U+7FFFE is illegal for open interchange at - line 30.
 270 Unicode non-character U+7FFFF is illegal for open interchange at - line 31.
 271 Unicode non-character U+8FFFE is illegal for open interchange at - line 32.
 272 Unicode non-character U+8FFFF is illegal for open interchange at - line 33.
 273 Unicode non-character U+9FFFE is illegal for open interchange at - line 34.
 274 Unicode non-character U+9FFFF is illegal for open interchange at - line 35.
 275 Unicode non-character U+AFFFE is illegal for open interchange at - line 36.
 276 Unicode non-character U+AFFFF is illegal for open interchange at - line 37.
 277 Unicode non-character U+BFFFE is illegal for open interchange at - line 38.
 278 Unicode non-character U+BFFFF is illegal for open interchange at - line 39.
 279 Unicode non-character U+CFFFE is illegal for open interchange at - line 40.
 280 Unicode non-character U+CFFFF is illegal for open interchange at - line 41.
 281 Unicode non-character U+DFFFE is illegal for open interchange at - line 42.
 282 Unicode non-character U+DFFFF is illegal for open interchange at - line 43.
 283 Unicode non-character U+EFFFE is illegal for open interchange at - line 44.
 284 Unicode non-character U+EFFFF is illegal for open interchange at - line 45.
 285 Unicode non-character U+FFFFE is illegal for open interchange at - line 46.
 286 Unicode non-character U+FFFFF is illegal for open interchange at - line 47.
 287 Unicode non-character U+10FFFE is illegal for open interchange at - line 49.
 288 Unicode non-character U+10FFFF is illegal for open interchange at - line 50.
 289 Code point 0x110000 is not Unicode, may not be portable at - line 51.
 290 ########
 291 require "../test.pl";  # presumably supplies tempfile() used below - confirm; counted as program line 1
 292 use warnings 'utf8';  # all utf8 warnings on: one code point per kind of EXPECT entry follows
 293 my $file = tempfile();
 294 open(my $fh, "+>:utf8", $file);  # read/write handle with the :utf8 layer; the result of open is not checked
 295 print $fh "\x{D800}", "\n";  # program line 5: surrogate entry in EXPECT
 296 print $fh "\x{FFFF}", "\n";  # program line 6: non-character entry in EXPECT
 297 print $fh "\x{110000}", "\n";  # program line 7: "is not Unicode" entry in EXPECT
 298 close $fh;
 299 EXPECT
 300 Unicode surrogate U+D800 is illegal in UTF-8 at - line 5.
 301 Unicode non-character U+FFFF is illegal for open interchange at - line 6.
 302 Code point 0x110000 is not Unicode, may not be portable at - line 7.
 303 ########
 304 require "../test.pl";  # presumably supplies tempfile() used below - confirm; counted as program line 1
 305 use warnings 'utf8';  # all utf8 warnings on ...
 306 no warnings 'surrogate';  # ... then only the 'surrogate' sub-category switched off
 307 my $file = tempfile();
 308 open(my $fh, "+>:utf8", $file);  # read/write handle with the :utf8 layer; the result of open is not checked
 309 print $fh "\x{D800}", "\n";  # program line 6: no EXPECT entry
 310 print $fh "\x{FFFF}", "\n";  # program line 7: non-character entry in EXPECT
 311 print $fh "\x{110000}", "\n";  # program line 8: "is not Unicode" entry in EXPECT
 312 close $fh;
 313 EXPECT
 314 Unicode non-character U+FFFF is illegal for open interchange at - line 7.
 315 Code point 0x110000 is not Unicode, may not be portable at - line 8.
 316 ########
 317 require "../test.pl";  # presumably supplies tempfile() used below - confirm; counted as program line 1
 318 use warnings 'utf8';  # all utf8 warnings on ...
 319 no warnings 'nonchar';  # ... then only the 'nonchar' sub-category switched off
 320 my $file = tempfile();
 321 open(my $fh, "+>:utf8", $file);  # read/write handle with the :utf8 layer; the result of open is not checked
 322 print $fh "\x{D800}", "\n";  # program line 6: surrogate entry in EXPECT
 323 print $fh "\x{FFFF}", "\n";  # program line 7: no EXPECT entry
 324 print $fh "\x{110000}", "\n";  # program line 8: "is not Unicode" entry in EXPECT
 325 close $fh;
 326 EXPECT
 327 Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
 328 Code point 0x110000 is not Unicode, may not be portable at - line 8.
 329 ########
 330 require "../test.pl";  # presumably supplies tempfile() used below - confirm; counted as program line 1
 331 use warnings 'utf8';  # all utf8 warnings on ...
 332 no warnings 'non_unicode';  # ... then only the 'non_unicode' sub-category switched off
 333 my $file = tempfile();
 334 open(my $fh, "+>:utf8", $file);  # read/write handle with the :utf8 layer; the result of open is not checked
 335 print $fh "\x{D800}", "\n";  # program line 6: surrogate entry in EXPECT
 336 print $fh "\x{FFFF}", "\n";  # program line 7: non-character entry in EXPECT
 337 print $fh "\x{110000}", "\n";  # program line 8: no EXPECT entry
 338 close $fh;
 339 EXPECT
 340 Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
 341 Unicode non-character U+FFFF is illegal for open interchange at - line 7.
 342 ########
 343 require "../test.pl";  # presumably supplies tempfile() used below - confirm
 344 no warnings 'utf8';  # whole utf8 category off. NOTE(review): this run presumably expects no warnings at all - confirm against its EXPECT section
 345 my $file = tempfile();
 346 open(my $fh, "+>:utf8", $file);  # read/write handle with the :utf8 layer; the result of open is not checked
 347 print $fh "\x{D7FF}", "\n";
 348 print $fh "\x{D800}", "\n";
 349 print $fh "\x{DFFF}", "\n";
 350 print $fh "\x{E000}", "\n";
 351 print $fh "\x{FDCF}", "\n";
 352 print $fh "\x{FDD0}", "\n";
 353 print $fh "\x{FDEF}", "\n";
 354 print $fh "\x{FDF0}", "\n";
 355 print $fh "\x{FEFF}", "\n";
 356 print $fh "\x{FFFD}", "\n";
 357 print $fh "\x{FFFE}", "\n";
 358 print $fh "\x{FFFF}", "\n";
 359 print $fh "\x{10000}", "\n";
 360 print $fh "\x{1FFFE}", "\n";
 361 print $fh "\x{1FFFF}", "\n";
 362 print $fh "\x{2FFFE}", "\n";
 363 print $fh "\x{2FFFF}", "\n";
 364 print $fh "\x{3FFFE}", "\n";
 365 print $fh "\x{3FFFF}", "\n";
 366 print $fh "\x{4FFFE}", "\n";
 367 print $fh "\x{4FFFF}", "\n";
 368 print $fh "\x{5FFFE}", "\n";
 369 print $fh "\x{5FFFF}", "\n";
 370 print $fh "\x{6FFFE}", "\n";
 371 print $fh "\x{6FFFF}", "\n";
 372 print $fh "\x{7FFFE}", "\n";
 373 print $fh "\x{7FFFF}", "\n";
 374 print $fh "\x{8FFFE}", "\n";
 375 print $fh "\x{8FFFF}", "\n";
 376 print $fh "\x{9FFFE}", "\n";
 377 print $fh "\x{9FFFF}", "\n";
 378 print $fh "\x{AFFFE}", "\n";
 379 print $fh "\x{AFFFF}", "\n";
 380 print $fh "\x{BFFFE}", "\n";
 381 print $fh "\x{BFFFF}", "\n";
 382 print $fh "\x{CFFFE}", "\n";
 383 print $fh "\x{CFFFF}", "\n";
 384 print $fh "\x{DFFFE}", "\n";
 385 print $fh "\x{DFFFF}", "\n";
 386 print $fh "\x{EFFFE}", "\n";
 387 print $fh "\x{EFFFF}", "\n";
 388 print $fh "\x{FFFFE}", "\n";
 389 print $fh "\x{FFFFF}", "\n";
 390 print $fh "\x{100000}", "\n";
 391 print $fh "\x{10FFFE}", "\n";
 392 print $fh "\x{10FFFF}", "\n";
 393 print $fh "\x{110000}", "\n";  # last code point written; same list of values as the earlier run with warnings enabled
 394 close $fh;
 395 EXPECT