utf8.c AOK

  [utf8_to_uv]
  Malformed UTF-8 character
      my $a = ord "\x80" ;

  Malformed UTF-8 character
      my $a = ord "\xf080" ;
  <<<<<< this warning can't be easily triggered from perl anymore

  [utf16_to_utf8]
  Malformed UTF-16 surrogate
  <<<<<< Add a test when something actually calls utf16_to_utf8

__END__
# utf8.c [utf8_to_uv] -W
BEGIN {
    if (ord('A') == 193) { # 'A' == 193 is the EBCDIC case named in the message printed below
        print "SKIPPED\n# ebcdic platforms do not generate Malformed UTF-8 warnings.";
        exit 0; # leave during BEGIN so none of the statements below are run
    }
}
use utf8 ; # source declared as UTF-8; per EXPECT the o-slash below is seen as start byte 0xf8 followed by 's' (0x73)
my $a = "snøstorm" ; # line 9: no utf8 warnings pragma in effect yet; EXPECT lists a warning for this line
{
    no warnings 'utf8' ;
    my $a = "snøstorm"; # line 12: utf8 category switched off; EXPECT lists nothing for this line
    use warnings 'utf8' ;
    my $a = "snøstorm"; # line 14: utf8 category switched back on; EXPECT lists a warning for this line
}
EXPECT
Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 9.
Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 14.
########
use warnings 'utf8'; # chr() on twelve boundary code points, first with the utf8 category on, then with it off
my $d7ff = chr(0xD7FF); # one below 0xD800; not listed in EXPECT
my $d800 = chr(0xD800); # line 3: EXPECT "UTF-16 surrogate 0xd800"
my $dfff = chr(0xDFFF); # line 4: EXPECT "UTF-16 surrogate 0xdfff"
my $e000 = chr(0xE000); # one above 0xDFFF; not listed in EXPECT
my $feff = chr(0xFEFF); # not listed in EXPECT
my $fffd = chr(0xFFFD); # not listed in EXPECT
my $fffe = chr(0xFFFE); # line 8: EXPECT "Unicode character 0xfffe is illegal"
my $ffff = chr(0xFFFF); # line 9: EXPECT "Unicode character 0xffff is illegal"
my $hex4 = chr(0x10000); # not listed in EXPECT
my $hex5 = chr(0x100000); # not listed in EXPECT
my $maxm1 = chr(0x10FFFE); # line 12: EXPECT "Unicode character 0x10fffe is illegal"
my $max = chr(0x10FFFF); # line 13: EXPECT "Unicode character 0x10ffff is illegal"
no warnings 'utf8'; # same twelve calls repeated on lines 15-26; EXPECT lists nothing for any of them
my $d7ff = chr(0xD7FF);
my $d800 = chr(0xD800);
my $dfff = chr(0xDFFF);
my $e000 = chr(0xE000);
my $feff = chr(0xFEFF);
my $fffd = chr(0xFFFD);
my $fffe = chr(0xFFFE);
my $ffff = chr(0xFFFF);
my $hex4 = chr(0x10000);
my $hex5 = chr(0x100000);
my $maxm1 = chr(0x10FFFE);
my $max = chr(0x10FFFF);
EXPECT
UTF-16 surrogate 0xd800 at - line 3.
UTF-16 surrogate 0xdfff at - line 4.
Unicode character 0xfffe is illegal at - line 8.
Unicode character 0xffff is illegal at - line 9.
Unicode character 0x10fffe is illegal at - line 12.
Unicode character 0x10ffff is illegal at - line 13.
########
use warnings 'utf8'; # pack("U", ...) on twelve boundary code points, first with the utf8 category on, then with it off
my $d7ff = pack("U", 0xD7FF); # one below 0xD800; not listed in EXPECT
my $d800 = pack("U", 0xD800); # line 3: EXPECT "UTF-16 surrogate 0xd800"
my $dfff = pack("U", 0xDFFF); # line 4: EXPECT "UTF-16 surrogate 0xdfff"
my $e000 = pack("U", 0xE000); # one above 0xDFFF; not listed in EXPECT
my $feff = pack("U", 0xFEFF); # not listed in EXPECT
my $fffd = pack("U", 0xFFFD); # not listed in EXPECT
my $fffe = pack("U", 0xFFFE); # line 8: EXPECT "Unicode character 0xfffe is illegal"
my $ffff = pack("U", 0xFFFF); # line 9: EXPECT "Unicode character 0xffff is illegal"
my $hex4 = pack("U", 0x10000); # not listed in EXPECT
my $hex5 = pack("U", 0x100000); # not listed in EXPECT
my $maxm1 = pack("U", 0x10FFFE); # line 12: EXPECT "Unicode character 0x10fffe is illegal"
my $max = pack("U", 0x10FFFF); # line 13: EXPECT "Unicode character 0x10ffff is illegal"
no warnings 'utf8'; # same twelve calls repeated on lines 15-26; EXPECT lists nothing for any of them
my $d7ff = pack("U", 0xD7FF);
my $d800 = pack("U", 0xD800);
my $dfff = pack("U", 0xDFFF);
my $e000 = pack("U", 0xE000);
my $feff = pack("U", 0xFEFF);
my $fffd = pack("U", 0xFFFD);
my $fffe = pack("U", 0xFFFE);
my $ffff = pack("U", 0xFFFF);
my $hex4 = pack("U", 0x10000);
my $hex5 = pack("U", 0x100000);
my $maxm1 = pack("U", 0x10FFFE);
my $max = pack("U", 0x10FFFF);
EXPECT
UTF-16 surrogate 0xd800 at - line 3.
UTF-16 surrogate 0xdfff at - line 4.
Unicode character 0xfffe is illegal at - line 8.
Unicode character 0xffff is illegal at - line 9.
Unicode character 0x10fffe is illegal at - line 12.
Unicode character 0x10ffff is illegal at - line 13.
########
use warnings 'utf8'; # the same twelve code points written as "\x{...}" string escapes, category on and then off
my $d7ff = "\x{D7FF}"; # one below 0xD800; not listed in EXPECT
my $d800 = "\x{D800}"; # line 3: EXPECT "UTF-16 surrogate 0xd800"
my $dfff = "\x{DFFF}"; # line 4: EXPECT "UTF-16 surrogate 0xdfff"
my $e000 = "\x{E000}"; # one above 0xDFFF; not listed in EXPECT
my $feff = "\x{FEFF}"; # not listed in EXPECT
my $fffd = "\x{FFFD}"; # not listed in EXPECT
my $fffe = "\x{FFFE}"; # line 8: EXPECT "Unicode character 0xfffe is illegal"
my $ffff = "\x{FFFF}"; # line 9: EXPECT "Unicode character 0xffff is illegal"
my $hex4 = "\x{10000}"; # not listed in EXPECT
my $hex5 = "\x{100000}"; # not listed in EXPECT
my $maxm1 = "\x{10FFFE}"; # line 12: EXPECT "Unicode character 0x10fffe is illegal"
my $max = "\x{10FFFF}"; # line 13: EXPECT "Unicode character 0x10ffff is illegal"
no warnings 'utf8'; # same twelve literals repeated on lines 15-26; EXPECT lists nothing for any of them
my $d7ff = "\x{D7FF}";
my $d800 = "\x{D800}";
my $dfff = "\x{DFFF}";
my $e000 = "\x{E000}";
my $feff = "\x{FEFF}";
my $fffd = "\x{FFFD}";
my $fffe = "\x{FFFE}";
my $ffff = "\x{FFFF}";
my $hex4 = "\x{10000}";
my $hex5 = "\x{100000}";
my $maxm1 = "\x{10FFFE}";
my $max = "\x{10FFFF}";
EXPECT
UTF-16 surrogate 0xd800 at - line 3.
UTF-16 surrogate 0xdfff at - line 4.
Unicode character 0xfffe is illegal at - line 8.
Unicode character 0xffff is illegal at - line 9.
Unicode character 0x10fffe is illegal at - line 12.
Unicode character 0x10ffff is illegal at - line 13.