# $::UTF8_ALLOW_EMPTY, $::UTF8_GOT_EMPTY, $REPLACEMENT, 0, 0,
# qr/empty string/
#],
- [ "orphan continuation byte malformation", I8_to_native("${I8c}a"), 2,
- $::UTF8_ALLOW_CONTINUATION, $::UTF8_GOT_CONTINUATION, $REPLACEMENT,
- 1, 1,
- qr/unexpected continuation byte/
- ],
);
if (isASCII && ! $::is64bit) { # 32-bit ASCII platform
foreach my $test (@malformations) {
my ($testname, $bytes, $length, $allow_flags, $expected_error_flags,
$allowed_uv, $expected_len, $needed_to_discern_len, $message ) = @$test;
- next unless $testname =~ /overlong/;
$test->[0] .= "; use REPLACEMENT CHAR";
$test->[5] = $REPLACEMENT;
# like being a surrogate; 0 indicates we need
# the whole string. Some categories have a
# default that is used if this is omitted.
+ [ "orphan continuation byte malformation",
+ I8_to_native("$::I8c"),
+ 0xFFFD,
+ 1,
+ ],
[ "overlong malformation, lowest 2-byte",
(isASCII) ? "\xc0\x80" : I8_to_native("\xc0\xa0"),
0, # NUL
my $length = length $bytes;
my $initially_overlong = $testname =~ /overlong/;
+ my $initially_orphan = $testname =~ /orphan/;
my $will_overflow = $allowed_uv < 0;
my $uv_string = sprintf(($allowed_uv < 0x100) ? "%02X" : "%04X", $allowed_uv);
# expect that the tests will show it isn't valid.
my $initially_malformed = 0;
- if ($initially_overlong) {
+ if ($initially_overlong || $initially_orphan) {
$non_cp_trailing_text = "if you see this, there is an error";
$cp_message_qr = qr/\Q$non_cp_trailing_text\E/;
- if (! defined $needed_to_discern_len) {
- $needed_to_discern_len = overlong_discern_len($bytes);
- }
$initially_malformed = 1;
- $correct_bytes_for_overlong = display_bytes_no_quotes(chr $allowed_uv);
$utf8n_flag_to_warn = 0;
$utf8n_flag_to_disallow = 0;
$utf8n_flag_to_warn_complement |= $::UTF8_WARN_PERL_EXTENDED;
$utf8n_flag_to_disallow_complement |= $::UTF8_DISALLOW_PERL_EXTENDED;
}
+
$controlling_warning_category = 'utf8';
+
+ if ($initially_overlong) {
+ if (! defined $needed_to_discern_len) {
+ $needed_to_discern_len = overlong_discern_len($bytes);
+ }
+ $correct_bytes_for_overlong = display_bytes_no_quotes(chr $allowed_uv);
+ }
}
elsif($will_overflow || $allowed_uv > 0x10FFFF) {
my $overlong_is_in_perl_extended_utf8 = 0;
my $dont_use_overlong_cp = 0;
+ if ($initially_orphan) {
+ next if $overlong || $short || $unexpected_noncont;
+ }
+
if ($overlong) {
if (! $initially_overlong) {
my $new_expected_len;
# now XXX, only do so for those that return an explicit code
# point.
+ if ($initially_orphan) {
+ push @malformation_names, "orphan continuation";
+ push @expected_malformation_return_flags,
+ $::UTF8_GOT_CONTINUATION;
+ $allow_flags |= $::UTF8_ALLOW_CONTINUATION
+ if $malformed_allow_type;
+ push @expected_malformation_warnings, qr/unexpected continuation/;
+ }
+
if ($overlong) {
push @malformation_names, 'overlong';
push @expected_malformation_return_flags, $::UTF8_GOT_LONG;
foreach my $do_disallow (0, 1) {
if ($do_disallow) {
- next if $initially_overlong;
+ next if $initially_overlong || $initially_orphan;
}
else {
next if $skip_most_tests;
next if $skip_most_tests && $warning_type != 1;
foreach my $use_warn_flag (0, 1) {
if ($use_warn_flag) {
- next if $initially_overlong;
+ next if $initially_overlong || $initially_orphan;
}
else {
next if $skip_most_tests;