This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
PATCH: [perl #131642] pack returning malformed UTF-8
authorKarl Williamson <khw@cpan.org>
Mon, 2 Jul 2018 04:39:47 +0000 (22:39 -0600)
committerKarl Williamson <khw@cpan.org>
Fri, 8 Mar 2019 19:41:03 +0000 (12:41 -0700)
This patch causes pack to die rather than return malformed UTF-8.  This
protects the rest of the core from unexpectedly getting malformed
inputs.

pp_pack.c
t/lib/warnings/utf8
t/op/pack.t

index 5f1a599..726f743 100644 (file)
--- a/pp_pack.c
+++ b/pp_pack.c
@@ -3149,6 +3149,21 @@ PP(pp_pack)
 
     packlist(cat, pat, patend, MARK, SP + 1);
 
+    if (SvUTF8(cat)) {
+        STRLEN result_len;
+        const char * result = SvPV_nomg(cat, result_len);
+        const U8 * error_pos;
+
+        if (! is_utf8_string_loc((U8 *) result, result_len, &error_pos)) {
+            _force_out_malformed_utf8_message(error_pos,
+                                              (U8 *) result + result_len,
+                                              0, /* no flags */
+                                              1 /* Die */
+                                            );
+            NOT_REACHED; /* NOTREACHED */
+        }
+    }
+
     SvSETMAGIC(cat);
     SP = ORIGMARK;
     PUSHs(cat);
index a9a6388..49fa4e4 100644 (file)
@@ -782,4 +782,5 @@ use warnings 'utf8';
 for(uc 0..t){0~~pack"UXc",exp}
 EXPECT
 OPTIONS regex
-Malformed UTF-8 character: \\x([[:xdigit:]]{2})\\x([[:xdigit:]]{2}) \(unexpected non-continuation byte 0x\2, immediately after start byte 0x\1; need 2 bytes, got 1\)  in smart match at - line 9.
+Malformed UTF-8 character: \\x([[:xdigit:]]{2})\\x([[:xdigit:]]{2}) \(unexpected non-continuation byte 0x\2, immediately after start byte 0x\1; need 2 bytes, got 1\) in pack at - line 9.
+Malformed UTF-8 character \(fatal\) at - line 9.
index bb9f865..4543cde 100644 (file)
@@ -955,15 +955,11 @@ is("@{[unpack('U*', pack('U*', 100, 200))]}", "100 200");
     is("@{[pack('C0U*', map { utf8::native_to_unicode($_) } 64, 202)]}",
        pack("C*", 64, @bytes202));
 
-    # does unpack U0U on byte data warn?
-    {
-       use warnings qw(NONFATAL all);;
-
-        my $bad = pack("U0C", 202);
-        local $SIG{__WARN__} = sub { $@ = "@_" };
-        my @null = unpack('U0U', $bad);
-        like($@, qr/^Malformed UTF-8 character: /);
-    }
+    # does unpack U0U on byte data fail?
+    fresh_perl_like('my $bad = pack("U0C", 202); my @null = unpack("U0U", $bad);',
+                    qr/^Malformed UTF-8 character: /,
+                    {},
+                    "pack doesn't return malformed UTF-8");
 }
 
 {