This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
utf8.c: Forbid zero-length malformation under DEBUGGING
author Karl Williamson <khw@cpan.org>
Mon, 19 Dec 2016 20:25:06 +0000 (13:25 -0700)
committer Karl Williamson <khw@cpan.org>
Fri, 23 Dec 2016 23:48:35 +0000 (16:48 -0700)
ext/XS-APItest/t/utf8.t
pod/perldelta.pod
utf8.c

index 1b0b1da..7d640b1 100644 (file)
@@ -1199,10 +1199,12 @@ my $REPLACEMENT = 0xFFFD;
 my @malformations = (
     # ($testname, $bytes, $length, $allow_flags, $expected_error_flags,
     #  $allowed_uv, $expected_len, $needed_to_discern_len, $message )
-    [ "zero length string malformation", "", 0,
-        $UTF8_ALLOW_EMPTY, $UTF8_GOT_EMPTY, 0, 0, 0,
-        qr/empty string/
-    ],
+
+# Now considered a program bug, and asserted against
+    #[ "zero length string malformation", "", 0,
+    #    $UTF8_ALLOW_EMPTY, $UTF8_GOT_EMPTY, 0, 0, 0,
+    #    qr/empty string/
+    #],
     [ "orphan continuation byte malformation", I8_to_native("${I8c}a"), 2,
         $UTF8_ALLOW_CONTINUATION, $UTF8_GOT_CONTINUATION, $REPLACEMENT,
         1, 1,
index 2569c69..b5f5645 100644 (file)
@@ -339,6 +339,13 @@ L<perlapi/Character classification>.
 Calling macros like C<isALPHA_utf8> on malformed UTF-8 has issued a
 deprecation warning since Perl v5.18.  They now die.
 
+=item *
+
+Calling the function C<utf8n_to_uvchr> or any of its derivatives with a
+string length of 0, without specifying that this is allowed (via the
+C<UTF8_ALLOW_EMPTY> flag), is now asserted against in DEBUGGING builds.
+If you have nothing to decode, you shouldn't call the decode function.
+
 =back
 
 =head1 Selected Bug Fixes
diff --git a/utf8.c b/utf8.c
index 44aada5..2312648 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -1398,6 +1398,12 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s,
                 *errors |= UTF8_GOT_EMPTY;
 
                 if (! (flags & UTF8_ALLOW_EMPTY)) {
+
+                    /* This so-called malformation is now treated as a bug in
+                     * the caller.  If you have nothing to decode, skip calling
+                     * this function */
+                    assert(0);
+
                     disallowed = TRUE;
                     if (ckWARN_d(WARN_UTF8) && ! (flags & UTF8_CHECK_ONLY)) {
                         pack_warn = packWARN(WARN_UTF8);