This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Don't assume input UTF-8 is well-formed in to_utf8_case()
author: Karl Williamson <khw@cpan.org>
Mon, 19 Dec 2016 18:46:10 +0000 (11:46 -0700)
committer: Karl Williamson <khw@cpan.org>
Sat, 24 Dec 2016 05:52:43 +0000 (22:52 -0700)
This is a deprecated function, but it still should check input validity
as best it can.

This also adds to the pod that it will be removed in 5.28.

utf8.c

diff --git a/utf8.c b/utf8.c
index dc4c5b8..cf7a690 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -2732,6 +2732,8 @@ L</toTITLE_utf8_safe>,
 L</toLOWER_utf8_safe>,
 or L</toFOLD_utf8_safe>.
 
+This function will be removed in Perl v5.28.
+
 C<p> contains the pointer to the UTF-8 string encoding
 the character that is being converted.  This routine assumes that the character
 at C<p> is well-formed.
@@ -2763,9 +2765,19 @@ UV
 Perl_to_utf8_case(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp,
                        SV **swashp, const char *normal, const char *special)
 {
+    STRLEN len_cp;
+    UV cp;
+    const U8 * e = p + UTF8SKIP(p);
+
     PERL_ARGS_ASSERT_TO_UTF8_CASE;
 
-    return _to_utf8_case(valid_utf8_to_uvchr(p, NULL), p, ustrp, lenp, swashp, normal, special);
+    cp = utf8n_to_uvchr(p, e - p, &len_cp, UTF8_CHECK_ONLY);
+    if (len_cp == (STRLEN) -1) {
+        _force_out_malformed_utf8_message(p, e,
+                                   _UTF8_NO_CONFIDENCE_IN_CURLEN, 1 /* Die */ );
+    }
+
+    return _to_utf8_case(cp, p, ustrp, lenp, swashp, normal, special);
 }
 
     /* change namve uv1 to 'from' */