This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
utf8.c: Consolidate duplicate error msg text
author Karl Williamson <khw@cpan.org>
Wed, 28 Sep 2016 16:19:03 +0000 (10:19 -0600)
committer Karl Williamson <khw@cpan.org>
Thu, 13 Oct 2016 17:18:11 +0000 (11:18 -0600)
This text is generated in 2 places; consolidate into one place.

embed.fnc
embed.h
proto.h
utf8.c

index e4c4e30..43a17de 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -1673,6 +1673,8 @@ ApdD      |UV     |to_utf8_case   |NN const U8 *p                                 \
                                |NN const char *normal|                         \
                                NULLOK const char *special
 #if defined(PERL_IN_UTF8_C)
+sMR    |char * |unexpected_non_continuation_text                       \
+               |NN const U8 * const s|const STRLEN len
 s      |UV     |_to_utf8_case  |const UV uv1                                   \
                                |NN const U8 *p                                 \
                                |NN U8* ustrp                                   \
diff --git a/embed.h b/embed.h
index 31d0548..6045ec7 100644 (file)
--- a/embed.h
+++ b/embed.h
 #define swash_scan_list_line(a,b,c,d,e,f,g)    S_swash_scan_list_line(aTHX_ a,b,c,d,e,f,g)
 #define swatch_get(a,b,c)      S_swatch_get(aTHX_ a,b,c)
 #define to_lower_latin1                S_to_lower_latin1
+#define unexpected_non_continuation_text(a,b)  S_unexpected_non_continuation_text(aTHX_ a,b)
 #  endif
 #  if defined(PERL_IN_UTF8_C) || defined(PERL_IN_PP_C)
 #define _to_upper_title_latin1(a,b,c,d)        Perl__to_upper_title_latin1(aTHX_ a,b,c,d)
diff --git a/proto.h b/proto.h
index 9f504ea..6491d1f 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -5604,6 +5604,11 @@ STATIC SV*       S_swatch_get(pTHX_ SV* swash, UV start, UV span)
 STATIC U8      S_to_lower_latin1(const U8 c, U8 *p, STRLEN *lenp)
                        __attribute__warn_unused_result__;
 
+STATIC char *  S_unexpected_non_continuation_text(pTHX_ const U8 * const s, const STRLEN len)
+                       __attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_UNEXPECTED_NON_CONTINUATION_TEXT      \
+       assert(s)
+
 #endif
 #if defined(PERL_IN_UTF8_C) || defined(PERL_IN_PP_C)
 PERL_CALLCONV UV       Perl__to_upper_title_latin1(pTHX_ const U8 c, U8 *p, STRLEN *lenp, const char S_or_s);
diff --git a/utf8.c b/utf8.c
index 02f7716..2c3ecf7 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -33,8 +33,9 @@
 #include "perl.h"
 #include "invlist_inline.h"
 
+static const char malformed_text[] = "Malformed UTF-8 character";
 static const char unees[] =
-    "Malformed UTF-8 character (unexpected end of string)";
+                        "Malformed UTF-8 character (unexpected end of string)";
 static const char cp_above_legal_max[] =
  "Use of code point 0x%"UVXf" is deprecated; the permissible max is 0x%"UVXf"";
 
@@ -655,6 +656,23 @@ Perl__is_utf8_char_helper(const U8 * const s, const U8 * e, const U32 flags)
 #undef FE_ABOVE_OVERLONG
 #undef FF_OVERLONG_PREFIX
 
+PERL_STATIC_INLINE char *
+S_unexpected_non_continuation_text(pTHX_ const U8 * const s, const STRLEN len)
+{
+    /* Return the malformation warning text for an unexpected non-continuation
+     * byte; 's' points to the start byte and s[len] is the offending byte. */
+
+    const char * const where = (len == 1)
+                               ? "immediately"
+                               : Perl_form(aTHX_ "%d bytes", (int) len);
+
+    PERL_ARGS_ASSERT_UNEXPECTED_NON_CONTINUATION_TEXT;
+
+    return Perl_form(aTHX_ "%s (unexpected non-continuation byte 0x%02x,"
+                           " %s after start byte 0x%02x)",
+                           malformed_text, *(s + len), where, *s);
+}
+
 /*
 
 =for apidoc utf8n_to_uvchr
@@ -776,7 +794,6 @@ Perl_utf8n_to_uvchr(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
     bool overflowed = FALSE;
     bool do_overlong_test = TRUE;   /* May have to skip this test */
 
-    const char* const malformed_text = "Malformed UTF-8 character";
 
     PERL_ARGS_ASSERT_UTF8N_TO_UVCHR;
 
@@ -906,12 +923,8 @@ Perl_utf8n_to_uvchr(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
     if (UNLIKELY(unexpected_non_continuation)) {
        if (!(flags & UTF8_ALLOW_NON_CONTINUATION)) {
            if (! (flags & UTF8_CHECK_ONLY)) {
-               if (curlen == 1) {
-                   sv = sv_2mortal(Perl_newSVpvf(aTHX_ "%s (unexpected non-continuation byte 0x%02x, immediately after start byte 0x%02x)", malformed_text, *s, *s0));
-               }
-               else {
-                   sv = sv_2mortal(Perl_newSVpvf(aTHX_ "%s (unexpected non-continuation byte 0x%02x, %d bytes after start byte 0x%02x, expected %d bytes)", malformed_text, *s, (int) curlen, *s0, (int)expectlen));
-               }
+                sv = sv_2mortal(Perl_newSVpvf(aTHX_ "%s",
+                                unexpected_non_continuation_text(s0, curlen)));
            }
            goto malformed;
        }
@@ -1263,14 +1276,12 @@ Perl_bytes_cmp_utf8(pTHX_ const U8 *b, STRLEN blen, const U8 *u, STRLEN ulen)
                    if (UTF8_IS_CONTINUATION(c1)) {
                        c = EIGHT_BIT_UTF8_TO_NATIVE(c, c1);
                    } else {
+                        /* diag_listed_as: Malformed UTF-8 character (%s) */
                        Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8),
-                                        "Malformed UTF-8 character "
-                                        "(unexpected non-continuation byte 0x%02x"
-                                        ", immediately after start byte 0x%02x)"
-                                        /* Dear diag.t, it's in the pod.  */
-                                        "%s%s", c1, c,
-                                        PL_op ? " in " : "",
-                                        PL_op ? OP_DESC(PL_op) : "");
+                                    "%s %s%s",
+                                    unexpected_non_continuation_text(u - 1, 1),
+                                    PL_op ? " in " : "",
+                                    PL_op ? OP_DESC(PL_op) : "");
                        return -2;
                    }
                } else {