This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Make _byte_dump_string() usable in all of core
authorKarl Williamson <khw@cpan.org>
Tue, 14 Feb 2017 02:57:53 +0000 (19:57 -0700)
committerKarl Williamson <khw@cpan.org>
Tue, 14 Feb 2017 04:24:11 +0000 (21:24 -0700)
I found myself needing this function for development debugging, which
formerly was only usable from utf8.c.  This enhances it to allow a
second format type, and makes it core-accessible.

embed.fnc
embed.h
locale.c
proto.h
utf8.c

index a384c36..0f63ed0 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -1715,6 +1715,10 @@ ApdD     |UV     |to_utf8_case   |NN const U8 *p                                 \
                                |NN SV **swashp                                 \
                                |NN const char *normal|                         \
                                NULLOK const char *special
+ApM    |char * |_byte_dump_string                                      \
+                               |NN const U8 * s                        \
+                               |const STRLEN len                       \
+                               |const bool format
 #if defined(PERL_IN_UTF8_C)
 inR    |bool   |does_utf8_overflow|NN const U8 * const s|NN const U8 * e
 inR    |bool   |is_utf8_overlong_given_start_byte_ok|NN const U8 * const s|const STRLEN len
@@ -1724,7 +1728,6 @@ sMR       |char * |unexpected_non_continuation_text                       \
                |STRLEN print_len                                       \
                |const STRLEN non_cont_byte_pos                         \
                |const STRLEN expect_len
-sM     |char * |_byte_dump_string|NN const U8 * s|const STRLEN len
 s      |void   |warn_on_first_deprecated_use                               \
                                |NN const char * const name                 \
                                |NN const char * const alternative          \
diff --git a/embed.h b/embed.h
index 2bbfc64..5b9c46c 100644 (file)
--- a/embed.h
+++ b/embed.h
@@ -27,6 +27,7 @@
 /* Hide global symbols */
 
 #define Gv_AMupdate(a,b)       Perl_Gv_AMupdate(aTHX_ a,b)
+#define _byte_dump_string(a,b,c)       Perl__byte_dump_string(aTHX_ a,b,c)
 #define _force_out_malformed_utf8_message(a,b,c,d)     Perl__force_out_malformed_utf8_message(aTHX_ a,b,c,d)
 #define _is_in_locale_category(a,b)    Perl__is_in_locale_category(aTHX_ a,b)
 #define _is_uni_FOO(a,b)       Perl__is_uni_FOO(aTHX_ a,b)
 #define isa_lookup(a,b,c,d)    S_isa_lookup(aTHX_ a,b,c,d)
 #  endif
 #  if defined(PERL_IN_UTF8_C)
-#define _byte_dump_string(a,b) S__byte_dump_string(aTHX_ a,b)
 #define _to_utf8_case(a,b,c,d,e,f,g)   S__to_utf8_case(aTHX_ a,b,c,d,e,f,g)
 #define check_and_deprecate(a,b,c,d,e,f)       S_check_and_deprecate(aTHX_ a,b,c,d,e,f)
 #define check_locale_boundary_crossing(a,b,c,d)        S_check_locale_boundary_crossing(aTHX_ a,b,c,d)
index 01962ea..1ba802f 100644 (file)
--- a/locale.c
+++ b/locale.c
@@ -1908,14 +1908,12 @@ Perl__mem_collxfrm(pTHX_ const char *input_string,
 
 #ifdef DEBUGGING
     if (DEBUG_Lv_TEST || debug_initialization) {
-        Size_t i;
 
         print_collxfrm_input_and_return(s, s + len, xlen, utf8);
         PerlIO_printf(Perl_debug_log, "Its xfrm is:");
-        for (i = COLLXFRM_HDR_LEN; i < *xlen + COLLXFRM_HDR_LEN; i++) {
-            PerlIO_printf(Perl_debug_log, " %02x", (U8) xbuf[i]);
-        }
-        PerlIO_printf(Perl_debug_log, "\n");
+        PerlIO_printf(Perl_debug_log, "%s\n",
+                      _byte_dump_string((U8 *) xbuf + COLLXFRM_HDR_LEN,
+                       *xlen, 1));
     }
 #endif
 
diff --git a/proto.h b/proto.h
index 8281563..c61980e 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -41,6 +41,9 @@ PERL_CALLCONV void*   Perl_Slab_Alloc(pTHX_ size_t sz)
 PERL_CALLCONV void     Perl_Slab_Free(pTHX_ void *op);
 #define PERL_ARGS_ASSERT_SLAB_FREE     \
        assert(op)
+PERL_CALLCONV char *   Perl__byte_dump_string(pTHX_ const U8 * s, const STRLEN len, const bool format);
+#define PERL_ARGS_ASSERT__BYTE_DUMP_STRING     \
+       assert(s)
 PERL_CALLCONV void     Perl__force_out_malformed_utf8_message(pTHX_ const U8 *const p, const U8 * const e, const U32 flags, const bool die_here);
 #define PERL_ARGS_ASSERT__FORCE_OUT_MALFORMED_UTF8_MESSAGE     \
        assert(p); assert(e)
@@ -5624,9 +5627,6 @@ STATIC bool       S_isa_lookup(pTHX_ HV *stash, const char * const name, STRLEN len, U
        assert(stash); assert(name)
 #endif
 #if defined(PERL_IN_UTF8_C)
-STATIC char *  S__byte_dump_string(pTHX_ const U8 * s, const STRLEN len);
-#define PERL_ARGS_ASSERT__BYTE_DUMP_STRING     \
-       assert(s)
 STATIC UV      S__to_utf8_case(pTHX_ const UV uv1, const U8 *p, U8* ustrp, STRLEN *lenp, SV **swashp, const char *normal, const char *special);
 #define PERL_ARGS_ASSERT__TO_UTF8_CASE \
        assert(p); assert(ustrp); assert(swashp); assert(normal)
diff --git a/utf8.c b/utf8.c
index bec68a5..89c8413 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -754,11 +754,15 @@ Perl__is_utf8_char_helper(const U8 * const s, const U8 * e, const U32 flags)
     return UTF8SKIP(s);
 }
 
-STATIC char *
-S__byte_dump_string(pTHX_ const U8 * s, const STRLEN len)
+char *
+Perl__byte_dump_string(pTHX_ const U8 * s, const STRLEN len, const bool format)
 {
     /* Returns a mortalized C string that is a displayable copy of the 'len'
-     * bytes starting at 's', each in a \xXY format. */
+     * bytes starting at 's'.  'format' gives how to display each byte.
+     * Currently, there are only two formats, so it is currently a bool:
+     *      0   \xab
+     *      1    ab         (that is a space between two hex digit bytes)
+     */
 
     const STRLEN output_len = 4 * len + 1;  /* 4 bytes per each input, plus a
                                                trailing NUL */
@@ -776,8 +780,13 @@ S__byte_dump_string(pTHX_ const U8 * s, const STRLEN len)
         const unsigned high_nibble = (*s & 0xF0) >> 4;
         const unsigned low_nibble =  (*s & 0x0F);
 
-        *d++ = '\\';
-        *d++ = 'x';
+        if (format) {
+            *d++ = ' ';
+        }
+        else {
+            *d++ = '\\';
+            *d++ = 'x';
+        }
 
         if (high_nibble < 10) {
             *d++ = high_nibble + '0';
@@ -827,7 +836,7 @@ S_unexpected_non_continuation_text(pTHX_ const U8 * const s,
     return Perl_form(aTHX_ "%s: %s (unexpected non-continuation byte 0x%02x,"
                            " %s after start byte 0x%02x; need %d bytes, got %d)",
                            malformed_text,
-                           _byte_dump_string(s, print_len),
+                           _byte_dump_string(s, print_len, 0),
                            *(s + non_cont_byte_pos),
                            where,
                            *s,
@@ -1401,7 +1410,7 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s,
                         if (pack_warn) {
                             message = Perl_form(aTHX_ "%s: %s (overflows)",
                                             malformed_text,
-                                            _byte_dump_string(s0, send - s0));
+                                            _byte_dump_string(s0, send - s0, 0));
                         }
                     }
                 }
@@ -1437,7 +1446,7 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s,
                                 "%s: %s (unexpected continuation byte 0x%02x,"
                                 " with no preceding start byte)",
                                 malformed_text,
-                                _byte_dump_string(s0, 1), *s0);
+                                _byte_dump_string(s0, 1, 0), *s0);
                     }
                 }
             }
@@ -1452,7 +1461,7 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s,
                         message = Perl_form(aTHX_
                                 "%s: %s (too short; %d byte%s available, need %d)",
                                 malformed_text,
-                                _byte_dump_string(s0, send - s0),
+                                _byte_dump_string(s0, send - s0, 0),
                                 (int)avail_len,
                                 avail_len == 1 ? "" : "s",
                                 (int)expectlen);
@@ -1516,8 +1525,8 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s,
                                     " should be represented with a"
                                     " different, shorter sequence)",
                                     malformed_text,
-                                    _byte_dump_string(s0, send - s0),
-                                    _byte_dump_string(s0, curlen));
+                                    _byte_dump_string(s0, send - s0, 0),
+                                    _byte_dump_string(s0, curlen, 0));
                         }
                         else {
                             U8 tmpbuf[UTF8_MAXBYTES+1];
@@ -1527,8 +1536,8 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s,
                                 "%s: %s (overlong; instead use %s to represent"
                                 " U+%0*" UVXf ")",
                                 malformed_text,
-                                _byte_dump_string(s0, send - s0),
-                                _byte_dump_string(tmpbuf, e - tmpbuf),
+                                _byte_dump_string(s0, send - s0, 0),
+                                _byte_dump_string(tmpbuf, e - tmpbuf, 0),
                                 ((uv < 256) ? 2 : 4), /* Field width of 2 for
                                                          small code points */
                                 uv);
@@ -1553,7 +1562,7 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s,
                             message = Perl_form(aTHX_
                                     "UTF-16 surrogate (any UTF-8 sequence that"
                                     " starts with \"%s\" is for a surrogate)",
-                                    _byte_dump_string(s0, curlen));
+                                    _byte_dump_string(s0, curlen, 0));
                         }
                         else {
                             message = Perl_form(aTHX_
@@ -1583,7 +1592,7 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s,
                                     "Any UTF-8 sequence that starts with"
                                     " \"%s\" is for a non-Unicode code point,"
                                     " may not be portable",
-                                    _byte_dump_string(s0, curlen));
+                                    _byte_dump_string(s0, curlen, 0));
                         }
                         else {
                             message = Perl_form(aTHX_
@@ -1622,7 +1631,7 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s,
                                         "Any UTF-8 sequence that starts with"
                                         " \"%s\" is for a non-Unicode code"
                                         " point, and is not portable",
-                                        _byte_dump_string(s0, curlen));
+                                        _byte_dump_string(s0, curlen, 0));
                         }
                         else {
                             message = Perl_form(aTHX_