From daf6caf1ef25ff48f871fa1e53adcefc11bf1d08 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Thu, 16 Jan 2020 16:14:40 -0700 Subject: [PATCH] pv_uni_display: Use common fcn; \b mnemonic This removes the (almost) duplicate code in this function to display mnemonics for control characters that have them. The reason the two pieces of code aren't precisely the same is that the other function also uses \b as a mnemonic for backspace. Using all possible mnemonics is desirable, so a flag is added for pv_uni_display to now use \b. This is now by default enabled in double-quoted strings, but not regex patterns (as \b there means something quite different except in character classes). B.pm is changed to expect \b. --- ext/B/B.pm | 2 +- ext/B/B.xs | 2 +- ext/B/t/b.t | 1 - utf8.c | 48 ++++++++++++++++++++---------------------------- utf8.h | 8 +++++++- 5 files changed, 29 insertions(+), 32 deletions(-) diff --git a/ext/B/B.pm b/ext/B/B.pm index 8eb749c..f199a05 100644 --- a/ext/B/B.pm +++ b/ext/B/B.pm @@ -20,7 +20,7 @@ sub import { # walkoptree comes from B.xs BEGIN { - $B::VERSION = '1.78'; + $B::VERSION = '1.79'; @B::EXPORT_OK = (); # Our BOOT code needs $VERSION set, and will append to @EXPORT_OK. diff --git a/ext/B/B.xs b/ext/B/B.xs index 7bd8353..b3d04b8 100644 --- a/ext/B/B.xs +++ b/ext/B/B.xs @@ -258,7 +258,7 @@ cstring(pTHX_ SV *sv, bool perlstyle) sv_catpvs(sstr, "\\@"); else if (*s == '\\') { - if (memCHRs("nrftax\\",*(s+1))) + if (memCHRs("nrftabx\\",*(s+1))) sv_catpvn(sstr, s++, 2); else sv_catpvs(sstr, "\\\\"); diff --git a/ext/B/t/b.t b/ext/B/t/b.t index d1dba06..aa67fd3 100644 --- a/ext/B/t/b.t +++ b/ext/B/t/b.t @@ -290,7 +290,6 @@ is(B::opnumber("pp_null"), 0, "Testing opnumber with opname (pp_null)"); while (my ($test, $expect) = splice @tests, 0, 2) { is(B::perlstring($test), $expect, "B::perlstring($expect)"); utf8::upgrade $test; - $expect =~ s/\\b/sprintf("\\x{%x}", utf8::unicode_to_native(8))/eg; $expect =~ s/\\([0-7]{3})/sprintf "\\x\{%x\}", oct $1/eg; is(B::perlstring($test), $expect, "B::perlstring($expect) (Unicode)"); } diff --git a/utf8.c b/utf8.c index 7b82985..a67c987 100644 --- a/utf8.c +++ b/utf8.c @@ -4053,9 +4053,9 @@ Perl_check_utf8_print(pTHX_ const U8* s, const STRLEN len) /* =for apidoc pv_uni_display -Build to the scalar C a displayable version of the string C, -length C, the displayable version being at most C bytes long -(if longer, the rest is truncated and C<"..."> will be appended). +Build to the scalar C a displayable version of the UTF-8 encoded string +C, length C, the displayable version being at most C bytes +long (if longer, the rest is truncated and C<"..."> will be appended). The C argument can have C set to display Cable characters as themselves, C @@ -4064,6 +4064,9 @@ to display the C<\\[nrfta\\]> as the backslashed versions (like C<"\n">) C (and its alias C) have both C and C turned on. +Additionally, there is now C which allows C<\b> for a +backspace, but only when C also is set. + The pointer to the PV of the C is returned. See also L. @@ -4082,10 +4085,7 @@ Perl_pv_uni_display(pTHX_ SV *dsv, const U8 *spv, STRLEN len, STRLEN pvlim, SvUTF8_off(dsv); for (s = (const char *)spv, e = s + len; s < e; s += UTF8SKIP(s)) { UV u; - /* This serves double duty as a flag and a character to print after - a \ when flags & UNI_DISPLAY_BACKSLASH is true. - */ - char ok = 0; + bool ok = 0; if (pvlim && SvCUR(dsv) >= pvlim) { truncated++; @@ -4095,27 +4095,19 @@ Perl_pv_uni_display(pTHX_ SV *dsv, const U8 *spv, STRLEN len, STRLEN pvlim, if (u < 256) { const unsigned char c = (unsigned char)u & 0xFF; if (flags & UNI_DISPLAY_BACKSLASH) { - switch (c) { - case '\n': - ok = 'n'; break; - case '\r': - ok = 'r'; break; - case '\t': - ok = 't'; break; - case '\f': - ok = 'f'; break; - case '\a': - ok = 'a'; break; - case '\\': - ok = '\\'; break; - default: break; - } - if (ok) { - const char string = ok; - sv_catpvs(dsv, "\\"); - sv_catpvn(dsv, &string, 1); - } - } + if ( isMNEMONIC_CNTRL(c) + && ( c != '\b' + || (flags & UNI_DISPLAY_BACKSPACE))) + { + const char * mnemonic = cntrl_to_mnemonic(c); + sv_catpvn(dsv, mnemonic, strlen(mnemonic)); + ok = 1; + } + else if (c == '\\') { + sv_catpvs(dsv, "\\\\"); + ok = 1; + } + } /* isPRINT() is the locale-blind version. */ if (!ok && (flags & UNI_DISPLAY_ISPRINT) && isPRINT(c)) { const char string = c; diff --git a/utf8.h b/utf8.h index fa036f0..fb83507 100644 --- a/utf8.h +++ b/utf8.h @@ -1009,7 +1009,13 @@ Evaluates to 0xFFFD, the code point of the Unicode REPLACEMENT CHARACTER #define UNI_DISPLAY_ISPRINT 0x0001 #define UNI_DISPLAY_BACKSLASH 0x0002 -#define UNI_DISPLAY_QQ (UNI_DISPLAY_ISPRINT|UNI_DISPLAY_BACKSLASH) +#define UNI_DISPLAY_BACKSPACE 0x0004 /* Allow \b when also + UNI_DISPLAY_BACKSLASH */ +#define UNI_DISPLAY_QQ (UNI_DISPLAY_ISPRINT \ + |UNI_DISPLAY_BACKSLASH \ + |UNI_DISPLAY_BACKSPACE) + +/* Character classes could also allow \b, but not patterns in general */ #define UNI_DISPLAY_REGEX (UNI_DISPLAY_ISPRINT|UNI_DISPLAY_BACKSLASH) #define ANYOF_FOLD_SHARP_S(node, input, end) \ -- 1.8.3.1