From c728cb41064fc29351e896c61d746060cee08d64 Mon Sep 17 00:00:00 2001 From: Jarkko Hietaniemi Date: Mon, 7 Jan 2002 04:44:05 +0000 Subject: [PATCH] More regex and utf8 debug dumping. p4raw-id: //depot/perl@14114 --- dump.c | 7 ++++--- regcomp.c | 10 +++++++--- regexec.c | 17 +++++++++-------- utf8.c | 29 ++++++++++++++++++++++++++--- utf8.h | 3 +++ 5 files changed, 49 insertions(+), 17 deletions(-) diff --git a/dump.c b/dump.c index 290ee7a..0279107 100644 --- a/dump.c +++ b/dump.c @@ -279,7 +279,8 @@ Perl_sv_peek(pTHX_ SV *sv) Perl_sv_catpvf(aTHX_ t, "%s)", pv_display(tmp, SvPVX(sv), SvCUR(sv), SvLEN(sv), 127)); if (SvUTF8(sv)) Perl_sv_catpvf(aTHX_ t, " [UTF8 \"%s\"]", - sv_uni_display(tmp, sv, 8 * sv_len_utf8(sv), 0)); + sv_uni_display(tmp, sv, 8 * sv_len_utf8(sv), + UNI_DISPLAY_QQ)); SvREFCNT_dec(tmp); } } @@ -1115,7 +1116,7 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, bo PerlIO_printf(file, "( %s . ) ", pv_display(d, SvPVX(sv)-SvIVX(sv), SvIVX(sv), 0, pvlim)); PerlIO_printf(file, "%s", pv_display(d, SvPVX(sv), SvCUR(sv), SvLEN(sv), pvlim)); if (SvUTF8(sv)) /* the 8? \x{....} */ - PerlIO_printf(file, " [UTF8 \"%s\"]", sv_uni_display(d, sv, 8 * sv_len_utf8(sv), 0)); + PerlIO_printf(file, " [UTF8 \"%s\"]", sv_uni_display(d, sv, 8 * sv_len_utf8(sv), UNI_DISPLAY_QQ)); PerlIO_printf(file, "\n"); Perl_dump_indent(aTHX_ level, file, " CUR = %"IVdf"\n", (IV)SvCUR(sv)); Perl_dump_indent(aTHX_ level, file, " LEN = %"IVdf"\n", (IV)SvLEN(sv)); @@ -1247,7 +1248,7 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, bo elt = hv_iterval(hv, he); Perl_dump_indent(aTHX_ level+1, file, "Elt %s ", pv_display(d, keypv, len, 0, pvlim)); if (SvUTF8(keysv)) - PerlIO_printf(file, "[UTF8 \"%s\"] ", sv_uni_display(d, keysv, 8 * sv_len_utf8(keysv), 0)); + PerlIO_printf(file, "[UTF8 \"%s\"] ", sv_uni_display(d, keysv, 8 * sv_len_utf8(keysv), UNI_DISPLAY_QQ)); PerlIO_printf(file, "HASH = 0x%"UVxf"\n", (UV)hash); do_sv_dump(level+1, file, elt, nest+1, maxnest, dumpops, pvlim); } diff --git a/regcomp.c b/regcomp.c index 3459e0a..07b11ee 100644 --- a/regcomp.c +++ b/regcomp.c @@ -4557,9 +4557,13 @@ Perl_regprop(pTHX_ SV *sv, regnode *o) if (k == EXACT) { SV *dsv = sv_2mortal(newSVpvn("", 0)); - bool do_utf8 = DO_UTF8(sv); + /* Using is_utf8_string() is a crude hack but it may + * be the best for now since we have no flag "this EXACTish + * node was UTF-8" --jhi */ + bool do_utf8 = is_utf8_string((U8*)STRING(o), STR_LEN(o)); char *s = do_utf8 ? - pv_uni_display(dsv, (U8*)STRING(o), STR_LEN(o), 60, 0) : + pv_uni_display(dsv, (U8*)STRING(o), STR_LEN(o), 60, + UNI_DISPLAY_REGEX) : STRING(o); int len = do_utf8 ? strlen(s) : @@ -4750,7 +4754,7 @@ Perl_pregfree(pTHX_ struct regexp *r) return; DEBUG_r({ char *s = pv_uni_display(dsv, (U8*)r->precomp, r->prelen, 60, - UNI_DISPLAY_ISPRINT); + UNI_DISPLAY_REGEX); int len = SvCUR(dsv); if (!PL_colorset) reginitcolors(); diff --git a/regexec.c b/regexec.c index ee8f602..203c8e9 100644 --- a/regexec.c +++ b/regexec.c @@ -401,7 +401,8 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, DEBUG_r({ char *s = PL_reg_match_utf8 ? - sv_uni_display(dsv, sv, 60, 0) : strpos; + sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) : + strpos; int len = PL_reg_match_utf8 ? strlen(s) : strend - strpos; if (!PL_colorset) @@ -1626,11 +1627,11 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * DEBUG_r({ char *s0 = UTF ? pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60, - UNI_DISPLAY_ISPRINT) : + UNI_DISPLAY_REGEX) : prog->precomp; int len0 = UTF ? SvCUR(dsv0) : prog->prelen; char *s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60, - UNI_DISPLAY_ISPRINT) : startpos; + UNI_DISPLAY_REGEX) : startpos; int len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos; if (!PL_colorset) reginitcolors(); @@ -1822,11 +1823,11 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * regprop(prop, c); s0 = UTF ? pv_uni_display(dsv0, (U8*)SvPVX(prop), SvCUR(prop), 60, - UNI_DISPLAY_ISPRINT) : + UNI_DISPLAY_REGEX) : SvPVX(prop); len0 = UTF ? SvCUR(dsv0) : SvCUR(prop); s1 = UTF ? - sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_ISPRINT) : s; + sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s; len1 = UTF ? SvCUR(dsv1) : strend - s; PerlIO_printf(Perl_debug_log, "Matching stclass `%*.*s' against `%*.*s'\n", @@ -2197,17 +2198,17 @@ S_regmatch(pTHX_ regnode *prog) char *s0 = do_utf8 ? pv_uni_display(dsv0, (U8*)(locinput - pref_len), - pref0_len, 60, 0) : + pref0_len, 60, UNI_DISPLAY_REGEX) : locinput - pref_len; int len0 = do_utf8 ? strlen(s0) : pref0_len; char *s1 = do_utf8 ? pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len), - pref_len - pref0_len, 60, 0) : + pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) : locinput - pref_len + pref0_len; int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len; char *s2 = do_utf8 ? pv_uni_display(dsv2, (U8*)locinput, - PL_regeol - locinput, 60, 0) : + PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) : locinput; int len2 = do_utf8 ? strlen(s2) : l; PerlIO_printf(Perl_debug_log, diff --git a/utf8.c b/utf8.c index 0a25c03..8258ef5 100644 --- a/utf8.c +++ b/utf8.c @@ -1677,14 +1677,37 @@ Perl_pv_uni_display(pTHX_ SV *dsv, U8 *spv, STRLEN len, STRLEN pvlim, UV flags) sv_setpvn(dsv, "", 0); for (s = (char *)spv, e = s + len; s < e; s += UTF8SKIP(s)) { UV u; + bool ok = FALSE; + if (pvlim && SvCUR(dsv) >= pvlim) { truncated++; break; } u = utf8_to_uvchr((U8*)s, 0); - if ((flags & UNI_DISPLAY_ISPRINT) && u < 256 && isprint(u)) - Perl_sv_catpvf(aTHX_ dsv, "%c", u); - else + if (u < 256) { + if (!ok && (flags & UNI_DISPLAY_ISPRINT) && isprint(u & 0xFF)) { + Perl_sv_catpvf(aTHX_ dsv, "%c", u); + ok = TRUE; + } + if (!ok && (flags & UNI_DISPLAY_BACKSLASH)) { + switch (u & 0xFF) { + case '\n': + Perl_sv_catpvf(aTHX_ dsv, "\\n"); ok = TRUE; break; + case '\r': + Perl_sv_catpvf(aTHX_ dsv, "\\r"); ok = TRUE; break; + case '\t': + Perl_sv_catpvf(aTHX_ dsv, "\\t"); ok = TRUE; break; + case '\f': + Perl_sv_catpvf(aTHX_ dsv, "\\f"); ok = TRUE; break; + case '\a': + Perl_sv_catpvf(aTHX_ dsv, "\\a"); ok = TRUE; break; + case '\\': + Perl_sv_catpvf(aTHX_ dsv, "\\" ); ok = TRUE; break; + default: break; + } + } + } + if (!ok) Perl_sv_catpvf(aTHX_ dsv, "\\x{%"UVxf"}", u); } if (truncated) diff --git a/utf8.h b/utf8.h index 96f1b74..8c27afa 100644 --- a/utf8.h +++ b/utf8.h @@ -194,4 +194,7 @@ END_EXTERN_C #define UNICODE_GREEK_SMALL_LETTER_SIGMA 0x03C3 #define UNI_DISPLAY_ISPRINT 0x0001 +#define UNI_DISPLAY_BACKSLASH 0x0002 +#define UNI_DISPLAY_QQ (UNI_DISPLAY_ISPRINT|UNI_DISPLAY_BACKSLASH) +#define UNI_DISPLAY_REGEX (UNI_DISPLAY_ISPRINT|UNI_DISPLAY_BACKSLASH) -- 1.8.3.1