X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/54161612423d90fe313643089d18d7a3a98460c7..14975c4165fc5ce9497b8d4867b890ea7f7721a9:/dist/Data-Dumper/Dumper.xs diff --git a/dist/Data-Dumper/Dumper.xs b/dist/Data-Dumper/Dumper.xs index c1a7ec8..372c073 100644 --- a/dist/Data-Dumper/Dumper.xs +++ b/dist/Data-Dumper/Dumper.xs @@ -12,17 +12,23 @@ # define DD_USE_OLD_ID_FORMAT #endif +#ifndef isWORDCHAR +# define isWORDCHAR(c) isALNUM(c) +#endif + static I32 num_q (const char *s, STRLEN slen); static I32 esc_q (char *dest, const char *src, STRLEN slen); -static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen); -static I32 needs_quote(register const char *s, STRLEN len); +static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq); +static bool globname_needs_quote(const char *s, STRLEN len); +static bool key_needs_quote(const char *s, STRLEN len); +static bool safe_decimal_number(const char *p, STRLEN len); static SV *sv_x (pTHX_ SV *sv, const char *str, STRLEN len, I32 n); static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity, I32 deepcopy, I32 quotekeys, SV *bless, - I32 maxdepth, SV *sortkeys, int use_sparse_seen_hash); + I32 maxdepth, SV *sortkeys, int use_sparse_seen_hash, I32 useqq, IV maxrecurse); #ifndef HvNAME_get #define HvNAME_get HvNAME @@ -87,39 +93,95 @@ Perl_utf8_to_uvchr_buf(pTHX_ U8 *s, U8 *send, STRLEN *retlen) #define DD_is_integer(sv) SvIOK(sv) #endif -/* does a string need to be protected? */ -static I32 -needs_quote(register const char *s, STRLEN len) +/* does a glob name need to be protected? */ +static bool +globname_needs_quote(const char *s, STRLEN len) { const char *send = s+len; TOP: if (s[0] == ':') { if (++s ). + Previously this used (globname_)needs_quote() which accepted strings + like '::foo', but these aren't safe as unquoted keys under strict. +*/ +static bool +key_needs_quote(const char *s, STRLEN len) { + const char *send = s+len; + + if (safe_decimal_number(s, len)) { + return FALSE; + } + else if (isIDFIRST(*s)) { + while (++s '9') + return FALSE; + + ++p; + --len; + + if (len > 8) + return FALSE; + + while (len > 0) { + /* the perl code checks /\d/ but we don't want unicode digits here */ + if (*p < '0' || *p > '9') + return FALSE; + ++p; + --len; + } + return TRUE; } /* count the number of "'"s and "\"s in string */ static I32 -num_q(register const char *s, register STRLEN slen) +num_q(const char *s, STRLEN slen) { - register I32 ret = 0; + I32 ret = 0; while (slen > 0) { if (*s == '\'' || *s == '\\') @@ -135,9 +197,9 @@ num_q(register const char *s, register STRLEN slen) /* slen number of characters in s will be escaped */ /* destination must be long enough for additional chars */ static I32 -esc_q(register char *d, register const char *s, register STRLEN slen) +esc_q(char *d, const char *s, STRLEN slen) { - register I32 ret = 0; + I32 ret = 0; while (slen > 0) { switch (*s) { @@ -145,6 +207,7 @@ esc_q(register char *d, register const char *s, register STRLEN slen) case '\\': *d = '\\'; ++d; ++ret; + /* FALLTHROUGH */ default: *d = *s; ++d; ++s; --slen; @@ -154,8 +217,9 @@ esc_q(register char *d, register const char *s, register STRLEN slen) return ret; } +/* this function is also misused for implementing $Useqq */ static I32 -esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen) +esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq) { char *r, *rstart; const char *s = src; @@ -170,14 +234,21 @@ esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen) STRLEN qq_escapables = 0; /* " $ @ will need a \ in "" strings. */ STRLEN normal = 0; int increment; + UV next; /* this will need EBCDICification */ - for (s = src; s < send; s += increment) { - const UV k = utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL); + for (s = src; s < send; do_utf8 ? s += increment : s++) { + const UV k = do_utf8 ? utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL) : *(U8*)s; /* check for invalid utf8 */ increment = (k == 0 && *s != '\0') ? 1 : UTF8SKIP(s); + /* this is only used to check if the next character is an + * ASCII digit, which are invariant, so if the following collects + * a UTF-8 start byte it does no harm + */ + next = (s + increment >= send ) ? 0 : *(U8*)(s+increment); + #ifdef EBCDIC if (!isprint(k) || k > 256) { #else @@ -191,6 +262,17 @@ esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen) k <= 0xFFFFFFFF ? 8 : UVSIZE * 4 #endif ); +#ifndef EBCDIC + } else if (useqq && + /* we can't use the short form like '\0' if followed by a digit */ + (((k >= 7 && k <= 10) || k == 12 || k == 13 || k == 27) + || (k < 8 && (next < '0' || next > '9')))) { + grow += 2; + } else if (useqq && k <= 31 && (next < '0' || next > '9')) { + grow += 3; + } else if (useqq && (k <= 31 || k >= 127)) { + grow += 4; +#endif } else if (k == '\\') { backslashes++; } else if (k == '\'') { @@ -201,7 +283,7 @@ esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen) normal++; } } - if (grow) { + if (grow || useqq) { /* We have something needing hex. 3 is ""\0 */ sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes + 2*qq_escapables + normal); @@ -209,8 +291,8 @@ esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen) *r++ = '"'; - for (s = src; s < send; s += UTF8SKIP(s)) { - const UV k = utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL); + for (s = src; s < send; do_utf8 ? s += UTF8SKIP(s) : s++) { + const UV k = do_utf8 ? utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL) : *(U8*)s; if (k == '"' || k == '\\' || k == '$' || k == '@') { *r++ = '\\'; @@ -220,7 +302,44 @@ esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen) #ifdef EBCDIC if (isprint(k) && k < 256) #else - if (k < 0x80) + if (useqq && (k <= 31 || k == 127 || (!do_utf8 && k > 127))) { + bool next_is_digit; + + *r++ = '\\'; + switch (k) { + case 7: *r++ = 'a'; break; + case 8: *r++ = 'b'; break; + case 9: *r++ = 't'; break; + case 10: *r++ = 'n'; break; + case 12: *r++ = 'f'; break; + case 13: *r++ = 'r'; break; + case 27: *r++ = 'e'; break; + default: + increment = (k == 0 && *s != '\0') ? 1 : UTF8SKIP(s); + + /* only ASCII digits matter here, which are invariant, + * since we only encode characters \377 and under, or + * \x177 and under for a unicode string + */ + next = (s+increment < send) ? *(U8*)(s+increment) : 0; + next_is_digit = next >= '0' && next <= '9'; + + /* faster than + * r = r + my_sprintf(r, "%o", k); + */ + if (k <= 7 && !next_is_digit) { + *r++ = (char)k + '0'; + } else if (k <= 63 && !next_is_digit) { + *r++ = (char)(k>>3) + '0'; + *r++ = (char)(k&7) + '0'; + } else { + *r++ = (char)(k>>6) + '0'; + *r++ = (char)((k&63)>>3) + '0'; + *r++ = (char)(k&7) + '0'; + } + } + } + else if (k < 0x80) #endif *r++ = (char)k; else { @@ -260,7 +379,7 @@ static SV * sv_x(pTHX_ SV *sv, const char *str, STRLEN len, I32 n) { if (!sv) - sv = newSVpvn("", 0); + sv = newSVpvs(""); #ifdef DEBUGGING else assert(SvTYPE(sv) >= SVt_PV); @@ -294,10 +413,10 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity, I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys, - int use_sparse_seen_hash) + int use_sparse_seen_hash, I32 useqq, IV maxrecurse) { char tmpbuf[128]; - U32 i; + Size_t i; char *c, *r, *realpack; #ifdef DD_USE_OLD_ID_FORMAT char id[128]; @@ -344,7 +463,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, { dSP; ENTER; SAVETMPS; PUSHMARK(sp); XPUSHs(val); PUTBACK; - i = perl_call_method(SvPVX_const(freezer), G_EVAL|G_VOID); + i = perl_call_method(SvPVX_const(freezer), G_EVAL|G_VOID|G_DISCARD); SPAGAIN; if (SvTRUE(ERRSV)) warn("WARNING(Freezer method call failed): %"SVf"", ERRSV); @@ -379,13 +498,13 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, SV *postentry; if (realtype == SVt_PVHV) - sv_catpvn(retval, "{}", 2); + sv_catpvs(retval, "{}"); else if (realtype == SVt_PVAV) - sv_catpvn(retval, "[]", 2); + sv_catpvs(retval, "[]"); else - sv_catpvn(retval, "do{my $o}", 9); + sv_catpvs(retval, "do{my $o}"); postentry = newSVpvn(name, namelen); - sv_catpvn(postentry, " = ", 3); + sv_catpvs(postentry, " = "); sv_catsv(postentry, othername); av_push(postav, postentry); } @@ -398,9 +517,9 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, } else { sv_catpvn(retval, name, 1); - sv_catpvn(retval, "{", 1); + sv_catpvs(retval, "{"); sv_catsv(retval, othername); - sv_catpvn(retval, "}", 1); + sv_catpvs(retval, "}"); } } else @@ -420,11 +539,11 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, else { /* store our name and continue */ SV *namesv; if (name[0] == '@' || name[0] == '%') { - namesv = newSVpvn("\\", 1); + namesv = newSVpvs("\\"); sv_catpvn(namesv, name, namelen); } else if (realtype == SVt_PVCV && name[0] == '*') { - namesv = newSVpvn("\\", 2); + namesv = newSVpvs("\\"); sv_catpvn(namesv, name, namelen); (SvPVX(namesv))[1] = '&'; } @@ -465,17 +584,21 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, if (!purity && maxdepth > 0 && *levelp >= maxdepth) { STRLEN vallen; const char * const valstr = SvPV(val,vallen); - sv_catpvn(retval, "'", 1); + sv_catpvs(retval, "'"); sv_catpvn(retval, valstr, vallen); - sv_catpvn(retval, "'", 1); + sv_catpvs(retval, "'"); return 1; } + if (maxrecurse > 0 && *levelp >= maxrecurse) { + croak("Recursion limit of %" IVdf " exceeded", maxrecurse); + } + if (realpack && !no_bless) { /* we have a blessed ref */ STRLEN blesslen; const char * const blessstr = SvPV(bless, blesslen); sv_catpvn(retval, blessstr, blesslen); - sv_catpvn(retval, "( ", 2); + sv_catpvs(retval, "( "); if (indent >= 2) { blesspad = apad; apad = newSVsv(apad); @@ -489,21 +612,58 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, if (is_regex) { STRLEN rlen; - const char *rval = SvPV(val, rlen); - const char * const rend = rval+rlen; - const char *slash = rval; - sv_catpvn(retval, "qr/", 3); + SV *sv_pattern = NULL; + SV *sv_flags = NULL; + CV *re_pattern_cv; + const char *rval; + const char *rend; + const char *slash; + + if ((re_pattern_cv = get_cv("re::regexp_pattern", 0))) { + dSP; + I32 count; + ENTER; + SAVETMPS; + PUSHMARK(SP); + XPUSHs(val); + PUTBACK; + count = call_sv((SV*)re_pattern_cv, G_ARRAY); + SPAGAIN; + if (count >= 2) { + sv_flags = POPs; + sv_pattern = POPs; + SvREFCNT_inc(sv_flags); + SvREFCNT_inc(sv_pattern); + } + PUTBACK; + FREETMPS; + LEAVE; + if (sv_pattern) { + sv_2mortal(sv_pattern); + sv_2mortal(sv_flags); + } + } + else { + sv_pattern = val; + } + assert(sv_pattern); + rval = SvPV(sv_pattern, rlen); + rend = rval+rlen; + slash = rval; + sv_catpvs(retval, "qr/"); for (;slash < rend; slash++) { if (*slash == '\\') { ++slash; continue; } if (*slash == '/') { sv_catpvn(retval, rval, slash-rval); - sv_catpvn(retval, "\\/", 2); + sv_catpvs(retval, "\\/"); rlen -= slash-rval+1; rval = slash+1; } } sv_catpvn(retval, rval, rlen); - sv_catpvn(retval, "/", 1); + sv_catpvs(retval, "/"); + if (sv_flags) + sv_catsv(retval, sv_flags); } else if ( #if PERL_VERSION < 9 @@ -512,41 +672,44 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, realtype <= SVt_PVMG #endif ) { /* scalar ref */ - SV * const namesv = newSVpvn("${", 2); + SV * const namesv = newSVpvs("${"); sv_catpvn(namesv, name, namelen); - sv_catpvn(namesv, "}", 1); + sv_catpvs(namesv, "}"); if (realpack) { /* blessed */ - sv_catpvn(retval, "do{\\(my $o = ", 13); + sv_catpvs(retval, "do{\\(my $o = "); DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv, postav, levelp, indent, pad, xpad, apad, sep, pair, freezer, toaster, purity, deepcopy, quotekeys, bless, - maxdepth, sortkeys, use_sparse_seen_hash); - sv_catpvn(retval, ")}", 2); + maxdepth, sortkeys, use_sparse_seen_hash, useqq, + maxrecurse); + sv_catpvs(retval, ")}"); } /* plain */ else { - sv_catpvn(retval, "\\", 1); + sv_catpvs(retval, "\\"); DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv, postav, levelp, indent, pad, xpad, apad, sep, pair, freezer, toaster, purity, deepcopy, quotekeys, bless, - maxdepth, sortkeys, use_sparse_seen_hash); + maxdepth, sortkeys, use_sparse_seen_hash, useqq, + maxrecurse); } SvREFCNT_dec(namesv); } else if (realtype == SVt_PVGV) { /* glob ref */ - SV * const namesv = newSVpvn("*{", 2); + SV * const namesv = newSVpvs("*{"); sv_catpvn(namesv, name, namelen); - sv_catpvn(namesv, "}", 1); - sv_catpvn(retval, "\\", 1); + sv_catpvs(namesv, "}"); + sv_catpvs(retval, "\\"); DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv, postav, levelp, indent, pad, xpad, apad, sep, pair, freezer, toaster, purity, deepcopy, quotekeys, bless, - maxdepth, sortkeys, use_sparse_seen_hash); + maxdepth, sortkeys, use_sparse_seen_hash, useqq, + maxrecurse); SvREFCNT_dec(namesv); } else if (realtype == SVt_PVAV) { SV *totpad; - I32 ix = 0; - const I32 ixmax = av_len((AV *)ival); + SSize_t ix = 0; + const SSize_t ixmax = av_len((AV *)ival); SV * const ixsv = newSViv(0); /* allowing for a 24 char wide array index */ @@ -554,11 +717,11 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, (void)strcpy(iname, name); inamelen = namelen; if (name[0] == '@') { - sv_catpvn(retval, "(", 1); + sv_catpvs(retval, "("); iname[0] = '$'; } else { - sv_catpvn(retval, "[", 1); + sv_catpvs(retval, "["); /* omit "->" in $foo{bar}->[0], but not in ${$foo}->[0] */ /*if (namelen > 0 && name[namelen-1] != ']' && name[namelen-1] != '}' @@ -605,7 +768,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, if (indent >= 3) { sv_catsv(retval, totpad); sv_catsv(retval, ipad); - sv_catpvn(retval, "#", 1); + sv_catpvs(retval, "#"); sv_catsv(retval, ixsv); } sv_catsv(retval, totpad); @@ -613,9 +776,10 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav, levelp, indent, pad, xpad, apad, sep, pair, freezer, toaster, purity, deepcopy, quotekeys, bless, - maxdepth, sortkeys, use_sparse_seen_hash); + maxdepth, sortkeys, use_sparse_seen_hash, + useqq, maxrecurse); if (ix < ixmax) - sv_catpvn(retval, ",", 1); + sv_catpvs(retval, ","); } if (ixmax >= 0) { SV * const opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), (*levelp)-1); @@ -624,9 +788,9 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, SvREFCNT_dec(opad); } if (name[0] == '@') - sv_catpvn(retval, ")", 1); + sv_catpvs(retval, ")"); else - sv_catpvn(retval, "]", 1); + sv_catpvs(retval, "]"); SvREFCNT_dec(ixsv); SvREFCNT_dec(totpad); Safefree(iname); @@ -634,7 +798,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, else if (realtype == SVt_PVHV) { SV *totpad, *newapad; SV *sname; - HE *entry; + HE *entry = NULL; char *key; I32 klen; SV *hval; @@ -642,11 +806,11 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, SV * const iname = newSVpvn(name, namelen); if (name[0] == '%') { - sv_catpvn(retval, "(", 1); + sv_catpvs(retval, "("); (SvPVX(iname))[0] = '$'; } else { - sv_catpvn(retval, "{", 1); + sv_catpvs(retval, "{"); /* omit "->" in $foo[0]->{bar}, but not in ${$foo}->{bar} */ if ((namelen > 0 && name[namelen-1] != ']' && name[namelen-1] != '}') @@ -654,16 +818,16 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, && (name[1] == '{' || (name[0] == '\\' && name[2] == '{')))) { - sv_catpvn(iname, "->", 2); + sv_catpvs(iname, "->"); } } if (name[0] == '*' && name[namelen-1] == '}' && namelen >= 8 && (instr(name+namelen-8, "{SCALAR}") || instr(name+namelen-7, "{ARRAY}") || instr(name+namelen-6, "{HASH}"))) { - sv_catpvn(iname, "->", 2); + sv_catpvs(iname, "->"); } - sv_catpvn(iname, "{", 1); + sv_catpvs(iname, "{"); totpad = newSVsv(sep); sv_catsv(totpad, pad); sv_catsv(totpad, apad); @@ -672,7 +836,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, if (sortkeys) { if (sortkeys == &PL_sv_yes) { #if PERL_VERSION < 8 - sortkeys = sv_2mortal(newSVpvn("Data::Dumper::_sortkeys", 23)); + sortkeys = sv_2mortal(newSVpvs("Data::Dumper::_sortkeys")); #else keys = newAV(); (void)hv_iterinit((HV*)ival); @@ -681,16 +845,25 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, (void)SvREFCNT_inc(sv); av_push(keys, sv); } -# ifdef USE_LOCALE_NUMERIC - sortsv(AvARRAY(keys), - av_len(keys)+1, - IN_LOCALE ? Perl_sv_cmp_locale : Perl_sv_cmp); -# else - sortsv(AvARRAY(keys), - av_len(keys)+1, - Perl_sv_cmp); +# ifdef USE_LOCALE_COLLATE +# ifdef IN_LC /* Use this if available */ + if (IN_LC(LC_COLLATE)) +# else + if (IN_LOCALE) +# endif + { + sortsv(AvARRAY(keys), + av_len(keys)+1, + Perl_sv_cmp_locale); + } + else # endif #endif + { + sortsv(AvARRAY(keys), + av_len(keys)+1, + Perl_sv_cmp); + } } if (sortkeys != &PL_sv_yes) { dSP; ENTER; SAVETMPS; PUSHMARK(sp); @@ -723,13 +896,13 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, bool do_utf8 = FALSE; if (sortkeys) { - if (!(keys && (I32)i <= av_len(keys))) break; + if (!(keys && (SSize_t)i <= av_len(keys))) break; } else { if (!(entry = hv_iternext((HV *)ival))) break; } if (i) - sv_catpvn(retval, ",", 1); + sv_catpvs(retval, ","); if (sortkeys) { char *key; @@ -751,31 +924,27 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, sv_catsv(retval, totpad); sv_catsv(retval, ipad); - /* old logic was first to check utf8 flag, and if utf8 always + /* The (very) + old logic was first to check utf8 flag, and if utf8 always call esc_q_utf8. This caused test to break under -Mutf8, because there even strings like 'c' have utf8 flag on. Hence with quotekeys == 0 the XS code would still '' quote them based on flags, whereas the perl code would not, based on regexps. - The perl code is correct. - needs_quote() decides that anything that isn't a valid - perl identifier needs to be quoted, hence only correctly - formed strings with no characters outside [A-Za-z0-9_:] - won't need quoting. None of those characters are used in - the byte encoding of utf8, so anything with utf8 - encoded characters in will need quoting. Hence strings - with utf8 encoded characters in will end up inside do_utf8 - just like before, but now strings with utf8 flag set but - only ascii characters will end up in the unquoted section. - - There should also be less tests for the (probably currently) - more common doesn't need quoting case. - The code is also smaller (22044 vs 22260) because I've been - able to pull the common logic out to both sides. */ - if (quotekeys || needs_quote(key,keylen)) { - if (do_utf8) { + + The old logic checked that the string was a valid + perl glob name (foo::bar), which isn't safe under + strict, and differs from the perl code which only + accepts simple identifiers. + + With the fix for [perl #120384] I chose to make + their handling of key quoting compatible between XS + and perl. + */ + if (quotekeys || key_needs_quote(key,keylen)) { + if (do_utf8 || useqq) { STRLEN ocur = SvCUR(retval); - nlen = esc_q_utf8(aTHX_ retval, key, klen); + nlen = esc_q_utf8(aTHX_ retval, key, klen, do_utf8, useqq); nkey = SvPVX(retval) + ocur; } else { @@ -800,7 +969,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, } sname = newSVsv(iname); sv_catpvn(sname, nkey, nlen); - sv_catpvn(sname, "}", 1); + sv_catpvs(sname, "}"); sv_catsv(retval, pair); if (indent >= 2) { @@ -820,7 +989,8 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, DD_dump(aTHX_ hval, SvPVX_const(sname), SvCUR(sname), retval, seenhv, postav, levelp, indent, pad, xpad, newapad, sep, pair, freezer, toaster, purity, deepcopy, quotekeys, bless, - maxdepth, sortkeys, use_sparse_seen_hash); + maxdepth, sortkeys, use_sparse_seen_hash, useqq, + maxrecurse); SvREFCNT_dec(sname); Safefree(nkey_buffer); if (indent >= 2) @@ -833,14 +1003,14 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, SvREFCNT_dec(opad); } if (name[0] == '%') - sv_catpvn(retval, ")", 1); + sv_catpvs(retval, ")"); else - sv_catpvn(retval, "}", 1); + sv_catpvs(retval, "}"); SvREFCNT_dec(iname); SvREFCNT_dec(totpad); } else if (realtype == SVt_PVCV) { - sv_catpvn(retval, "sub { \"DUMMY\" }", 15); + sv_catpvs(retval, "sub { \"DUMMY\" }"); if (purity) warn("Encountered CODE ref, using dummy placeholder"); } @@ -856,7 +1026,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, SvREFCNT_dec(apad); apad = blesspad; } - sv_catpvn(retval, ", '", 3); + sv_catpvs(retval, ", '"); plen = strlen(realpack); pticks = num_q(realpack, plen); @@ -875,11 +1045,11 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, else { sv_catpvn(retval, realpack, strlen(realpack)); } - sv_catpvn(retval, "' )", 3); + sv_catpvs(retval, "' )"); if (toaster && SvPOK(toaster) && SvCUR(toaster)) { - sv_catpvn(retval, "->", 2); + sv_catpvs(retval, "->"); sv_catsv(retval, toaster); - sv_catpvn(retval, "()", 2); + sv_catpvs(retval, "()"); } } SvREFCNT_dec(ipad); @@ -904,9 +1074,9 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, if ((svp = av_fetch(seenentry, 0, FALSE)) && (othername = *svp) && (svp = av_fetch(seenentry, 2, FALSE)) && *svp && SvIV(*svp) > 0) { - sv_catpvn(retval, "${", 2); + sv_catpvs(retval, "${"); sv_catsv(retval, othername); - sv_catpvn(retval, "}", 1); + sv_catpvs(retval, "}"); return 1; } } @@ -918,7 +1088,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, * Note that we'd have to check for weak-refs, too, but this is * already the branch for non-refs only. */ else if (val != &PL_sv_undef && (!use_sparse_seen_hash || SvREFCNT(val) > 1)) { - SV * const namesv = newSVpvn("\\", 1); + SV * const namesv = newSVpvs("\\"); sv_catpvn(namesv, name, namelen); seenentry = newAV(); av_push(seenentry, namesv); @@ -962,14 +1132,14 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, #endif i = 0; else i -= 4; } - if (needs_quote(c,i)) { + if (globname_needs_quote(c,i)) { #ifdef GvNAMEUTF8 if (GvNAMEUTF8(val)) { sv_grow(retval, SvCUR(retval)+2); r = SvPVX(retval)+SvCUR(retval); r[0] = '*'; r[1] = '{'; SvCUR_set(retval, SvCUR(retval)+2); - esc_q_utf8(aTHX_ retval, c, i); + esc_q_utf8(aTHX_ retval, c, i, 1, useqq); sv_grow(retval, SvCUR(retval)+2); r = SvPVX(retval)+SvCUR(retval); r[0] = '}'; r[1] = '\0'; @@ -999,8 +1169,8 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, static const char* const entries[] = { "{SCALAR}", "{ARRAY}", "{HASH}" }; static const STRLEN sizes[] = { 8, 7, 6 }; SV *e; - SV * const nname = newSVpvn("", 0); - SV * const newapad = newSVpvn("", 0); + SV * const nname = newSVpvs(""); + SV * const newapad = newSVpvs(""); GV * const gv = (GV*)val; I32 j; @@ -1017,7 +1187,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, sv_setsv(nname, postentry); sv_catpvn(nname, entries[j], sizes[j]); - sv_catpvn(postentry, " = ", 3); + sv_catpvs(postentry, " = "); av_push(postav, postentry); e = newRV_inc(e); @@ -1029,7 +1199,8 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, seenhv, postav, &nlevel, indent, pad, xpad, newapad, sep, pair, freezer, toaster, purity, deepcopy, quotekeys, bless, maxdepth, - sortkeys, use_sparse_seen_hash); + sortkeys, use_sparse_seen_hash, useqq, + maxrecurse); SvREFCNT_dec(e); } } @@ -1039,7 +1210,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, } } else if (val == &PL_sv_undef || !SvOK(val)) { - sv_catpvn(retval, "undef", 5); + sv_catpvs(retval, "undef"); } #ifdef SvVOK else if (SvMAGICAL(val) && (mg = mg_find(val, 'V'))) { @@ -1055,11 +1226,20 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, sv_catpvn(retval, (const char *)mg->mg_ptr, mg->mg_len); } #endif + else { integer_came_from_string: - c = SvPV(val, i); - if (DO_UTF8(val)) - i += esc_q_utf8(aTHX_ retval, c, i); + c = SvPV(val, i); + /* the pure perl and XS non-qq outputs have historically been + * different in this case, but for useqq, let's try to match + * the pure perl code. + * see [perl #74798] + */ + if (useqq && safe_decimal_number(c, i)) { + sv_catsv(retval, val); + } + else if (DO_UTF8(val) || useqq) + i += esc_q_utf8(aTHX_ retval, c, i, DO_UTF8(val), useqq); else { sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */ r = SvPVX(retval) + SvCUR(retval); @@ -1090,7 +1270,7 @@ MODULE = Data::Dumper PACKAGE = Data::Dumper PREFIX = Data_Dumper_ # # This is the exact equivalent of Dump. Well, almost. The things that are # different as of now (due to Laziness): -# * doesn't do double-quotes yet. +# * doesn't deparse yet.' # void @@ -1104,11 +1284,13 @@ Data_Dumper_Dumpxs(href, ...) HV *seenhv = NULL; AV *postav, *todumpav, *namesav; I32 level = 0; - I32 indent, terse, i, imax, postlen; + I32 indent, terse, useqq; + SSize_t i, imax, postlen; SV **svp; SV *val, *name, *pad, *xpad, *apad, *sep, *pair, *varname; SV *freezer, *toaster, *bless, *sortkeys; I32 purity, deepcopy, quotekeys, maxdepth = 0; + IV maxrecurse = 1000; char tmpbuf[1024]; I32 gimme = GIMME; int use_sparse_seen_hash = 0; @@ -1121,10 +1303,11 @@ Data_Dumper_Dumpxs(href, ...) SAVETMPS; PUSHMARK(sp); - XPUSHs(href); - XPUSHs(sv_2mortal(newSVsv(ST(1)))); + EXTEND(SP, 3); /* 3 == max of all branches below */ + PUSHs(href); + PUSHs(sv_2mortal(newSVsv(ST(1)))); if (items >= 3) - XPUSHs(sv_2mortal(newSVsv(ST(2)))); + PUSHs(sv_2mortal(newSVsv(ST(2)))); PUTBACK; i = perl_call_method("new", G_SCALAR); SPAGAIN; @@ -1144,10 +1327,10 @@ Data_Dumper_Dumpxs(href, ...) = freezer = toaster = bless = sortkeys = &PL_sv_undef; name = sv_newmortal(); indent = 2; - terse = purity = deepcopy = 0; + terse = purity = deepcopy = useqq = 0; quotekeys = 1; - retval = newSVpvn("", 0); + retval = newSVpvs(""); if (SvROK(href) && (hv = (HV*)SvRV((SV*)href)) && SvTYPE(hv) == SVt_PVHV) { @@ -1168,10 +1351,8 @@ Data_Dumper_Dumpxs(href, ...) purity = SvIV(*svp); if ((svp = hv_fetch(hv, "terse", 5, FALSE))) terse = SvTRUE(*svp); -#if 0 /* useqq currently unused */ if ((svp = hv_fetch(hv, "useqq", 5, FALSE))) useqq = SvTRUE(*svp); -#endif if ((svp = hv_fetch(hv, "pad", 3, FALSE))) pad = *svp; if ((svp = hv_fetch(hv, "xpad", 4, FALSE))) @@ -1196,6 +1377,8 @@ Data_Dumper_Dumpxs(href, ...) bless = *svp; if ((svp = hv_fetch(hv, "maxdepth", 8, FALSE))) maxdepth = SvIV(*svp); + if ((svp = hv_fetch(hv, "maxrecurse", 10, FALSE))) + maxrecurse = SvIV(*svp); if ((svp = hv_fetch(hv, "sortkeys", 8, FALSE))) { sortkeys = *svp; if (! SvTRUE(sortkeys)) @@ -1213,7 +1396,7 @@ Data_Dumper_Dumpxs(href, ...) imax = av_len(todumpav); else imax = -1; - valstr = newSVpvn("",0); + valstr = newSVpvs(""); for (i = 0; i <= imax; ++i) { SV *newapad; @@ -1275,7 +1458,8 @@ Data_Dumper_Dumpxs(href, ...) DD_dump(aTHX_ val, SvPVX_const(name), SvCUR(name), valstr, seenhv, postav, &level, indent, pad, xpad, newapad, sep, pair, freezer, toaster, purity, deepcopy, quotekeys, - bless, maxdepth, sortkeys, use_sparse_seen_hash); + bless, maxdepth, sortkeys, use_sparse_seen_hash, + useqq, maxrecurse); SPAGAIN; if (indent >= 2 && !terse) @@ -1285,13 +1469,13 @@ Data_Dumper_Dumpxs(href, ...) if (postlen >= 0 || !terse) { sv_insert(valstr, 0, 0, " = ", 3); sv_insert(valstr, 0, 0, SvPVX_const(name), SvCUR(name)); - sv_catpvn(valstr, ";", 1); + sv_catpvs(valstr, ";"); } sv_catsv(retval, pad); sv_catsv(retval, valstr); sv_catsv(retval, sep); if (postlen >= 0) { - I32 i; + SSize_t i; sv_catsv(retval, pad); for (i = 0; i <= postlen; ++i) { SV *elem; @@ -1299,20 +1483,20 @@ Data_Dumper_Dumpxs(href, ...) if (svp && (elem = *svp)) { sv_catsv(retval, elem); if (i < postlen) { - sv_catpvn(retval, ";", 1); + sv_catpvs(retval, ";"); sv_catsv(retval, sep); sv_catsv(retval, pad); } } } - sv_catpvn(retval, ";", 1); + sv_catpvs(retval, ";"); sv_catsv(retval, sep); } sv_setpvn(valstr, "", 0); if (gimme == G_ARRAY) { XPUSHs(sv_2mortal(retval)); if (i < imax) /* not the last time thro ? */ - retval = newSVpvn("",0); + retval = newSVpvs(""); } } SvREFCNT_dec(postav);