X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/ee8c7f5465f003860e2347a2946abacac39bd9b9..bd2db5df3cd7c8f0ecc592ef15151e17c1504af9:/ext/Data/Dumper/Dumper.xs diff --git a/ext/Data/Dumper/Dumper.xs b/ext/Data/Dumper/Dumper.xs index bb606f4..b39b77a 100644 --- a/ext/Data/Dumper/Dumper.xs +++ b/ext/Data/Dumper/Dumper.xs @@ -2,37 +2,67 @@ #include "EXTERN.h" #include "perl.h" #include "XSUB.h" - -#ifndef PERL_VERSION -#include "patchlevel.h" -#define PERL_VERSION PATCHLEVEL +#ifdef USE_PPPORT_H +# define NEED_my_snprintf +# include "ppport.h" #endif -#if PERL_VERSION < 5 -# ifndef PL_sv_undef -# define PL_sv_undef sv_undef -# endif -# ifndef ERRSV -# define ERRSV GvSV(errgv) -# endif -# ifndef newSVpvn -# define newSVpvn newSVpv -# endif +#if PERL_VERSION < 6 +# define DD_USE_OLD_ID_FORMAT #endif -static I32 num_q (char *s, STRLEN slen); -static I32 esc_q (char *dest, char *src, STRLEN slen); -static SV *sv_x (pTHX_ SV *sv, char *str, STRLEN len, I32 n); -static I32 DD_dump (pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, +static I32 num_q (const char *s, STRLEN slen); +static I32 esc_q (char *dest, const char *src, STRLEN slen); +static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen); +static I32 needs_quote(register const char *s); +static SV *sv_x (pTHX_ SV *sv, const char *str, STRLEN len, I32 n); +static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, AV *postav, I32 *levelp, I32 indent, - SV *pad, SV *xpad, SV *apad, SV *sep, + SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity, I32 deepcopy, I32 quotekeys, SV *bless, - I32 maxdepth); + I32 maxdepth, SV *sortkeys); + +#ifndef HvNAME_get +#define HvNAME_get HvNAME +#endif + +#if PERL_VERSION <= 6 /* Perl 5.6 and earlier */ + +# ifdef EBCDIC +# define UNI_TO_NATIVE(ch) (((ch) > 255) ? (ch) : ASCII_TO_NATIVE(ch)) +# else +# define UNI_TO_NATIVE(ch) (ch) +# endif + +UV +Perl_utf8_to_uvchr(pTHX_ U8 *s, STRLEN *retlen) +{ + const UV uv = utf8_to_uv(s, UTF8_MAXLEN, retlen, + ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY); + return UNI_TO_NATIVE(uv); +} + +# if !defined(PERL_IMPLICIT_CONTEXT) +# define utf8_to_uvchr Perl_utf8_to_uvchr +# else +# define utf8_to_uvchr(a,b) Perl_utf8_to_uvchr(aTHX_ a,b) +# endif + +#endif /* PERL_VERSION <= 6 */ + +/* Changes in 5.7 series mean that now IOK is only set if scalar is + precisely integer but in 5.6 and earlier we need to do a more + complex test */ +#if PERL_VERSION <= 6 +#define DD_is_integer(sv) (SvIOK(sv) && (SvIsUV(val) ? SvUV(sv) == SvNV(sv) : SvIV(sv) == SvNV(sv))) +#else +#define DD_is_integer(sv) SvIOK(sv) +#endif /* does a string need to be protected? */ static I32 -needs_quote(register char *s) +needs_quote(register const char *s) { TOP: if (s[0] == ':') { @@ -52,14 +82,14 @@ TOP: return 1; } } - else + else return 1; return 0; } /* count the number of "'"s and "\"s in string */ static I32 -num_q(register char *s, register STRLEN slen) +num_q(register const char *s, register STRLEN slen) { register I32 ret = 0; @@ -77,10 +107,10 @@ num_q(register char *s, register STRLEN slen) /* slen number of characters in s will be escaped */ /* destination must be long enough for additional chars */ static I32 -esc_q(register char *d, register char *s, register STRLEN slen) +esc_q(register char *d, register const char *s, register STRLEN slen) { register I32 ret = 0; - + while (slen > 0) { switch (*s) { case '\'': @@ -96,20 +126,119 @@ esc_q(register char *d, register char *s, register STRLEN slen) return ret; } +static I32 +esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen) +{ + char *r, *rstart; + const char *s = src; + const char * const send = src + slen; + STRLEN j, cur = SvCUR(sv); + /* Could count 128-255 and 256+ in two variables, if we want to + be like &qquote and make a distinction. */ + STRLEN grow = 0; /* bytes needed to represent chars 128+ */ + /* STRLEN topbit_grow = 0; bytes needed to represent chars 128-255 */ + STRLEN backslashes = 0; + STRLEN single_quotes = 0; + STRLEN qq_escapables = 0; /* " $ @ will need a \ in "" strings. */ + STRLEN normal = 0; + + /* this will need EBCDICification */ + for (s = src; s < send; s += UTF8SKIP(s)) { + const UV k = utf8_to_uvchr((U8*)s, NULL); + +#ifdef EBCDIC + if (!isprint(k) || k > 256) { +#else + if (k > 127) { +#endif + /* 4: \x{} then count the number of hex digits. */ + grow += 4 + (k <= 0xFF ? 2 : k <= 0xFFF ? 3 : k <= 0xFFFF ? 4 : +#if UVSIZE == 4 + 8 /* We may allocate a bit more than the minimum here. */ +#else + k <= 0xFFFFFFFF ? 8 : UVSIZE * 4 +#endif + ); + } else if (k == '\\') { + backslashes++; + } else if (k == '\'') { + single_quotes++; + } else if (k == '"' || k == '$' || k == '@') { + qq_escapables++; + } else { + normal++; + } + } + if (grow) { + /* We have something needing hex. 3 is ""\0 */ + sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes + + 2*qq_escapables + normal); + rstart = r = SvPVX(sv) + cur; + + *r++ = '"'; + + for (s = src; s < send; s += UTF8SKIP(s)) { + const UV k = utf8_to_uvchr((U8*)s, NULL); + + if (k == '"' || k == '\\' || k == '$' || k == '@') { + *r++ = '\\'; + *r++ = (char)k; + } + else +#ifdef EBCDIC + if (isprint(k) && k < 256) +#else + if (k < 0x80) +#endif + *r++ = (char)k; + else { +#if PERL_VERSION < 10 + sprintf(r, "\\x{%"UVxf"}", k); + r += strlen(r); + /* my_sprintf is not supported by ppport.h */ +#else + r = r + my_sprintf(r, "\\x{%"UVxf"}", k); +#endif + } + } + *r++ = '"'; + } else { + /* Single quotes. */ + sv_grow(sv, cur + 3 + 2*backslashes + 2*single_quotes + + qq_escapables + normal); + rstart = r = SvPVX(sv) + cur; + *r++ = '\''; + for (s = src; s < send; s ++) { + const char k = *s; + if (k == '\'' || k == '\\') + *r++ = '\\'; + *r++ = k; + } + *r++ = '\''; + } + *r = '\0'; + j = r - rstart; + SvCUR_set(sv, cur + j); + + return j; +} + /* append a repeated string to an SV */ static SV * -sv_x(pTHX_ SV *sv, register char *str, STRLEN len, I32 n) +sv_x(pTHX_ SV *sv, const char *str, STRLEN len, I32 n) { - if (sv == Nullsv) + if (!sv) sv = newSVpvn("", 0); +#ifdef DEBUGGING else assert(SvTYPE(sv) >= SVt_PV); +#endif if (n > 0) { SvGROW(sv, len*n + SvCUR(sv) + 1); if (len == 1) { - char *start = SvPVX(sv) + SvCUR(sv); - SvCUR(sv) += n; + char * const start = SvPVX(sv) + SvCUR(sv); + SvCUR_set(sv, SvCUR(sv) + n); start[n] = '\0'; while (n > 0) start[--n] = str[0]; @@ -129,59 +258,78 @@ sv_x(pTHX_ SV *sv, register char *str, STRLEN len, I32 n) * efficiency raisins.) Ugggh! */ static I32 -DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv, +DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad, - SV *apad, SV *sep, SV *freezer, SV *toaster, I32 purity, - I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth) + SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity, + I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys) { char tmpbuf[128]; U32 i; - char *c, *r, *realpack, id[128]; + char *c, *r, *realpack; +#ifdef DD_USE_OLD_ID_FORMAT + char id[128]; +#else + UV id_buffer; + char *const id = (char *)&id_buffer; +#endif SV **svp; SV *sv, *ipad, *ival; SV *blesspad = Nullsv; - AV *seenentry = Nullav; + AV *seenentry = NULL; char *iname; STRLEN inamelen, idlen = 0; - U32 flags; U32 realtype; + bool no_bless = 0; /* when a qr// is blessed into Regexp we dont want to bless it. + in later perls we should actually check the classname of the + engine. this gets tricky as it involves lexical issues that arent so + easy to resolve */ + bool is_regex = 0; /* we are dumping a regex, we need to know this before we bless */ if (!val) return 0; - flags = SvFLAGS(val); + /* If the ouput buffer has less than some arbitary amount of space + remaining, then enlarge it. For the test case (25M of output), + *1.1 was slower, *2.0 was the same, so the first guess of 1.5 is + deemed to be good enough. */ + if (SvTYPE(retval) >= SVt_PV && (SvLEN(retval) - SvCUR(retval)) < 42) { + sv_grow(retval, SvCUR(retval) * 3 / 2); + } + realtype = SvTYPE(val); - + if (SvGMAGICAL(val)) mg_get(val); if (SvROK(val)) { + /* If a freeze method is provided and the object has it, call + it. Warn on errors. */ if (SvOBJECT(SvRV(val)) && freezer && - SvPOK(freezer) && SvCUR(freezer)) + SvPOK(freezer) && SvCUR(freezer) && + gv_fetchmeth(SvSTASH(SvRV(val)), SvPVX_const(freezer), + SvCUR(freezer), -1) != NULL) { dSP; ENTER; SAVETMPS; PUSHMARK(sp); XPUSHs(val); PUTBACK; - i = perl_call_method(SvPVX(freezer), G_EVAL|G_SCALAR); + i = perl_call_method(SvPVX_const(freezer), G_EVAL|G_VOID); SPAGAIN; if (SvTRUE(ERRSV)) - warn("WARNING(Freezer method call failed): %s", - SvPVX(ERRSV)); - else if (i) - val = newSVsv(POPs); + warn("WARNING(Freezer method call failed): %"SVf"", ERRSV); PUTBACK; FREETMPS; LEAVE; - if (i) - (void)sv_2mortal(val); } ival = SvRV(val); - flags = SvFLAGS(ival); realtype = SvTYPE(ival); - (void) sprintf(id, "0x%lx", (unsigned long)ival); - idlen = strlen(id); +#ifdef DD_USE_OLD_ID_FORMAT + idlen = my_snprintf(id, sizeof(id), "0x%"UVxf, PTR2UV(ival)); +#else + id_buffer = PTR2UV(ival); + idlen = sizeof(id_buffer); +#endif if (SvOBJECT(ival)) - realpack = HvNAME(SvSTASH(ival)); + realpack = HvNAME_get(SvSTASH(ival)); else - realpack = Nullch; + realpack = NULL; /* if it has a name, we need to either look it up, or keep a tab * on it so we know when we hit it later @@ -210,9 +358,9 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv, } else { if (name[0] == '@' || name[0] == '%') { - if ((SvPVX(othername))[0] == '\\' && - (SvPVX(othername))[1] == name[0]) { - sv_catpvn(retval, SvPVX(othername)+1, + if ((SvPVX_const(othername))[0] == '\\' && + (SvPVX_const(othername))[1] == name[0]) { + sv_catpvn(retval, SvPVX_const(othername)+1, SvCUR(othername)-1); } else { @@ -228,7 +376,11 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv, return 1; } else { +#ifdef DD_USE_OLD_ID_FORMAT warn("ref name not found for %s", id); +#else + warn("ref name not found for 0x%"UVxf, PTR2UV(ival)); +#endif return 0; } } @@ -249,46 +401,46 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv, av_push(seenentry, namesv); (void)SvREFCNT_inc(val); av_push(seenentry, val); - (void)hv_store(seenhv, id, strlen(id), - newRV((SV*)seenentry), 0); + (void)hv_store(seenhv, id, idlen, + newRV_inc((SV*)seenentry), 0); SvREFCNT_dec(seenentry); } } - - if (realpack && *realpack == 'R' && strEQ(realpack, "Regexp")) { - STRLEN rlen; - char *rval = SvPV(val, rlen); - char *slash = strchr(rval, '/'); - sv_catpvn(retval, "qr/", 3); - while (slash) { - sv_catpvn(retval, rval, slash-rval); - sv_catpvn(retval, "\\/", 2); - rlen -= slash-rval+1; - rval = slash+1; - slash = strchr(rval, '/'); - } - sv_catpvn(retval, rval, rlen); - sv_catpvn(retval, "/", 1); - return 1; - } + /* regexps dont have to be blessed into package "Regexp" + * they can be blessed into any package. + */ +#if PERL_VERSION < 8 + if (realpack && *realpack == 'R' && strEQ(realpack, "Regexp")) +#elif PERL_VERSION < 11 + if (realpack && realtype == SVt_PVMG && mg_find(ival, PERL_MAGIC_qr)) +#else + if (realpack && realtype == SVt_REGEXP) +#endif + { + is_regex = 1; + if (strEQ(realpack, "Regexp")) + no_bless = 1; + else + no_bless = 0; + } /* If purity is not set and maxdepth is set, then check depth: * if we have reached maximum depth, return the string * representation of the thing we are currently examining - * at this depth (i.e., 'Foo=ARRAY(0xdeadbeef)'). + * at this depth (i.e., 'Foo=ARRAY(0xdeadbeef)'). */ if (!purity && maxdepth > 0 && *levelp >= maxdepth) { STRLEN vallen; - char *valstr = SvPV(val,vallen); + const char * const valstr = SvPV(val,vallen); sv_catpvn(retval, "'", 1); sv_catpvn(retval, valstr, vallen); sv_catpvn(retval, "'", 1); return 1; } - if (realpack) { /* we have a blessed ref */ + if (realpack && !no_bless) { /* we have a blessed ref */ STRLEN blesslen; - char *blessstr = SvPV(bless, blesslen); + const char * const blessstr = SvPV(bless, blesslen); sv_catpvn(retval, blessstr, blesslen); sv_catpvn(retval, "( ", 2); if (indent >= 2) { @@ -299,46 +451,68 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv, } (*levelp)++; - ipad = sv_x(aTHX_ Nullsv, SvPVX(xpad), SvCUR(xpad), *levelp); + ipad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp); - if (realtype <= SVt_PVBM) { /* scalar ref */ - SV *namesv = newSVpvn("${", 2); + if (is_regex) + { + STRLEN rlen; + const char *rval = SvPV(val, rlen); + const char *slash = strchr(rval, '/'); + sv_catpvn(retval, "qr/", 3); + while (slash) { + sv_catpvn(retval, rval, slash-rval); + sv_catpvn(retval, "\\/", 2); + rlen -= slash-rval+1; + rval = slash+1; + slash = strchr(rval, '/'); + } + sv_catpvn(retval, rval, rlen); + sv_catpvn(retval, "/", 1); + } + else if ( +#if PERL_VERSION < 9 + realtype <= SVt_PVBM +#else + realtype <= SVt_PVMG +#endif + ) { /* scalar ref */ + SV * const namesv = newSVpvn("${", 2); sv_catpvn(namesv, name, namelen); sv_catpvn(namesv, "}", 1); - if (realpack) { /* blessed */ + if (realpack) { /* blessed */ sv_catpvn(retval, "do{\\(my $o = ", 13); - DD_dump(aTHX_ ival, SvPVX(namesv), SvCUR(namesv), retval, seenhv, - postav, levelp, indent, pad, xpad, apad, sep, + DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv, + postav, levelp, indent, pad, xpad, apad, sep, pair, freezer, toaster, purity, deepcopy, quotekeys, bless, - maxdepth); + maxdepth, sortkeys); sv_catpvn(retval, ")}", 2); } /* plain */ else { sv_catpvn(retval, "\\", 1); - DD_dump(aTHX_ ival, SvPVX(namesv), SvCUR(namesv), retval, seenhv, - postav, levelp, indent, pad, xpad, apad, sep, + DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv, + postav, levelp, indent, pad, xpad, apad, sep, pair, freezer, toaster, purity, deepcopy, quotekeys, bless, - maxdepth); + maxdepth, sortkeys); } SvREFCNT_dec(namesv); } else if (realtype == SVt_PVGV) { /* glob ref */ - SV *namesv = newSVpvn("*{", 2); + SV * const namesv = newSVpvn("*{", 2); sv_catpvn(namesv, name, namelen); sv_catpvn(namesv, "}", 1); sv_catpvn(retval, "\\", 1); - DD_dump(aTHX_ ival, SvPVX(namesv), SvCUR(namesv), retval, seenhv, - postav, levelp, indent, pad, xpad, apad, sep, + DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv, + postav, levelp, indent, pad, xpad, apad, sep, pair, freezer, toaster, purity, deepcopy, quotekeys, bless, - maxdepth); + maxdepth, sortkeys); SvREFCNT_dec(namesv); } else if (realtype == SVt_PVAV) { SV *totpad; I32 ix = 0; - I32 ixmax = av_len((AV *)ival); - - SV *ixsv = newSViv(0); + const I32 ixmax = av_len((AV *)ival); + + SV * const ixsv = newSViv(0); /* allowing for a 24 char wide array index */ New(0, iname, namelen+28, char); (void)strcpy(iname, name); @@ -385,8 +559,12 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv, ilen = inamelen; sv_setiv(ixsv, ix); +#if PERL_VERSION < 10 (void) sprintf(iname+ilen, "%"IVdf, (IV)ix); ilen = strlen(iname); +#else + ilen = ilen + my_sprintf(iname+ilen, "%"IVdf, (IV)ix); +#endif iname[ilen++] = ']'; iname[ilen] = '\0'; if (indent >= 3) { sv_catsv(retval, totpad); @@ -397,14 +575,14 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv, sv_catsv(retval, totpad); sv_catsv(retval, ipad); DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav, - levelp, indent, pad, xpad, apad, sep, + levelp, indent, pad, xpad, apad, sep, pair, freezer, toaster, purity, deepcopy, quotekeys, bless, - maxdepth); + maxdepth, sortkeys); if (ix < ixmax) sv_catpvn(retval, ",", 1); } if (ixmax >= 0) { - SV *opad = sv_x(aTHX_ Nullsv, SvPVX(xpad), SvCUR(xpad), (*levelp)-1); + SV * const opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), (*levelp)-1); sv_catsv(retval, totpad); sv_catsv(retval, opad); SvREFCNT_dec(opad); @@ -419,13 +597,14 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv, } else if (realtype == SVt_PVHV) { SV *totpad, *newapad; - SV *iname, *sname; + SV *sname; HE *entry; char *key; I32 klen; SV *hval; - - iname = newSVpvn(name, namelen); + AV *keys = NULL; + + SV * const iname = newSVpvn(name, namelen); if (name[0] == '%') { sv_catpvn(retval, "(", 1); (SvPVX(iname))[0] = '$'; @@ -452,43 +631,142 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv, totpad = newSVsv(sep); sv_catsv(totpad, pad); sv_catsv(totpad, apad); - - (void)hv_iterinit((HV*)ival); - i = 0; - while ((entry = hv_iternext((HV*)ival))) { + + /* If requested, get a sorted/filtered array of hash keys */ + if (sortkeys) { + if (sortkeys == &PL_sv_yes) { +#if PERL_VERSION < 8 + sortkeys = sv_2mortal(newSVpvn("Data::Dumper::_sortkeys", 23)); +#else + keys = newAV(); + (void)hv_iterinit((HV*)ival); + while ((entry = hv_iternext((HV*)ival))) { + sv = hv_iterkeysv(entry); + SvREFCNT_inc(sv); + av_push(keys, sv); + } +# ifdef USE_LOCALE_NUMERIC + sortsv(AvARRAY(keys), + av_len(keys)+1, + IN_LOCALE ? Perl_sv_cmp_locale : Perl_sv_cmp); +# else + sortsv(AvARRAY(keys), + av_len(keys)+1, + Perl_sv_cmp); +# endif +#endif + } + if (sortkeys != &PL_sv_yes) { + dSP; ENTER; SAVETMPS; PUSHMARK(sp); + XPUSHs(sv_2mortal(newRV_inc(ival))); PUTBACK; + i = perl_call_sv(sortkeys, G_SCALAR | G_EVAL); + SPAGAIN; + if (i) { + sv = POPs; + if (SvROK(sv) && (SvTYPE(SvRV(sv)) == SVt_PVAV)) + keys = (AV*)SvREFCNT_inc(SvRV(sv)); + } + if (! keys) + warn("Sortkeys subroutine did not return ARRAYREF\n"); + PUTBACK; FREETMPS; LEAVE; + } + if (keys) + sv_2mortal((SV*)keys); + } + else + (void)hv_iterinit((HV*)ival); + + /* foreach (keys %hash) */ + for (i = 0; 1; i++) { char *nkey; + char *nkey_buffer = NULL; I32 nticks = 0; - + SV* keysv; + STRLEN keylen; + I32 nlen; + bool do_utf8 = FALSE; + + if (sortkeys) { + if (!(keys && (I32)i <= av_len(keys))) break; + } else { + if (!(entry = hv_iternext((HV *)ival))) break; + } + if (i) sv_catpvn(retval, ",", 1); - i++; - key = hv_iterkey(entry, &klen); - hval = hv_iterval((HV*)ival, entry); - - if (quotekeys || needs_quote(key)) { - nticks = num_q(key, klen); - New(0, nkey, klen+nticks+3, char); - nkey[0] = '\''; - if (nticks) - klen += esc_q(nkey+1, key, klen); - else - (void)Copy(key, nkey+1, klen, char); - nkey[++klen] = '\''; - nkey[++klen] = '\0'; + + if (sortkeys) { + char *key; + svp = av_fetch(keys, i, FALSE); + keysv = svp ? *svp : sv_mortalcopy(&PL_sv_undef); + key = SvPV(keysv, keylen); + svp = hv_fetch((HV*)ival, key, + SvUTF8(keysv) ? -(I32)keylen : keylen, 0); + hval = svp ? *svp : sv_mortalcopy(&PL_sv_undef); } else { - New(0, nkey, klen, char); - (void)Copy(key, nkey, klen, char); + keysv = hv_iterkeysv(entry); + hval = hv_iterval((HV*)ival, entry); } - - sname = newSVsv(iname); - sv_catpvn(sname, nkey, klen); - sv_catpvn(sname, "}", 1); - sv_catsv(retval, totpad); - sv_catsv(retval, ipad); - sv_catpvn(retval, nkey, klen); - sv_catpvn(retval, " => ", 4); + key = SvPV(keysv, keylen); + do_utf8 = DO_UTF8(keysv); + klen = keylen; + + sv_catsv(retval, totpad); + sv_catsv(retval, ipad); + /* old logic was first to check utf8 flag, and if utf8 always + call esc_q_utf8. This caused test to break under -Mutf8, + because there even strings like 'c' have utf8 flag on. + Hence with quotekeys == 0 the XS code would still '' quote + them based on flags, whereas the perl code would not, + based on regexps. + The perl code is correct. + needs_quote() decides that anything that isn't a valid + perl identifier needs to be quoted, hence only correctly + formed strings with no characters outside [A-Za-z0-9_:] + won't need quoting. None of those characters are used in + the byte encoding of utf8, so anything with utf8 + encoded characters in will need quoting. Hence strings + with utf8 encoded characters in will end up inside do_utf8 + just like before, but now strings with utf8 flag set but + only ascii characters will end up in the unquoted section. + + There should also be less tests for the (probably currently) + more common doesn't need quoting case. + The code is also smaller (22044 vs 22260) because I've been + able to pull the common logic out to both sides. */ + if (quotekeys || needs_quote(key)) { + if (do_utf8) { + STRLEN ocur = SvCUR(retval); + nlen = esc_q_utf8(aTHX_ retval, key, klen); + nkey = SvPVX(retval) + ocur; + } + else { + nticks = num_q(key, klen); + New(0, nkey_buffer, klen+nticks+3, char); + nkey = nkey_buffer; + nkey[0] = '\''; + if (nticks) + klen += esc_q(nkey+1, key, klen); + else + (void)Copy(key, nkey+1, klen, char); + nkey[++klen] = '\''; + nkey[++klen] = '\0'; + nlen = klen; + sv_catpvn(retval, nkey, klen); + } + } + else { + nkey = key; + nlen = klen; + sv_catpvn(retval, nkey, klen); + } + sname = newSVsv(iname); + sv_catpvn(sname, nkey, nlen); + sv_catpvn(sname, "}", 1); + + sv_catsv(retval, pair); if (indent >= 2) { char *extra; I32 elen = 0; @@ -503,17 +781,17 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv, else newapad = apad; - DD_dump(aTHX_ hval, SvPVX(sname), SvCUR(sname), retval, seenhv, - postav, levelp, indent, pad, xpad, newapad, sep, + DD_dump(aTHX_ hval, SvPVX_const(sname), SvCUR(sname), retval, seenhv, + postav, levelp, indent, pad, xpad, newapad, sep, pair, freezer, toaster, purity, deepcopy, quotekeys, bless, - maxdepth); + maxdepth, sortkeys); SvREFCNT_dec(sname); - Safefree(nkey); + Safefree(nkey_buffer); if (indent >= 2) SvREFCNT_dec(newapad); } if (i) { - SV *opad = sv_x(aTHX_ Nullsv, SvPVX(xpad), SvCUR(xpad), *levelp-1); + SV *opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp-1); sv_catsv(retval, totpad); sv_catsv(retval, opad); SvREFCNT_dec(opad); @@ -534,13 +812,33 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv, warn("cannot handle ref type %ld", realtype); } - if (realpack) { /* free blessed allocs */ + if (realpack && !no_bless) { /* free blessed allocs */ + I32 plen; + I32 pticks; + if (indent >= 2) { SvREFCNT_dec(apad); apad = blesspad; } sv_catpvn(retval, ", '", 3); - sv_catpvn(retval, realpack, strlen(realpack)); + + plen = strlen(realpack); + pticks = num_q(realpack, plen); + if (pticks) { /* needs escaping */ + char *npack; + char *npack_buffer = NULL; + + New(0, npack_buffer, plen+pticks+1, char); + npack = npack_buffer; + plen += esc_q(npack, realpack, plen); + npack[plen] = '\0'; + + sv_catpvn(retval, npack, plen); + Safefree(npack_buffer); + } + else { + sv_catpvn(retval, realpack, strlen(realpack)); + } sv_catpvn(retval, "' )", 3); if (toaster && SvPOK(toaster) && SvCUR(toaster)) { sv_catpvn(retval, "->", 2); @@ -555,8 +853,13 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv, STRLEN i; if (namelen) { - (void) sprintf(id, "0x%lx", (unsigned long)val); - if ((svp = hv_fetch(seenhv, id, (idlen = strlen(id)), FALSE)) && +#ifdef DD_USE_OLD_ID_FORMAT + idlen = my_snprintf(id, sizeof(id), "0x%"UVxf, PTR2UV(val)); +#else + id_buffer = PTR2UV(val); + idlen = sizeof(id_buffer); +#endif + if ((svp = hv_fetch(seenhv, id, idlen, FALSE)) && (sv = *svp) && SvROK(sv) && (seenentry = (AV*)SvRV(sv))) { @@ -570,23 +873,38 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv, return 1; } } - else { - SV *namesv; - namesv = newSVpvn("\\", 1); + else if (val != &PL_sv_undef) { + SV * const namesv = newSVpvn("\\", 1); sv_catpvn(namesv, name, namelen); seenentry = newAV(); av_push(seenentry, namesv); - av_push(seenentry, newRV(val)); - (void)hv_store(seenhv, id, strlen(id), newRV((SV*)seenentry), 0); + av_push(seenentry, newRV_inc(val)); + (void)hv_store(seenhv, id, idlen, newRV_inc((SV*)seenentry), 0); SvREFCNT_dec(seenentry); } } - if (SvIOK(val)) { + if (DD_is_integer(val)) { STRLEN len; - (void) sprintf(tmpbuf, "%"IVdf, SvIV(val)); - len = strlen(tmpbuf); - sv_catpvn(retval, tmpbuf, len); + if (SvIsUV(val)) + len = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"UVuf, SvUV(val)); + else + len = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"IVdf, SvIV(val)); + if (SvPOK(val)) { + /* Need to check to see if this is a string such as " 0". + I'm assuming from sprintf isn't going to clash with utf8. + Is this valid on EBCDIC? */ + STRLEN pvlen; + const char * const pv = SvPV(val, pvlen); + if (pvlen != len || memNE(pv, tmpbuf, len)) + goto integer_came_from_string; + } + if (len > 10) { + /* Looks like we're on a 64 bit system. Make it a string so that + if a 32 bit system reads the number it will cope better. */ + sv_catpvf(retval, "'%s'", tmpbuf); + } else + sv_catpvn(retval, tmpbuf, len); } else if (realtype == SVt_PVGV) {/* GLOBs can end up with scribbly names */ c = SvPV(val, i); @@ -613,12 +931,12 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv, SvCUR_set(retval, SvCUR(retval)+i); if (purity) { - static char *entries[] = { "{SCALAR}", "{ARRAY}", "{HASH}" }; - static STRLEN sizes[] = { 8, 7, 6 }; + static const char* const entries[] = { "{SCALAR}", "{ARRAY}", "{HASH}" }; + static const STRLEN sizes[] = { 8, 7, 6 }; SV *e; - SV *nname = newSVpvn("", 0); - SV *newapad = newSVpvn("", 0); - GV *gv = (GV*)val; + SV * const nname = newSVpvn("", 0); + SV * const newapad = newSVpvn("", 0); + GV * const gv = (GV*)val; I32 j; for (j=0; j<3; j++) { @@ -636,16 +954,17 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv, sv_catpvn(nname, entries[j], sizes[j]); sv_catpvn(postentry, " = ", 3); av_push(postav, postentry); - e = newRV(e); + e = newRV_inc(e); - SvCUR(newapad) = 0; + SvCUR_set(newapad, 0); if (indent >= 2) (void)sv_x(aTHX_ newapad, " ", 1, SvCUR(postentry)); - DD_dump(aTHX_ e, SvPVX(nname), SvCUR(nname), postentry, + DD_dump(aTHX_ e, SvPVX_const(nname), SvCUR(nname), postentry, seenhv, postav, &nlevel, indent, pad, xpad, - newapad, sep, freezer, toaster, purity, - deepcopy, quotekeys, bless, maxdepth); + newapad, sep, pair, freezer, toaster, purity, + deepcopy, quotekeys, bless, maxdepth, + sortkeys); SvREFCNT_dec(e); } } @@ -658,15 +977,20 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv, sv_catpvn(retval, "undef", 5); } else { + integer_came_from_string: c = SvPV(val, i); - sv_grow(retval, SvCUR(retval)+3+2*i); - r = SvPVX(retval)+SvCUR(retval); - r[0] = '\''; - i += esc_q(r+1, c, i); - ++i; - r[i++] = '\''; - r[i] = '\0'; - SvCUR_set(retval, SvCUR(retval)+i); + if (DO_UTF8(val)) + i += esc_q_utf8(aTHX_ retval, c, i); + else { + sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */ + r = SvPVX(retval) + SvCUR(retval); + r[0] = '\''; + i += esc_q(r+1, c, i); + ++i; + r[i++] = '\''; + r[i] = '\0'; + SvCUR_set(retval, SvCUR(retval)+i); + } } } @@ -698,13 +1022,13 @@ Data_Dumper_Dumpxs(href, ...) { HV *hv; SV *retval, *valstr; - HV *seenhv = Nullhv; + HV *seenhv = NULL; AV *postav, *todumpav, *namesav; I32 level = 0; - I32 indent, terse, useqq, i, imax, postlen; + I32 indent, terse, i, imax, postlen; SV **svp; - SV *val, *name, *pad, *xpad, *apad, *sep, *tmp, *varname; - SV *freezer, *toaster, *bless; + SV *val, *name, *pad, *xpad, *apad, *sep, *pair, *varname; + SV *freezer, *toaster, *bless, *sortkeys; I32 purity, deepcopy, quotekeys, maxdepth = 0; char tmpbuf[1024]; I32 gimme = GIMME; @@ -734,15 +1058,15 @@ Data_Dumper_Dumpxs(href, ...) (void)sv_2mortal(href); } - todumpav = namesav = Nullav; - seenhv = Nullhv; - val = pad = xpad = apad = sep = tmp = varname - = freezer = toaster = bless = &PL_sv_undef; + todumpav = namesav = NULL; + seenhv = NULL; + val = pad = xpad = apad = sep = pair = varname + = freezer = toaster = bless = sortkeys = &PL_sv_undef; name = sv_newmortal(); indent = 2; - terse = useqq = purity = deepcopy = 0; + terse = purity = deepcopy = 0; quotekeys = 1; - + retval = newSVpvn("", 0); if (SvROK(href) && (hv = (HV*)SvRV((SV*)href)) @@ -760,8 +1084,10 @@ Data_Dumper_Dumpxs(href, ...) purity = SvIV(*svp); if ((svp = hv_fetch(hv, "terse", 5, FALSE))) terse = SvTRUE(*svp); +#if 0 /* useqq currently unused */ if ((svp = hv_fetch(hv, "useqq", 5, FALSE))) useqq = SvTRUE(*svp); +#endif if ((svp = hv_fetch(hv, "pad", 3, FALSE))) pad = *svp; if ((svp = hv_fetch(hv, "xpad", 4, FALSE))) @@ -770,6 +1096,8 @@ Data_Dumper_Dumpxs(href, ...) apad = *svp; if ((svp = hv_fetch(hv, "sep", 3, FALSE))) sep = *svp; + if ((svp = hv_fetch(hv, "pair", 4, FALSE))) + pair = *svp; if ((svp = hv_fetch(hv, "varname", 7, FALSE))) varname = *svp; if ((svp = hv_fetch(hv, "freezer", 7, FALSE))) @@ -784,6 +1112,17 @@ Data_Dumper_Dumpxs(href, ...) bless = *svp; if ((svp = hv_fetch(hv, "maxdepth", 8, FALSE))) maxdepth = SvIV(*svp); + if ((svp = hv_fetch(hv, "sortkeys", 8, FALSE))) { + sortkeys = *svp; + if (! SvTRUE(sortkeys)) + sortkeys = NULL; + else if (! (SvROK(sortkeys) && + SvTYPE(SvRV(sortkeys)) == SVt_PVCV) ) + { + /* flag to use qsortsv() for sorting hash keys */ + sortkeys = &PL_sv_yes; + } + } postav = newAV(); if (todumpav) @@ -793,19 +1132,22 @@ Data_Dumper_Dumpxs(href, ...) valstr = newSVpvn("",0); for (i = 0; i <= imax; ++i) { SV *newapad; - + av_clear(postav); if ((svp = av_fetch(todumpav, i, FALSE))) val = *svp; else val = &PL_sv_undef; - if ((svp = av_fetch(namesav, i, TRUE))) + if ((svp = av_fetch(namesav, i, TRUE))) { sv_setsv(name, *svp); + if (SvOK(*svp) && !SvPOK(*svp)) + (void)SvPV_nolen_const(name); + } else - SvOK_off(name); - - if (SvOK(name)) { - if ((SvPVX(name))[0] == '*') { + (void)SvOK_off(name); + + if (SvPOK(name)) { + if ((SvPVX_const(name))[0] == '*') { if (SvROK(val)) { switch (SvTYPE(SvRV(val))) { case SVt_PVAV: @@ -825,39 +1167,38 @@ Data_Dumper_Dumpxs(href, ...) else (SvPVX(name))[0] = '$'; } - else if ((SvPVX(name))[0] != '$') + else if ((SvPVX_const(name))[0] != '$') sv_insert(name, 0, 0, "$", 1); } else { - STRLEN nchars = 0; + STRLEN nchars; sv_setpvn(name, "$", 1); sv_catsv(name, varname); - (void) sprintf(tmpbuf, "%"IVdf, (IV)(i+1)); - nchars = strlen(tmpbuf); + nchars = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"IVdf, (IV)(i+1)); sv_catpvn(name, tmpbuf, nchars); } - + if (indent >= 2) { - SV *tmpsv = sv_x(aTHX_ Nullsv, " ", 1, SvCUR(name)+3); + SV * const tmpsv = sv_x(aTHX_ NULL, " ", 1, SvCUR(name)+3); newapad = newSVsv(apad); sv_catsv(newapad, tmpsv); SvREFCNT_dec(tmpsv); } else newapad = apad; - - DD_dump(aTHX_ val, SvPVX(name), SvCUR(name), valstr, seenhv, - postav, &level, indent, pad, xpad, newapad, sep, + + DD_dump(aTHX_ val, SvPVX_const(name), SvCUR(name), valstr, seenhv, + postav, &level, indent, pad, xpad, newapad, sep, pair, freezer, toaster, purity, deepcopy, quotekeys, - bless, maxdepth); - + bless, maxdepth, sortkeys); + if (indent >= 2) SvREFCNT_dec(newapad); postlen = av_len(postav); if (postlen >= 0 || !terse) { sv_insert(valstr, 0, 0, " = ", 3); - sv_insert(valstr, 0, 0, SvPVX(name), SvCUR(name)); + sv_insert(valstr, 0, 0, SvPVX_const(name), SvCUR(name)); sv_catpvn(valstr, ";", 1); } sv_catsv(retval, pad);