This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Refactoring to Sv*_set() macros - patch #5
[perl5.git] / ext / Data / Dumper / Dumper.xs
index bc2b454..0626977 100644 (file)
@@ -3,34 +3,50 @@
 #include "perl.h"
 #include "XSUB.h"
 
-#ifndef PERL_VERSION
-#include "patchlevel.h"
-#define PERL_VERSION PATCHLEVEL
-#endif
-
-#if PERL_VERSION < 5
-#  ifndef PL_sv_undef
-#    define PL_sv_undef        sv_undef
-#  endif
-#  ifndef ERRSV
-#    define ERRSV      GvSV(errgv)
-#  endif
-#  ifndef newSVpvn
-#    define newSVpvn   newSVpv
-#  endif
-#endif
-
 static I32 num_q (char *s, STRLEN slen);
 static I32 esc_q (char *dest, char *src, STRLEN slen);
 static I32 esc_q_utf8 (pTHX_ SV *sv, char *src, STRLEN slen);
 static SV *sv_x (pTHX_ SV *sv, char *str, STRLEN len, I32 n);
 static I32 DD_dump (pTHX_ SV *val, char *name, STRLEN namelen, SV *retval,
                    HV *seenhv, AV *postav, I32 *levelp, I32 indent,
-                   SV *pad, SV *xpad, SV *apad, SV *sep,
+                   SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair,
                    SV *freezer, SV *toaster,
                    I32 purity, I32 deepcopy, I32 quotekeys, SV *bless,
                    I32 maxdepth, SV *sortkeys);
 
+#if PERL_VERSION <= 6 /* Perl 5.6 and earlier */
+
+# ifdef EBCDIC
+#  define UNI_TO_NATIVE(ch) (((ch) > 255) ? (ch) : ASCII_TO_NATIVE(ch))
+# else
+#  define UNI_TO_NATIVE(ch) (ch)
+# endif
+
+UV
+Perl_utf8_to_uvchr(pTHX_ U8 *s, STRLEN *retlen)
+{
+    UV uv = utf8_to_uv(s, UTF8_MAXLEN, retlen,
+                    ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
+    return UNI_TO_NATIVE(uv);
+}
+
+# if !defined(PERL_IMPLICIT_CONTEXT)
+#  define utf8_to_uvchr             Perl_utf8_to_uvchr
+# else
+#  define utf8_to_uvchr(a,b) Perl_utf8_to_uvchr(aTHX_ a,b)
+# endif
+
+#endif /* PERL_VERSION <= 6 */
+
+/* Changes in 5.7 series mean that now IOK is only set if scalar is
+   precisely integer but in 5.6 and earlier we need to do a more
+   complex test.  Note: the 5.6 form evaluates sv more than once.  */
+#if PERL_VERSION <= 6
+#define DD_is_integer(sv) (SvIOK(sv) && (SvIsUV(sv) ? SvUV(sv) == SvNV(sv) : SvIV(sv) == SvNV(sv)))
+#else
+#define DD_is_integer(sv) SvIOK(sv)
+#endif
+
 /* does a string need to be protected? */
 static I32
 needs_quote(register char *s)
@@ -136,7 +152,8 @@ esc_q_utf8(pTHX_ SV* sv, register char *src, register STRLEN slen)
     }
     if (grow) {
         /* We have something needing hex. 3 is ""\0 */
-        sv_grow(sv, cur+3+grow+2*qq_escapables+2*backslashes+normal);
+        sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
+               + 2*qq_escapables + normal);
         rstart = r = SvPVX(sv) + cur;
 
         *r++ = '"';
@@ -146,18 +163,28 @@ esc_q_utf8(pTHX_ SV* sv, register char *src, register STRLEN slen)
 
             if (k == '"' || k == '\\' || k == '$' || k == '@') {
                 *r++ = '\\';
-                *r++ = k;
+                *r++ = (char)k;
             }
             else if (k < 0x80)
-                *r++ = k;
+                *r++ = (char)k;
             else {
-                r += sprintf(r, "\\x{%"UVxf"}", k);
+             /* The return value of sprintf() is unportable.
+              * In modern systems it returns (int) the number of characters,
+              * but in older systems it might return (char*) the original
+              * buffer, or it might even be (void).  The easiest portable
+              * thing to do is probably use sprintf() in void context and
+              * then strlen(buffer) for the length.  The more proper way
+              * would of course be to figure out the prototype of sprintf.
+              * --jhi */
+               sprintf(r, "\\x{%"UVxf"}", k);
+                r += strlen(r);
             }
         }
         *r++ = '"';
     } else {
         /* Single quotes.  */
-        sv_grow(sv, cur+3+grow+2*single_quotes+2*backslashes+normal);
+        sv_grow(sv, cur + 3 + 2*backslashes + 2*single_quotes
+               + qq_escapables + normal);
         rstart = r = SvPVX(sv) + cur;
         *r++ = '\'';
         for (s = src; s < send; s ++) {
@@ -188,7 +215,7 @@ sv_x(pTHX_ SV *sv, register char *str, STRLEN len, I32 n)
        SvGROW(sv, len*n + SvCUR(sv) + 1);
        if (len == 1) {
            char *start = SvPVX(sv) + SvCUR(sv);
-           SvCUR(sv) += n;
+           SvCUR_set(sv, SvCUR(sv) + n);
            start[n] = '\0';
            while (n > 0)
                start[--n] = str[0];
@@ -210,7 +237,7 @@ sv_x(pTHX_ SV *sv, register char *str, STRLEN len, I32 n)
 static I32
 DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
        AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad,
-       SV *apad, SV *sep, SV *freezer, SV *toaster, I32 purity,
+       SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity,
        I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys)
 {
     char tmpbuf[128];
@@ -233,26 +260,25 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
         mg_get(val);
     if (SvROK(val)) {
 
+        /* If a freeze method is provided and the object has it, call
+           it.  Warn on errors. */
        if (SvOBJECT(SvRV(val)) && freezer &&
-           SvPOK(freezer) && SvCUR(freezer))
+           SvPOK(freezer) && SvCUR(freezer) &&
+            gv_fetchmeth(SvSTASH(SvRV(val)), SvPVX(freezer), 
+                         SvCUR(freezer), -1) != NULL)
        {
            dSP; ENTER; SAVETMPS; PUSHMARK(sp);
            XPUSHs(val); PUTBACK;
-           i = perl_call_method(SvPVX(freezer), G_EVAL|G_SCALAR);
+           i = perl_call_method(SvPVX(freezer), G_EVAL|G_VOID);
            SPAGAIN;
            if (SvTRUE(ERRSV))
-               warn("WARNING(Freezer method call failed): %s",
-                    SvPVX(ERRSV));
-           else if (i)
-               val = newSVsv(POPs);
+               warn("WARNING(Freezer method call failed): %"SVf"", ERRSV);
            PUTBACK; FREETMPS; LEAVE;
-           if (i)
-               (void)sv_2mortal(val);
        }
        
        ival = SvRV(val);
        realtype = SvTYPE(ival);
-        (void) sprintf(id, "0x%lx", (unsigned long)ival);
+        (void) sprintf(id, "0x%"UVxf, PTR2UV(ival));
        idlen = strlen(id);
        if (SvOBJECT(ival))
            realpack = HvNAME(SvSTASH(ival));
@@ -326,7 +352,7 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
                (void)SvREFCNT_inc(val);
                av_push(seenentry, val);
                (void)hv_store(seenhv, id, strlen(id),
-                              newRV((SV*)seenentry), 0);
+                              newRV_inc((SV*)seenentry), 0);
                SvREFCNT_dec(seenentry);
            }
        }
@@ -384,7 +410,7 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
            if (realpack) {                                  /* blessed */
                sv_catpvn(retval, "do{\\(my $o = ", 13);
                DD_dump(aTHX_ ival, SvPVX(namesv), SvCUR(namesv), retval, seenhv,
-                       postav, levelp, indent, pad, xpad, apad, sep,
+                       postav, levelp, indent, pad, xpad, apad, sep, pair,
                        freezer, toaster, purity, deepcopy, quotekeys, bless,
                        maxdepth, sortkeys);
                sv_catpvn(retval, ")}", 2);
@@ -392,7 +418,7 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
            else {
                sv_catpvn(retval, "\\", 1);
                DD_dump(aTHX_ ival, SvPVX(namesv), SvCUR(namesv), retval, seenhv,
-                       postav, levelp, indent, pad, xpad, apad, sep,
+                       postav, levelp, indent, pad, xpad, apad, sep, pair,
                        freezer, toaster, purity, deepcopy, quotekeys, bless,
                        maxdepth, sortkeys);
            }
@@ -404,7 +430,7 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
            sv_catpvn(namesv, "}", 1);
            sv_catpvn(retval, "\\", 1);
            DD_dump(aTHX_ ival, SvPVX(namesv), SvCUR(namesv), retval, seenhv,
-                   postav, levelp,     indent, pad, xpad, apad, sep,
+                   postav, levelp,     indent, pad, xpad, apad, sep, pair,
                    freezer, toaster, purity, deepcopy, quotekeys, bless,
                    maxdepth, sortkeys);
            SvREFCNT_dec(namesv);
@@ -473,7 +499,7 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
                sv_catsv(retval, totpad);
                sv_catsv(retval, ipad);
                DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav,
-                       levelp, indent, pad, xpad, apad, sep,
+                       levelp, indent, pad, xpad, apad, sep, pair,
                        freezer, toaster, purity, deepcopy, quotekeys, bless,
                        maxdepth, sortkeys);
                if (ix < ixmax)
@@ -533,6 +559,9 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
            /* If requested, get a sorted/filtered array of hash keys */
            if (sortkeys) {
                if (sortkeys == &PL_sv_yes) {
+#if PERL_VERSION < 8
+                    sortkeys = sv_2mortal(newSVpvn("Data::Dumper::_sortkeys", 23));
+#else
                    keys = newAV();
                    (void)hv_iterinit((HV*)ival);
                    while ((entry = hv_iternext((HV*)ival))) {
@@ -540,17 +569,18 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
                        SvREFCNT_inc(sv);
                        av_push(keys, sv);
                    }
-#ifdef USE_LOCALE_NUMERIC
+# ifdef USE_LOCALE_NUMERIC
                    sortsv(AvARRAY(keys), 
                           av_len(keys)+1, 
                           IN_LOCALE ? Perl_sv_cmp_locale : Perl_sv_cmp);
-#else
+# else
                    sortsv(AvARRAY(keys), 
                           av_len(keys)+1, 
                           Perl_sv_cmp);
+# endif
 #endif
                }
-               else {
+               if (sortkeys != &PL_sv_yes) {
                    dSP; ENTER; SAVETMPS; PUSHMARK(sp);
                    XPUSHs(sv_2mortal(newRV_inc(ival))); PUTBACK;
                    i = perl_call_sv(sortkeys, G_SCALAR | G_EVAL);
@@ -572,13 +602,15 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
 
             /* foreach (keys %hash) */
             for (i = 0; 1; i++) {
-               char *nkey = NULL;
+               char *nkey;
+                char *nkey_buffer = NULL;
                I32 nticks = 0;
                SV* keysv;
                STRLEN keylen;
+                I32 nlen;
                bool do_utf8 = FALSE;
 
-                if ((sortkeys && !(keys && i <= av_len(keys))) ||
+                if ((sortkeys && !(keys && (I32)i <= av_len(keys))) ||
                     !(entry = hv_iternext((HV *)ival)))
                     break;
 
@@ -590,7 +622,8 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
                    svp = av_fetch(keys, i, FALSE);
                    keysv = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
                    key = SvPV(keysv, keylen);
-                   svp = hv_fetch((HV*)ival, key, keylen, 0);
+                   svp = hv_fetch((HV*)ival, key, /* klen < 0 marks a UTF-8 key */
+                                   SvUTF8(keysv) ? -(I32)keylen : (I32)keylen, 0);
                    hval = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
                }
                else {
@@ -602,22 +635,39 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
                key = SvPV(keysv, keylen);
                klen = keylen;
 
-               if (do_utf8) {
-                   char *okey = SvPVX(retval) + SvCUR(retval);
-                   I32 nlen;
-
-                   sv_catsv(retval, totpad);
-                   sv_catsv(retval, ipad);
-                   nlen = esc_q_utf8(aTHX_ retval, key, klen);
+                sv_catsv(retval, totpad);
+                sv_catsv(retval, ipad);
+                /* old logic was first to check utf8 flag, and if utf8 always
+                   call esc_q_utf8.  This caused test to break under -Mutf8,
+                   because there even strings like 'c' have utf8 flag on.
+                   Hence with quotekeys == 0 the XS code would still '' quote
+                   them based on flags, whereas the perl code would not,
+                   based on regexps.
+                   The perl code is correct.
+                   needs_quote() decides that anything that isn't a valid
+                   perl identifier needs to be quoted, hence only correctly
+                   formed strings with no characters outside [A-Za-z0-9_:]
+                   won't need quoting.  None of those characters are used in
+                   the byte encoding of utf8, so anything with utf8
+                   encoded characters in will need quoting. Hence strings
+                   with utf8 encoded characters in will end up inside do_utf8
+                   just like before, but now strings with utf8 flag set but
+                   only ascii characters will end up in the unquoted section.
 
-                   sname = newSVsv(iname);
-                   sv_catpvn(sname, okey, nlen);
-                   sv_catpvn(sname, "}", 1);
-               }
-               else {
-                   if (quotekeys || needs_quote(key)) {
+                   There should also be fewer tests for the (probably currently)
+                   more common doesn't-need-quoting case.
+                   The code is also smaller (22044 vs 22260) because I've been
+                   able to pull the common logic out to both sides.  */
+                if (quotekeys || needs_quote(key)) {
+                    if (do_utf8) {
+                        STRLEN ocur = SvCUR(retval);
+                        nlen = esc_q_utf8(aTHX_ retval, key, klen);
+                        nkey = SvPVX(retval) + ocur;
+                    }
+                    else {
                        nticks = num_q(key, klen);
-                       New(0, nkey, klen+nticks+3, char);
+                       New(0, nkey_buffer, klen+nticks+3, char);
+                        nkey = nkey_buffer;
                        nkey[0] = '\'';
                        if (nticks)
                            klen += esc_q(nkey+1, key, klen);
@@ -625,21 +675,20 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
                            (void)Copy(key, nkey+1, klen, char);
                        nkey[++klen] = '\'';
                        nkey[++klen] = '\0';
+                        nlen = klen;
+                        sv_catpvn(retval, nkey, klen);
                    }
-                   else {
-                       New(0, nkey, klen, char);
-                       (void)Copy(key, nkey, klen, char);
-                   }
-
-                   sname = newSVsv(iname);
-                   sv_catpvn(sname, nkey, klen);
-                   sv_catpvn(sname, "}", 1);
-
-                   sv_catsv(retval, totpad);
-                   sv_catsv(retval, ipad);
-                   sv_catpvn(retval, nkey, klen);
+                }
+                else {
+                    nkey = key;
+                    nlen = klen;
+                    sv_catpvn(retval, nkey, klen);
                }
-               sv_catpvn(retval, " => ", 4);
+                sname = newSVsv(iname);
+                sv_catpvn(sname, nkey, nlen);
+                sv_catpvn(sname, "}", 1);
+
+               sv_catsv(retval, pair);
                if (indent >= 2) {
                    char *extra;
                    I32 elen = 0;
@@ -655,11 +704,11 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
                    newapad = apad;
 
                DD_dump(aTHX_ hval, SvPVX(sname), SvCUR(sname), retval, seenhv,
-                       postav, levelp, indent, pad, xpad, newapad, sep,
+                       postav, levelp, indent, pad, xpad, newapad, sep, pair,
                        freezer, toaster, purity, deepcopy, quotekeys, bless,
                        maxdepth, sortkeys);
                SvREFCNT_dec(sname);
-               Safefree(nkey);
+               Safefree(nkey_buffer);
                if (indent >= 2)
                    SvREFCNT_dec(newapad);
            }
@@ -706,7 +755,7 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
        STRLEN i;
        
        if (namelen) {
-           (void) sprintf(id, "0x%lx", (unsigned long)val);
+           (void) sprintf(id, "0x%"UVxf, PTR2UV(val));
            if ((svp = hv_fetch(seenhv, id, (idlen = strlen(id)), FALSE)) &&
                (sv = *svp) && SvROK(sv) &&
                (seenentry = (AV*)SvRV(sv)))
@@ -721,30 +770,25 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
                    return 1;
                }
            }
-           else {
+           else if (val != &PL_sv_undef) {
                SV *namesv;
                namesv = newSVpvn("\\", 1);
                sv_catpvn(namesv, name, namelen);
                seenentry = newAV();
                av_push(seenentry, namesv);
-               av_push(seenentry, newRV(val));
-               (void)hv_store(seenhv, id, strlen(id), newRV((SV*)seenentry), 0);
+               av_push(seenentry, newRV_inc(val));
+               (void)hv_store(seenhv, id, strlen(id), newRV_inc((SV*)seenentry), 0);
                SvREFCNT_dec(seenentry);
            }
        }
 
-       if (SvIOK(val)) {
+        if (DD_is_integer(val)) {
             STRLEN len;
            if (SvIsUV(val))
              (void) sprintf(tmpbuf, "%"UVuf, SvUV(val));
            else
              (void) sprintf(tmpbuf, "%"IVdf, SvIV(val));
             len = strlen(tmpbuf);
-            /* For 5.6.x and earlier will need to change this test to check
-               NV if NOK, as there NOK trumps IOK, and NV=3.5,IV=3 is valid.
-               Current code will Dump that as $VAR1 = 3;
-               Changes in 5.7 series mean that now IOK is only set if scalar
-               is precisely integer.  */
             if (SvPOK(val)) {
               /* Need to check to see if this is a string such as " 0".
                  I'm assuming from sprintf isn't going to clash with utf8.
@@ -809,15 +853,15 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
                        sv_catpvn(nname, entries[j], sizes[j]);
                        sv_catpvn(postentry, " = ", 3);
                        av_push(postav, postentry);
-                       e = newRV(e);
+                       e = newRV_inc(e);
                        
-                       SvCUR(newapad) = 0;
+                       SvCUR_set(newapad, 0);
                        if (indent >= 2)
                            (void)sv_x(aTHX_ newapad, " ", 1, SvCUR(postentry));
                        
                        DD_dump(aTHX_ e, SvPVX(nname), SvCUR(nname), postentry,
                                seenhv, postav, &nlevel, indent, pad, xpad,
-                               newapad, sep, freezer, toaster, purity,
+                               newapad, sep, pair, freezer, toaster, purity,
                                deepcopy, quotekeys, bless, maxdepth, 
                                sortkeys);
                        SvREFCNT_dec(e);
@@ -882,7 +926,7 @@ Data_Dumper_Dumpxs(href, ...)
            I32 level = 0;
            I32 indent, terse, i, imax, postlen;
            SV **svp;
-           SV *val, *name, *pad, *xpad, *apad, *sep, *varname;
+           SV *val, *name, *pad, *xpad, *apad, *sep, *pair, *varname;
            SV *freezer, *toaster, *bless, *sortkeys;
            I32 purity, deepcopy, quotekeys, maxdepth = 0;
            char tmpbuf[1024];
@@ -915,7 +959,7 @@ Data_Dumper_Dumpxs(href, ...)
 
            todumpav = namesav = Nullav;
            seenhv = Nullhv;
-           val = pad = xpad = apad = sep = varname
+           val = pad = xpad = apad = sep = pair = varname
                = freezer = toaster = bless = &PL_sv_undef;
            name = sv_newmortal();
            indent = 2;
@@ -951,6 +995,8 @@ Data_Dumper_Dumpxs(href, ...)
                    apad = *svp;
                if ((svp = hv_fetch(hv, "sep", 3, FALSE)))
                    sep = *svp;
+               if ((svp = hv_fetch(hv, "pair", 4, FALSE)))
+                   pair = *svp;
                if ((svp = hv_fetch(hv, "varname", 7, FALSE)))
                    varname = *svp;
                if ((svp = hv_fetch(hv, "freezer", 7, FALSE)))
@@ -1039,7 +1085,7 @@ Data_Dumper_Dumpxs(href, ...)
                        newapad = apad;
                
                    DD_dump(aTHX_ val, SvPVX(name), SvCUR(name), valstr, seenhv,
-                           postav, &level, indent, pad, xpad, newapad, sep,
+                           postav, &level, indent, pad, xpad, newapad, sep, pair,
                            freezer, toaster, purity, deepcopy, quotekeys,
                            bless, maxdepth, sortkeys);