This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
make Dumper.xs work with 5.10.x
[perl5.git] / ext / Data / Dumper / Dumper.xs
index bb606f4..b39b77a 100644 (file)
@@ -2,37 +2,67 @@
 #include "EXTERN.h"
 #include "perl.h"
 #include "XSUB.h"
-
-#ifndef PERL_VERSION
-#include "patchlevel.h"
-#define PERL_VERSION PATCHLEVEL
+#ifdef USE_PPPORT_H
+#  define NEED_my_snprintf
+#  include "ppport.h"
 #endif
 
-#if PERL_VERSION < 5
-#  ifndef PL_sv_undef
-#    define PL_sv_undef        sv_undef
-#  endif
-#  ifndef ERRSV
-#    define ERRSV      GvSV(errgv)
-#  endif
-#  ifndef newSVpvn
-#    define newSVpvn   newSVpv
-#  endif
+#if PERL_VERSION < 6
+#  define DD_USE_OLD_ID_FORMAT
 #endif
 
-static I32 num_q (char *s, STRLEN slen);
-static I32 esc_q (char *dest, char *src, STRLEN slen);
-static SV *sv_x (pTHX_ SV *sv, char *str, STRLEN len, I32 n);
-static I32 DD_dump (pTHX_ SV *val, char *name, STRLEN namelen, SV *retval,
+static I32 num_q (const char *s, STRLEN slen);
+static I32 esc_q (char *dest, const char *src, STRLEN slen);
+static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen);
+static I32 needs_quote(register const char *s);
+static SV *sv_x (pTHX_ SV *sv, const char *str, STRLEN len, I32 n);
+static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
                    HV *seenhv, AV *postav, I32 *levelp, I32 indent,
-                   SV *pad, SV *xpad, SV *apad, SV *sep,
+                   SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair,
                    SV *freezer, SV *toaster,
                    I32 purity, I32 deepcopy, I32 quotekeys, SV *bless,
-                   I32 maxdepth);
+                   I32 maxdepth, SV *sortkeys);
+
+#ifndef HvNAME_get
+#define HvNAME_get HvNAME
+#endif
+
+#if PERL_VERSION <= 6 /* Perl 5.6 and earlier */
+
+# ifdef EBCDIC
+#  define UNI_TO_NATIVE(ch) (((ch) > 255) ? (ch) : ASCII_TO_NATIVE(ch))
+# else
+#  define UNI_TO_NATIVE(ch) (ch)
+# endif
+
+UV
+Perl_utf8_to_uvchr(pTHX_ U8 *s, STRLEN *retlen)
+{
+    const UV uv = utf8_to_uv(s, UTF8_MAXLEN, retlen,
+                    ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
+    return UNI_TO_NATIVE(uv);
+}
+
+# if !defined(PERL_IMPLICIT_CONTEXT)
+#  define utf8_to_uvchr             Perl_utf8_to_uvchr
+# else
+#  define utf8_to_uvchr(a,b) Perl_utf8_to_uvchr(aTHX_ a,b)
+# endif
+
+#endif /* PERL_VERSION <= 6 */
+
+/* Changes in 5.7 series mean that now IOK is only set if scalar is
+   precisely integer but in 5.6 and earlier we need to do a more
+   complex test  */
+#if PERL_VERSION <= 6
+#define DD_is_integer(sv) (SvIOK(sv) && (SvIsUV(val) ? SvUV(sv) == SvNV(sv) : SvIV(sv) == SvNV(sv)))
+#else
+#define DD_is_integer(sv) SvIOK(sv)
+#endif
 
 /* does a string need to be protected? */
 static I32
-needs_quote(register char *s)
+needs_quote(register const char *s)
 {
 TOP:
     if (s[0] == ':') {
@@ -52,14 +82,14 @@ TOP:
                    return 1;
            }
     }
-    else 
+    else
        return 1;
     return 0;
 }
 
 /* count the number of "'"s and "\"s in string */
 static I32
-num_q(register char *s, register STRLEN slen)
+num_q(register const char *s, register STRLEN slen)
 {
     register I32 ret = 0;
 
@@ -77,10 +107,10 @@ num_q(register char *s, register STRLEN slen)
 /* slen number of characters in s will be escaped */
 /* destination must be long enough for additional chars */
 static I32
-esc_q(register char *d, register char *s, register STRLEN slen)
+esc_q(register char *d, register const char *s, register STRLEN slen)
 {
     register I32 ret = 0;
-    
+
     while (slen > 0) {
        switch (*s) {
        case '\'':
@@ -96,20 +126,119 @@ esc_q(register char *d, register char *s, register STRLEN slen)
     return ret;
 }
 
+static I32
+esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen)
+{
+    char *r, *rstart;
+    const char *s = src;
+    const char * const send = src + slen;
+    STRLEN j, cur = SvCUR(sv);
+    /* Could count 128-255 and 256+ in two variables, if we want to
+       be like &qquote and make a distinction.  */
+    STRLEN grow = 0;   /* bytes needed to represent chars 128+ */
+    /* STRLEN topbit_grow = 0; bytes needed to represent chars 128-255 */
+    STRLEN backslashes = 0;
+    STRLEN single_quotes = 0;
+    STRLEN qq_escapables = 0;  /* " $ @ will need a \ in "" strings.  */
+    STRLEN normal = 0;
+
+    /* this will need EBCDICification */
+    for (s = src; s < send; s += UTF8SKIP(s)) {
+        const UV k = utf8_to_uvchr((U8*)s, NULL);
+
+#ifdef EBCDIC
+       if (!isprint(k) || k > 256) {
+#else
+       if (k > 127) {
+#endif
+            /* 4: \x{} then count the number of hex digits.  */
+            grow += 4 + (k <= 0xFF ? 2 : k <= 0xFFF ? 3 : k <= 0xFFFF ? 4 :
+#if UVSIZE == 4
+                8 /* We may allocate a bit more than the minimum here.  */
+#else
+                k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
+#endif
+                );
+        } else if (k == '\\') {
+            backslashes++;
+        } else if (k == '\'') {
+            single_quotes++;
+        } else if (k == '"' || k == '$' || k == '@') {
+            qq_escapables++;
+        } else {
+            normal++;
+        }
+    }
+    if (grow) {
+        /* We have something needing hex. 3 is ""\0 */
+        sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
+               + 2*qq_escapables + normal);
+        rstart = r = SvPVX(sv) + cur;
+
+        *r++ = '"';
+
+        for (s = src; s < send; s += UTF8SKIP(s)) {
+            const UV k = utf8_to_uvchr((U8*)s, NULL);
+
+            if (k == '"' || k == '\\' || k == '$' || k == '@') {
+                *r++ = '\\';
+                *r++ = (char)k;
+            }
+            else
+#ifdef EBCDIC
+             if (isprint(k) && k < 256)
+#else
+             if (k < 0x80)
+#endif
+                *r++ = (char)k;
+            else {
+#if PERL_VERSION < 10
+                sprintf(r, "\\x{%"UVxf"}", k);
+                r += strlen(r);
+                /* my_sprintf is not supported by ppport.h */
+#else
+                r = r + my_sprintf(r, "\\x{%"UVxf"}", k);
+#endif
+            }
+        }
+        *r++ = '"';
+    } else {
+        /* Single quotes.  */
+        sv_grow(sv, cur + 3 + 2*backslashes + 2*single_quotes
+               + qq_escapables + normal);
+        rstart = r = SvPVX(sv) + cur;
+        *r++ = '\'';
+        for (s = src; s < send; s ++) {
+            const char k = *s;
+            if (k == '\'' || k == '\\')
+                *r++ = '\\';
+            *r++ = k;
+        }
+        *r++ = '\'';
+    }
+    *r = '\0';
+    j = r - rstart;
+    SvCUR_set(sv, cur + j);
+
+    return j;
+}
+
 /* append a repeated string to an SV */
 static SV *
-sv_x(pTHX_ SV *sv, register char *str, STRLEN len, I32 n)
+sv_x(pTHX_ SV *sv, const char *str, STRLEN len, I32 n)
 {
-    if (sv == Nullsv)
+    if (!sv)
        sv = newSVpvn("", 0);
+#ifdef DEBUGGING
     else
        assert(SvTYPE(sv) >= SVt_PV);
+#endif
 
     if (n > 0) {
        SvGROW(sv, len*n + SvCUR(sv) + 1);
        if (len == 1) {
-           char *start = SvPVX(sv) + SvCUR(sv);
-           SvCUR(sv) += n;
+           char * const start = SvPVX(sv) + SvCUR(sv);
+           SvCUR_set(sv, SvCUR(sv) + n);
            start[n] = '\0';
            while (n > 0)
                start[--n] = str[0];
@@ -129,59 +258,78 @@ sv_x(pTHX_ SV *sv, register char *str, STRLEN len, I32 n)
  * efficiency raisins.)  Ugggh!
  */
 static I32
-DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
+DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
        AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad,
-       SV *apad, SV *sep, SV *freezer, SV *toaster, I32 purity,
-       I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth)
+       SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity,
+       I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys)
 {
     char tmpbuf[128];
     U32 i;
-    char *c, *r, *realpack, id[128];
+    char *c, *r, *realpack;
+#ifdef DD_USE_OLD_ID_FORMAT
+    char id[128];
+#else
+    UV id_buffer;
+    char *const id = (char *)&id_buffer;
+#endif
     SV **svp;
     SV *sv, *ipad, *ival;
     SV *blesspad = Nullsv;
-    AV *seenentry = Nullav;
+    AV *seenentry = NULL;
     char *iname;
     STRLEN inamelen, idlen = 0;
-    U32 flags;
     U32 realtype;
+    bool no_bless = 0; /* when a qr// is blessed into Regexp we dont want to bless it.
+                          in later perls we should actually check the classname of the 
+                          engine. this gets tricky as it involves lexical issues that arent so
+                          easy to resolve */
+    bool is_regex = 0; /* we are dumping a regex, we need to know this before we bless */
 
     if (!val)
        return 0;
 
-    flags = SvFLAGS(val);
+    /* If the ouput buffer has less than some arbitary amount of space
+       remaining, then enlarge it. For the test case (25M of output),
+       *1.1 was slower, *2.0 was the same, so the first guess of 1.5 is
+       deemed to be good enough.  */
+    if (SvTYPE(retval) >= SVt_PV && (SvLEN(retval) - SvCUR(retval)) < 42) {
+       sv_grow(retval, SvCUR(retval) * 3 / 2);
+    }
+
     realtype = SvTYPE(val);
-    
+
     if (SvGMAGICAL(val))
         mg_get(val);
     if (SvROK(val)) {
 
+        /* If a freeze method is provided and the object has it, call
+           it.  Warn on errors. */
        if (SvOBJECT(SvRV(val)) && freezer &&
-           SvPOK(freezer) && SvCUR(freezer))
+           SvPOK(freezer) && SvCUR(freezer) &&
+            gv_fetchmeth(SvSTASH(SvRV(val)), SvPVX_const(freezer), 
+                         SvCUR(freezer), -1) != NULL)
        {
            dSP; ENTER; SAVETMPS; PUSHMARK(sp);
            XPUSHs(val); PUTBACK;
-           i = perl_call_method(SvPVX(freezer), G_EVAL|G_SCALAR);
+           i = perl_call_method(SvPVX_const(freezer), G_EVAL|G_VOID);
            SPAGAIN;
            if (SvTRUE(ERRSV))
-               warn("WARNING(Freezer method call failed): %s",
-                    SvPVX(ERRSV));
-           else if (i)
-               val = newSVsv(POPs);
+               warn("WARNING(Freezer method call failed): %"SVf"", ERRSV);
            PUTBACK; FREETMPS; LEAVE;
-           if (i)
-               (void)sv_2mortal(val);
        }
        
        ival = SvRV(val);
-       flags = SvFLAGS(ival);
        realtype = SvTYPE(ival);
-        (void) sprintf(id, "0x%lx", (unsigned long)ival);
-       idlen = strlen(id);
+#ifdef DD_USE_OLD_ID_FORMAT
+        idlen = my_snprintf(id, sizeof(id), "0x%"UVxf, PTR2UV(ival));
+#else
+       id_buffer = PTR2UV(ival);
+       idlen = sizeof(id_buffer);
+#endif
        if (SvOBJECT(ival))
-           realpack = HvNAME(SvSTASH(ival));
+           realpack = HvNAME_get(SvSTASH(ival));
        else
-           realpack = Nullch;
+           realpack = NULL;
 
        /* if it has a name, we need to either look it up, or keep a tab
         * on it so we know when we hit it later
@@ -210,9 +358,9 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
                    }
                    else {
                        if (name[0] == '@' || name[0] == '%') {
-                           if ((SvPVX(othername))[0] == '\\' &&
-                               (SvPVX(othername))[1] == name[0]) {
-                               sv_catpvn(retval, SvPVX(othername)+1,
+                           if ((SvPVX_const(othername))[0] == '\\' &&
+                               (SvPVX_const(othername))[1] == name[0]) {
+                               sv_catpvn(retval, SvPVX_const(othername)+1,
                                          SvCUR(othername)-1);
                            }
                            else {
@@ -228,7 +376,11 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
                    return 1;
                }
                else {
+#ifdef DD_USE_OLD_ID_FORMAT
                    warn("ref name not found for %s", id);
+#else
+                   warn("ref name not found for 0x%"UVxf, PTR2UV(ival));
+#endif
                    return 0;
                }
            }
@@ -249,46 +401,46 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
                av_push(seenentry, namesv);
                (void)SvREFCNT_inc(val);
                av_push(seenentry, val);
-               (void)hv_store(seenhv, id, strlen(id),
-                              newRV((SV*)seenentry), 0);
+               (void)hv_store(seenhv, id, idlen,
+                              newRV_inc((SV*)seenentry), 0);
                SvREFCNT_dec(seenentry);
            }
        }
-
-       if (realpack && *realpack == 'R' && strEQ(realpack, "Regexp")) {
-           STRLEN rlen;
-           char *rval = SvPV(val, rlen);
-           char *slash = strchr(rval, '/');
-           sv_catpvn(retval, "qr/", 3);
-           while (slash) {
-               sv_catpvn(retval, rval, slash-rval);
-               sv_catpvn(retval, "\\/", 2);
-               rlen -= slash-rval+1;
-               rval = slash+1;
-               slash = strchr(rval, '/');
-           }
-           sv_catpvn(retval, rval, rlen);
-           sv_catpvn(retval, "/", 1);
-           return 1;
-       }
+        /* regexps dont have to be blessed into package "Regexp"
+         * they can be blessed into any package. 
+         */
+#if PERL_VERSION < 8
+       if (realpack && *realpack == 'R' && strEQ(realpack, "Regexp")) 
+#elif PERL_VERSION < 11
+        if (realpack && realtype == SVt_PVMG && mg_find(ival, PERL_MAGIC_qr))
+#else        
+        if (realpack && realtype == SVt_REGEXP) 
+#endif
+        {
+            is_regex = 1;
+            if (strEQ(realpack, "Regexp")) 
+                no_bless = 1;
+            else
+                no_bless = 0;
+        }
 
        /* If purity is not set and maxdepth is set, then check depth:
         * if we have reached maximum depth, return the string
         * representation of the thing we are currently examining
-        * at this depth (i.e., 'Foo=ARRAY(0xdeadbeef)'). 
+        * at this depth (i.e., 'Foo=ARRAY(0xdeadbeef)').
         */
        if (!purity && maxdepth > 0 && *levelp >= maxdepth) {
            STRLEN vallen;
-           char *valstr = SvPV(val,vallen);
+           const char * const valstr = SvPV(val,vallen);
            sv_catpvn(retval, "'", 1);
            sv_catpvn(retval, valstr, vallen);
            sv_catpvn(retval, "'", 1);
            return 1;
        }
 
-       if (realpack) {                         /* we have a blessed ref */
+       if (realpack && !no_bless) {                            /* we have a blessed ref */
            STRLEN blesslen;
-           char *blessstr = SvPV(bless, blesslen);
+           const char * const blessstr = SvPV(bless, blesslen);
            sv_catpvn(retval, blessstr, blesslen);
            sv_catpvn(retval, "( ", 2);
            if (indent >= 2) {
@@ -299,46 +451,68 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
        }
 
        (*levelp)++;
-       ipad = sv_x(aTHX_ Nullsv, SvPVX(xpad), SvCUR(xpad), *levelp);
+       ipad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp);
 
-       if (realtype <= SVt_PVBM) {                          /* scalar ref */
-           SV *namesv = newSVpvn("${", 2);
+        if (is_regex) 
+        {
+            STRLEN rlen;
+           const char *rval = SvPV(val, rlen);
+           const char *slash = strchr(rval, '/');
+           sv_catpvn(retval, "qr/", 3);
+           while (slash) {
+               sv_catpvn(retval, rval, slash-rval);
+               sv_catpvn(retval, "\\/", 2);
+               rlen -= slash-rval+1;
+               rval = slash+1;
+               slash = strchr(rval, '/');
+           }
+           sv_catpvn(retval, rval, rlen);
+           sv_catpvn(retval, "/", 1);
+       } 
+        else if (
+#if PERL_VERSION < 9
+               realtype <= SVt_PVBM
+#else
+               realtype <= SVt_PVMG
+#endif
+       ) {                          /* scalar ref */
+           SV * const namesv = newSVpvn("${", 2);
            sv_catpvn(namesv, name, namelen);
            sv_catpvn(namesv, "}", 1);
-           if (realpack) {                                  /* blessed */ 
+           if (realpack) {                                  /* blessed */
                sv_catpvn(retval, "do{\\(my $o = ", 13);
-               DD_dump(aTHX_ ival, SvPVX(namesv), SvCUR(namesv), retval, seenhv,
-                       postav, levelp, indent, pad, xpad, apad, sep,
+               DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
+                       postav, levelp, indent, pad, xpad, apad, sep, pair,
                        freezer, toaster, purity, deepcopy, quotekeys, bless,
-                       maxdepth);
+                       maxdepth, sortkeys);
                sv_catpvn(retval, ")}", 2);
            }                                                /* plain */
            else {
                sv_catpvn(retval, "\\", 1);
-               DD_dump(aTHX_ ival, SvPVX(namesv), SvCUR(namesv), retval, seenhv,
-                       postav, levelp, indent, pad, xpad, apad, sep,
+               DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
+                       postav, levelp, indent, pad, xpad, apad, sep, pair,
                        freezer, toaster, purity, deepcopy, quotekeys, bless,
-                       maxdepth);
+                       maxdepth, sortkeys);
            }
            SvREFCNT_dec(namesv);
        }
        else if (realtype == SVt_PVGV) {                     /* glob ref */
-           SV *namesv = newSVpvn("*{", 2);
+           SV * const namesv = newSVpvn("*{", 2);
            sv_catpvn(namesv, name, namelen);
            sv_catpvn(namesv, "}", 1);
            sv_catpvn(retval, "\\", 1);
-           DD_dump(aTHX_ ival, SvPVX(namesv), SvCUR(namesv), retval, seenhv,
-                   postav, levelp,     indent, pad, xpad, apad, sep,
+           DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
+                   postav, levelp,     indent, pad, xpad, apad, sep, pair,
                    freezer, toaster, purity, deepcopy, quotekeys, bless,
-                   maxdepth);
+                   maxdepth, sortkeys);
            SvREFCNT_dec(namesv);
        }
        else if (realtype == SVt_PVAV) {
            SV *totpad;
            I32 ix = 0;
-           I32 ixmax = av_len((AV *)ival);
-           
-           SV *ixsv = newSViv(0);
+           const I32 ixmax = av_len((AV *)ival);
+       
+           SV * const ixsv = newSViv(0);
            /* allowing for a 24 char wide array index */
            New(0, iname, namelen+28, char);
            (void)strcpy(iname, name);
@@ -385,8 +559,12 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
                
                ilen = inamelen;
                sv_setiv(ixsv, ix);
+#if PERL_VERSION < 10
                 (void) sprintf(iname+ilen, "%"IVdf, (IV)ix);
                ilen = strlen(iname);
+#else
+                ilen = ilen + my_sprintf(iname+ilen, "%"IVdf, (IV)ix);
+#endif
                iname[ilen++] = ']'; iname[ilen] = '\0';
                if (indent >= 3) {
                    sv_catsv(retval, totpad);
@@ -397,14 +575,14 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
                sv_catsv(retval, totpad);
                sv_catsv(retval, ipad);
                DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav,
-                       levelp, indent, pad, xpad, apad, sep,
+                       levelp, indent, pad, xpad, apad, sep, pair,
                        freezer, toaster, purity, deepcopy, quotekeys, bless,
-                       maxdepth);
+                       maxdepth, sortkeys);
                if (ix < ixmax)
                    sv_catpvn(retval, ",", 1);
            }
            if (ixmax >= 0) {
-               SV *opad = sv_x(aTHX_ Nullsv, SvPVX(xpad), SvCUR(xpad), (*levelp)-1);
+               SV * const opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), (*levelp)-1);
                sv_catsv(retval, totpad);
                sv_catsv(retval, opad);
                SvREFCNT_dec(opad);
@@ -419,13 +597,14 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
        }
        else if (realtype == SVt_PVHV) {
            SV *totpad, *newapad;
-           SV *iname, *sname;
+           SV *sname;
            HE *entry;
            char *key;
            I32 klen;
            SV *hval;
-           
-           iname = newSVpvn(name, namelen);
+           AV *keys = NULL;
+       
+           SV * const iname = newSVpvn(name, namelen);
            if (name[0] == '%') {
                sv_catpvn(retval, "(", 1);
                (SvPVX(iname))[0] = '$';
@@ -452,43 +631,142 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
            totpad = newSVsv(sep);
            sv_catsv(totpad, pad);
            sv_catsv(totpad, apad);
-           
-           (void)hv_iterinit((HV*)ival);
-           i = 0;
-           while ((entry = hv_iternext((HV*)ival)))  {
+       
+           /* If requested, get a sorted/filtered array of hash keys */
+           if (sortkeys) {
+               if (sortkeys == &PL_sv_yes) {
+#if PERL_VERSION < 8
+                    sortkeys = sv_2mortal(newSVpvn("Data::Dumper::_sortkeys", 23));
+#else
+                   keys = newAV();
+                   (void)hv_iterinit((HV*)ival);
+                   while ((entry = hv_iternext((HV*)ival))) {
+                       sv = hv_iterkeysv(entry);
+                       SvREFCNT_inc(sv);
+                       av_push(keys, sv);
+                   }
+# ifdef USE_LOCALE_NUMERIC
+                   sortsv(AvARRAY(keys), 
+                          av_len(keys)+1, 
+                          IN_LOCALE ? Perl_sv_cmp_locale : Perl_sv_cmp);
+# else
+                   sortsv(AvARRAY(keys), 
+                          av_len(keys)+1, 
+                          Perl_sv_cmp);
+# endif
+#endif
+               }
+               if (sortkeys != &PL_sv_yes) {
+                   dSP; ENTER; SAVETMPS; PUSHMARK(sp);
+                   XPUSHs(sv_2mortal(newRV_inc(ival))); PUTBACK;
+                   i = perl_call_sv(sortkeys, G_SCALAR | G_EVAL);
+                   SPAGAIN;
+                   if (i) {
+                       sv = POPs;
+                       if (SvROK(sv) && (SvTYPE(SvRV(sv)) == SVt_PVAV))
+                           keys = (AV*)SvREFCNT_inc(SvRV(sv));
+                   }
+                   if (! keys)
+                       warn("Sortkeys subroutine did not return ARRAYREF\n");
+                   PUTBACK; FREETMPS; LEAVE;
+               }
+               if (keys)
+                   sv_2mortal((SV*)keys);
+           }
+           else
+               (void)hv_iterinit((HV*)ival);
+
+            /* foreach (keys %hash) */
+            for (i = 0; 1; i++) {
                char *nkey;
+                char *nkey_buffer = NULL;
                I32 nticks = 0;
-               
+               SV* keysv;
+               STRLEN keylen;
+                I32 nlen;
+               bool do_utf8 = FALSE;
+
+               if (sortkeys) {
+                   if (!(keys && (I32)i <= av_len(keys))) break;
+               } else {
+                   if (!(entry = hv_iternext((HV *)ival))) break;
+               }
+
                if (i)
                    sv_catpvn(retval, ",", 1);
-               i++;
-               key = hv_iterkey(entry, &klen);
-               hval = hv_iterval((HV*)ival, entry);
-
-               if (quotekeys || needs_quote(key)) {
-                   nticks = num_q(key, klen);
-                   New(0, nkey, klen+nticks+3, char);
-                   nkey[0] = '\'';
-                   if (nticks)
-                       klen += esc_q(nkey+1, key, klen);
-                   else
-                       (void)Copy(key, nkey+1, klen, char);
-                   nkey[++klen] = '\'';
-                   nkey[++klen] = '\0';
+
+               if (sortkeys) {
+                   char *key;
+                   svp = av_fetch(keys, i, FALSE);
+                   keysv = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
+                   key = SvPV(keysv, keylen);
+                   svp = hv_fetch((HV*)ival, key,
+                                   SvUTF8(keysv) ? -(I32)keylen : keylen, 0);
+                   hval = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
                }
                else {
-                   New(0, nkey, klen, char);
-                   (void)Copy(key, nkey, klen, char);
+                   keysv = hv_iterkeysv(entry);
+                   hval = hv_iterval((HV*)ival, entry);
                }
-               
-               sname = newSVsv(iname);
-               sv_catpvn(sname, nkey, klen);
-               sv_catpvn(sname, "}", 1);
 
-               sv_catsv(retval, totpad);
-               sv_catsv(retval, ipad);
-               sv_catpvn(retval, nkey, klen);
-               sv_catpvn(retval, " => ", 4);
+               key = SvPV(keysv, keylen);
+               do_utf8 = DO_UTF8(keysv);
+               klen = keylen;
+
+                sv_catsv(retval, totpad);
+                sv_catsv(retval, ipad);
+                /* old logic was first to check utf8 flag, and if utf8 always
+                   call esc_q_utf8.  This caused test to break under -Mutf8,
+                   because there even strings like 'c' have utf8 flag on.
+                   Hence with quotekeys == 0 the XS code would still '' quote
+                   them based on flags, whereas the perl code would not,
+                   based on regexps.
+                   The perl code is correct.
+                   needs_quote() decides that anything that isn't a valid
+                   perl identifier needs to be quoted, hence only correctly
+                   formed strings with no characters outside [A-Za-z0-9_:]
+                   won't need quoting.  None of those characters are used in
+                   the byte encoding of utf8, so anything with utf8
+                   encoded characters in will need quoting. Hence strings
+                   with utf8 encoded characters in will end up inside do_utf8
+                   just like before, but now strings with utf8 flag set but
+                   only ascii characters will end up in the unquoted section.
+
+                   There should also be less tests for the (probably currently)
+                   more common doesn't need quoting case.
+                   The code is also smaller (22044 vs 22260) because I've been
+                   able to pull the common logic out to both sides.  */
+                if (quotekeys || needs_quote(key)) {
+                    if (do_utf8) {
+                        STRLEN ocur = SvCUR(retval);
+                        nlen = esc_q_utf8(aTHX_ retval, key, klen);
+                        nkey = SvPVX(retval) + ocur;
+                    }
+                    else {
+                       nticks = num_q(key, klen);
+                       New(0, nkey_buffer, klen+nticks+3, char);
+                        nkey = nkey_buffer;
+                       nkey[0] = '\'';
+                       if (nticks)
+                           klen += esc_q(nkey+1, key, klen);
+                       else
+                           (void)Copy(key, nkey+1, klen, char);
+                       nkey[++klen] = '\'';
+                       nkey[++klen] = '\0';
+                        nlen = klen;
+                        sv_catpvn(retval, nkey, klen);
+                   }
+                }
+                else {
+                    nkey = key;
+                    nlen = klen;
+                    sv_catpvn(retval, nkey, klen);
+               }
+                sname = newSVsv(iname);
+                sv_catpvn(sname, nkey, nlen);
+                sv_catpvn(sname, "}", 1);
+
+               sv_catsv(retval, pair);
                if (indent >= 2) {
                    char *extra;
                    I32 elen = 0;
@@ -503,17 +781,17 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
                else
                    newapad = apad;
 
-               DD_dump(aTHX_ hval, SvPVX(sname), SvCUR(sname), retval, seenhv,
-                       postav, levelp, indent, pad, xpad, newapad, sep,
+               DD_dump(aTHX_ hval, SvPVX_const(sname), SvCUR(sname), retval, seenhv,
+                       postav, levelp, indent, pad, xpad, newapad, sep, pair,
                        freezer, toaster, purity, deepcopy, quotekeys, bless,
-                       maxdepth);
+                       maxdepth, sortkeys);
                SvREFCNT_dec(sname);
-               Safefree(nkey);
+               Safefree(nkey_buffer);
                if (indent >= 2)
                    SvREFCNT_dec(newapad);
            }
            if (i) {
-               SV *opad = sv_x(aTHX_ Nullsv, SvPVX(xpad), SvCUR(xpad), *levelp-1);
+               SV *opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp-1);
                sv_catsv(retval, totpad);
                sv_catsv(retval, opad);
                SvREFCNT_dec(opad);
@@ -534,13 +812,33 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
            warn("cannot handle ref type %ld", realtype);
        }
 
-       if (realpack) {  /* free blessed allocs */
+       if (realpack && !no_bless) {  /* free blessed allocs */
+           I32 plen;
+           I32 pticks;
+
            if (indent >= 2) {
                SvREFCNT_dec(apad);
                apad = blesspad;
            }
            sv_catpvn(retval, ", '", 3);
-           sv_catpvn(retval, realpack, strlen(realpack));
+
+           plen = strlen(realpack);
+           pticks = num_q(realpack, plen);
+           if (pticks) { /* needs escaping */
+               char *npack;
+               char *npack_buffer = NULL;
+
+               New(0, npack_buffer, plen+pticks+1, char);
+               npack = npack_buffer;
+               plen += esc_q(npack, realpack, plen);
+               npack[plen] = '\0';
+
+               sv_catpvn(retval, npack, plen);
+               Safefree(npack_buffer);
+           }
+           else {
+               sv_catpvn(retval, realpack, strlen(realpack));
+           }
            sv_catpvn(retval, "' )", 3);
            if (toaster && SvPOK(toaster) && SvCUR(toaster)) {
                sv_catpvn(retval, "->", 2);
@@ -555,8 +853,13 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
        STRLEN i;
        
        if (namelen) {
-           (void) sprintf(id, "0x%lx", (unsigned long)val);
-           if ((svp = hv_fetch(seenhv, id, (idlen = strlen(id)), FALSE)) &&
+#ifdef DD_USE_OLD_ID_FORMAT
+           idlen = my_snprintf(id, sizeof(id), "0x%"UVxf, PTR2UV(val));
+#else
+           id_buffer = PTR2UV(val);
+           idlen = sizeof(id_buffer);
+#endif
+           if ((svp = hv_fetch(seenhv, id, idlen, FALSE)) &&
                (sv = *svp) && SvROK(sv) &&
                (seenentry = (AV*)SvRV(sv)))
            {
@@ -570,23 +873,38 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
                    return 1;
                }
            }
-           else {
-               SV *namesv;
-               namesv = newSVpvn("\\", 1);
+           else if (val != &PL_sv_undef) {
+               SV * const namesv = newSVpvn("\\", 1);
                sv_catpvn(namesv, name, namelen);
                seenentry = newAV();
                av_push(seenentry, namesv);
-               av_push(seenentry, newRV(val));
-               (void)hv_store(seenhv, id, strlen(id), newRV((SV*)seenentry), 0);
+               av_push(seenentry, newRV_inc(val));
+               (void)hv_store(seenhv, id, idlen, newRV_inc((SV*)seenentry), 0);
                SvREFCNT_dec(seenentry);
            }
        }
 
-       if (SvIOK(val)) {
+        if (DD_is_integer(val)) {
             STRLEN len;
-            (void) sprintf(tmpbuf, "%"IVdf, SvIV(val));
-            len = strlen(tmpbuf);
-           sv_catpvn(retval, tmpbuf, len);
+           if (SvIsUV(val))
+             len = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"UVuf, SvUV(val));
+           else
+             len = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"IVdf, SvIV(val));
+            if (SvPOK(val)) {
+              /* Need to check to see if this is a string such as " 0".
+                 I'm assuming from sprintf isn't going to clash with utf8.
+                 Is this valid on EBCDIC?  */
+              STRLEN pvlen;
+              const char * const pv = SvPV(val, pvlen);
+              if (pvlen != len || memNE(pv, tmpbuf, len))
+                goto integer_came_from_string;
+            }
+            if (len > 10) {
+              /* Looks like we're on a 64 bit system.  Make it a string so that
+                 if a 32 bit system reads the number it will cope better.  */
+              sv_catpvf(retval, "'%s'", tmpbuf);
+            } else
+              sv_catpvn(retval, tmpbuf, len);
        }
        else if (realtype == SVt_PVGV) {/* GLOBs can end up with scribbly names */
            c = SvPV(val, i);
@@ -613,12 +931,12 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
            SvCUR_set(retval, SvCUR(retval)+i);
 
            if (purity) {
-               static char *entries[] = { "{SCALAR}", "{ARRAY}", "{HASH}" };
-               static STRLEN sizes[] = { 8, 7, 6 };
+               static const char* const entries[] = { "{SCALAR}", "{ARRAY}", "{HASH}" };
+               static const STRLEN sizes[] = { 8, 7, 6 };
                SV *e;
-               SV *nname = newSVpvn("", 0);
-               SV *newapad = newSVpvn("", 0);
-               GV *gv = (GV*)val;
+               SV * const nname = newSVpvn("", 0);
+               SV * const newapad = newSVpvn("", 0);
+               GV * const gv = (GV*)val;
                I32 j;
                
                for (j=0; j<3; j++) {
@@ -636,16 +954,17 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
                        sv_catpvn(nname, entries[j], sizes[j]);
                        sv_catpvn(postentry, " = ", 3);
                        av_push(postav, postentry);
-                       e = newRV(e);
+                       e = newRV_inc(e);
                        
-                       SvCUR(newapad) = 0;
+                       SvCUR_set(newapad, 0);
                        if (indent >= 2)
                            (void)sv_x(aTHX_ newapad, " ", 1, SvCUR(postentry));
                        
-                       DD_dump(aTHX_ e, SvPVX(nname), SvCUR(nname), postentry,
+                       DD_dump(aTHX_ e, SvPVX_const(nname), SvCUR(nname), postentry,
                                seenhv, postav, &nlevel, indent, pad, xpad,
-                               newapad, sep, freezer, toaster, purity,
-                               deepcopy, quotekeys, bless, maxdepth);
+                               newapad, sep, pair, freezer, toaster, purity,
+                               deepcopy, quotekeys, bless, maxdepth, 
+                               sortkeys);
                        SvREFCNT_dec(e);
                    }
                }
@@ -658,15 +977,20 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
            sv_catpvn(retval, "undef", 5);
        }
        else {
+        integer_came_from_string:
            c = SvPV(val, i);
-           sv_grow(retval, SvCUR(retval)+3+2*i);
-           r = SvPVX(retval)+SvCUR(retval);
-           r[0] = '\'';
-           i += esc_q(r+1, c, i);
-           ++i;
-           r[i++] = '\'';
-           r[i] = '\0';
-           SvCUR_set(retval, SvCUR(retval)+i);
+           if (DO_UTF8(val))
+               i += esc_q_utf8(aTHX_ retval, c, i);
+           else {
+               sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */
+               r = SvPVX(retval) + SvCUR(retval);
+               r[0] = '\'';
+               i += esc_q(r+1, c, i);
+               ++i;
+               r[i++] = '\'';
+               r[i] = '\0';
+               SvCUR_set(retval, SvCUR(retval)+i);
+           }
        }
     }
 
@@ -698,13 +1022,13 @@ Data_Dumper_Dumpxs(href, ...)
        {
            HV *hv;
            SV *retval, *valstr;
-           HV *seenhv = Nullhv;
+           HV *seenhv = NULL;
            AV *postav, *todumpav, *namesav;
            I32 level = 0;
-           I32 indent, terse, useqq, i, imax, postlen;
+           I32 indent, terse, i, imax, postlen;
            SV **svp;
-           SV *val, *name, *pad, *xpad, *apad, *sep, *tmp, *varname;
-           SV *freezer, *toaster, *bless;
+           SV *val, *name, *pad, *xpad, *apad, *sep, *pair, *varname;
+           SV *freezer, *toaster, *bless, *sortkeys;
            I32 purity, deepcopy, quotekeys, maxdepth = 0;
            char tmpbuf[1024];
            I32 gimme = GIMME;
@@ -734,15 +1058,15 @@ Data_Dumper_Dumpxs(href, ...)
                    (void)sv_2mortal(href);
            }
 
-           todumpav = namesav = Nullav;
-           seenhv = Nullhv;
-           val = pad = xpad = apad = sep = tmp = varname
-               = freezer = toaster = bless = &PL_sv_undef;
+           todumpav = namesav = NULL;
+           seenhv = NULL;
+           val = pad = xpad = apad = sep = pair = varname
+               = freezer = toaster = bless = sortkeys = &PL_sv_undef;
            name = sv_newmortal();
            indent = 2;
-           terse = useqq = purity = deepcopy = 0;
+           terse = purity = deepcopy = 0;
            quotekeys = 1;
-           
+       
            retval = newSVpvn("", 0);
            if (SvROK(href)
                && (hv = (HV*)SvRV((SV*)href))
@@ -760,8 +1084,10 @@ Data_Dumper_Dumpxs(href, ...)
                    purity = SvIV(*svp);
                if ((svp = hv_fetch(hv, "terse", 5, FALSE)))
                    terse = SvTRUE(*svp);
+#if 0 /* useqq currently unused */
                if ((svp = hv_fetch(hv, "useqq", 5, FALSE)))
                    useqq = SvTRUE(*svp);
+#endif
                if ((svp = hv_fetch(hv, "pad", 3, FALSE)))
                    pad = *svp;
                if ((svp = hv_fetch(hv, "xpad", 4, FALSE)))
@@ -770,6 +1096,8 @@ Data_Dumper_Dumpxs(href, ...)
                    apad = *svp;
                if ((svp = hv_fetch(hv, "sep", 3, FALSE)))
                    sep = *svp;
+               if ((svp = hv_fetch(hv, "pair", 4, FALSE)))
+                   pair = *svp;
                if ((svp = hv_fetch(hv, "varname", 7, FALSE)))
                    varname = *svp;
                if ((svp = hv_fetch(hv, "freezer", 7, FALSE)))
@@ -784,6 +1112,17 @@ Data_Dumper_Dumpxs(href, ...)
                    bless = *svp;
                if ((svp = hv_fetch(hv, "maxdepth", 8, FALSE)))
                    maxdepth = SvIV(*svp);
+               if ((svp = hv_fetch(hv, "sortkeys", 8, FALSE))) {
+                   sortkeys = *svp;
+                   if (! SvTRUE(sortkeys))
+                       sortkeys = NULL;
+                   else if (! (SvROK(sortkeys) &&
+                               SvTYPE(SvRV(sortkeys)) == SVt_PVCV) )
+                   {
+                       /* flag to use qsortsv() for sorting hash keys */       
+                       sortkeys = &PL_sv_yes; 
+                   }
+               }
                postav = newAV();
 
                if (todumpav)
@@ -793,19 +1132,22 @@ Data_Dumper_Dumpxs(href, ...)
                valstr = newSVpvn("",0);
                for (i = 0; i <= imax; ++i) {
                    SV *newapad;
-                   
+               
                    av_clear(postav);
                    if ((svp = av_fetch(todumpav, i, FALSE)))
                        val = *svp;
                    else
                        val = &PL_sv_undef;
-                   if ((svp = av_fetch(namesav, i, TRUE)))
+                   if ((svp = av_fetch(namesav, i, TRUE))) {
                        sv_setsv(name, *svp);
+                       if (SvOK(*svp) && !SvPOK(*svp))
+                           (void)SvPV_nolen_const(name);
+                   }
                    else
-                       SvOK_off(name);
-                   
-                   if (SvOK(name)) {
-                       if ((SvPVX(name))[0] == '*') {
+                       (void)SvOK_off(name);
+               
+                   if (SvPOK(name)) {
+                       if ((SvPVX_const(name))[0] == '*') {
                            if (SvROK(val)) {
                                switch (SvTYPE(SvRV(val))) {
                                case SVt_PVAV:
@@ -825,39 +1167,38 @@ Data_Dumper_Dumpxs(href, ...)
                            else
                                (SvPVX(name))[0] = '$';
                        }
-                       else if ((SvPVX(name))[0] != '$')
+                       else if ((SvPVX_const(name))[0] != '$')
                            sv_insert(name, 0, 0, "$", 1);
                    }
                    else {
-                       STRLEN nchars = 0;
+                       STRLEN nchars;
                        sv_setpvn(name, "$", 1);
                        sv_catsv(name, varname);
-                       (void) sprintf(tmpbuf, "%"IVdf, (IV)(i+1));
-                       nchars = strlen(tmpbuf);
+                       nchars = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"IVdf, (IV)(i+1));
                        sv_catpvn(name, tmpbuf, nchars);
                    }
-                   
+               
                    if (indent >= 2) {
-                       SV *tmpsv = sv_x(aTHX_ Nullsv, " ", 1, SvCUR(name)+3);
+                       SV * const tmpsv = sv_x(aTHX_ NULL, " ", 1, SvCUR(name)+3);
                        newapad = newSVsv(apad);
                        sv_catsv(newapad, tmpsv);
                        SvREFCNT_dec(tmpsv);
                    }
                    else
                        newapad = apad;
-                   
-                   DD_dump(aTHX_ val, SvPVX(name), SvCUR(name), valstr, seenhv,
-                           postav, &level, indent, pad, xpad, newapad, sep,
+               
+                   DD_dump(aTHX_ val, SvPVX_const(name), SvCUR(name), valstr, seenhv,
+                           postav, &level, indent, pad, xpad, newapad, sep, pair,
                            freezer, toaster, purity, deepcopy, quotekeys,
-                           bless, maxdepth);
-                   
+                           bless, maxdepth, sortkeys);
+               
                    if (indent >= 2)
                        SvREFCNT_dec(newapad);
 
                    postlen = av_len(postav);
                    if (postlen >= 0 || !terse) {
                        sv_insert(valstr, 0, 0, " = ", 3);
-                       sv_insert(valstr, 0, 0, SvPVX(name), SvCUR(name));
+                       sv_insert(valstr, 0, 0, SvPVX_const(name), SvCUR(name));
                        sv_catpvn(valstr, ";", 1);
                    }
                    sv_catsv(retval, pad);