This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
gcc thinks it can see how 'entry' can be accessed uninitialized.
[perl5.git] / dist / Data-Dumper / Dumper.xs
index 30a9b40..372c073 100644 (file)
 #  define DD_USE_OLD_ID_FORMAT
 #endif
 
+#ifndef isWORDCHAR
+#   define isWORDCHAR(c) isALNUM(c)
+#endif
+
 static I32 num_q (const char *s, STRLEN slen);
 static I32 esc_q (char *dest, const char *src, STRLEN slen);
-static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen);
-static I32 needs_quote(register const char *s, STRLEN len);
+static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq);
+static bool globname_needs_quote(const char *s, STRLEN len);
+static bool key_needs_quote(const char *s, STRLEN len);
+static bool safe_decimal_number(const char *p, STRLEN len);
 static SV *sv_x (pTHX_ SV *sv, const char *str, STRLEN len, I32 n);
 static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
                    HV *seenhv, AV *postav, I32 *levelp, I32 indent,
                    SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair,
                    SV *freezer, SV *toaster,
                    I32 purity, I32 deepcopy, I32 quotekeys, SV *bless,
-                   I32 maxdepth, SV *sortkeys);
+                   I32 maxdepth, SV *sortkeys, int use_sparse_seen_hash, I32 useqq, IV maxrecurse);
 
 #ifndef HvNAME_get
 #define HvNAME_get HvNAME
 #endif
 
+/* Perls 7 through portions of 15 used utf8_to_uvchr() which didn't have a
+ * length parameter.  This wrongly allowed reading beyond the end of buffer
+ * given malformed input */
+
 #if PERL_VERSION <= 6 /* Perl 5.6 and earlier */
 
 # ifdef EBCDIC
@@ -37,21 +47,43 @@ static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
 # endif
 
 UV
-Perl_utf8_to_uvchr(pTHX_ U8 *s, STRLEN *retlen)
+Perl_utf8_to_uvchr_buf(pTHX_ U8 *s, U8 *send, STRLEN *retlen)
 {
-    const UV uv = utf8_to_uv(s, UTF8_MAXLEN, retlen,
+    const UV uv = utf8_to_uv(s, send - s, retlen,
                     ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
     return UNI_TO_NATIVE(uv);
 }
 
 # if !defined(PERL_IMPLICIT_CONTEXT)
-#  define utf8_to_uvchr             Perl_utf8_to_uvchr
+#  define utf8_to_uvchr_buf         Perl_utf8_to_uvchr_buf
 # else
-#  define utf8_to_uvchr(a,b) Perl_utf8_to_uvchr(aTHX_ a,b)
+#  define utf8_to_uvchr_buf(a,b,c) Perl_utf8_to_uvchr_buf(aTHX_ a,b,c)
 # endif
 
 #endif /* PERL_VERSION <= 6 */
 
+/* Perl 5.7 through part of 5.15 */
+#if PERL_VERSION > 6 && PERL_VERSION <= 15 && ! defined(utf8_to_uvchr_buf)
+
+UV
+Perl_utf8_to_uvchr_buf(pTHX_ U8 *s, U8 *send, STRLEN *retlen)
+{
+    /* We have to discard <send> for these versions; hence can read off the
+     * end of the buffer if there is a malformation that indicates the
+     * character is longer than the space available */
+
+    const UV uv = utf8_to_uvchr(s, retlen);
+    return UNI_TO_NATIVE(uv);
+}
+
+# if !defined(PERL_IMPLICIT_CONTEXT)
+#  define utf8_to_uvchr_buf         Perl_utf8_to_uvchr_buf
+# else
+#  define utf8_to_uvchr_buf(a,b,c) Perl_utf8_to_uvchr_buf(aTHX_ a,b,c)
+# endif
+
+#endif /* PERL_VERSION > 6 && <= 15 */
+
 /* Changes in 5.7 series mean that now IOK is only set if scalar is
    precisely integer but in 5.6 and earlier we need to do a more
    complex test  */
@@ -61,39 +93,95 @@ Perl_utf8_to_uvchr(pTHX_ U8 *s, STRLEN *retlen)
 #define DD_is_integer(sv) SvIOK(sv)
 #endif
 
-/* does a string need to be protected? */
-static I32
-needs_quote(register const char *s, STRLEN len)
+/* does a glob name need to be protected? */
+static bool
+globname_needs_quote(const char *s, STRLEN len)
 {
     const char *send = s+len;
 TOP:
     if (s[0] == ':') {
        if (++s<send) {
            if (*s++ != ':')
-               return 1;
+                return TRUE;
        }
        else
-           return 1;
+           return TRUE;
     }
     if (isIDFIRST(*s)) {
        while (++s<send)
-           if (!isALNUM(*s)) {
+           if (!isWORDCHAR(*s)) {
                if (*s == ':')
                    goto TOP;
                else
-                   return 1;
+                    return TRUE;
            }
     }
     else
-       return 1;
-    return 0;
+        return TRUE;
+
+    return FALSE;
+}
+
+/* does a hash key need to be quoted (to the left of => ).
+   Previously this used (globname_)needs_quote() which accepted strings
+   like '::foo', but these aren't safe as unquoted keys under strict.
+*/
+static bool
+key_needs_quote(const char *s, STRLEN len) {
+    const char *send = s+len;
+
+    if (safe_decimal_number(s, len)) {
+        return FALSE;
+    }
+    else if (isIDFIRST(*s)) {
+        while (++s<send)
+            if (!isWORDCHAR(*s))
+                return TRUE;
+    }
+    else
+        return TRUE;
+
+    return FALSE;
+}
+
+/* Check that the SV can be represented as a simple decimal integer.
+ *
+ * The perl code does this by matching against /^(?:0|-?[1-9]\d{0,8})\z/
+*/
+static bool
+safe_decimal_number(const char *p, STRLEN len) {
+    if (len == 1 && *p == '0')
+        return TRUE;
+
+    if (len && *p == '-') {
+        ++p;
+        --len;
+    }
+
+    if (len == 0 || *p < '1' || *p > '9')
+        return FALSE;
+
+    ++p;
+    --len;
+
+    if (len > 8)
+        return FALSE;
+
+    while (len > 0) {
+         /* the perl code checks /\d/ but we don't want unicode digits here */
+         if (*p < '0' || *p > '9')
+             return FALSE;
+         ++p;
+         --len;
+    }
+    return TRUE;
 }
 
 /* count the number of "'"s and "\"s in string */
 static I32
-num_q(register const char *s, register STRLEN slen)
+num_q(const char *s, STRLEN slen)
 {
-    register I32 ret = 0;
+    I32 ret = 0;
 
     while (slen > 0) {
        if (*s == '\'' || *s == '\\')
@@ -109,9 +197,9 @@ num_q(register const char *s, register STRLEN slen)
 /* slen number of characters in s will be escaped */
 /* destination must be long enough for additional chars */
 static I32
-esc_q(register char *d, register const char *s, register STRLEN slen)
+esc_q(char *d, const char *s, STRLEN slen)
 {
-    register I32 ret = 0;
+    I32 ret = 0;
 
     while (slen > 0) {
        switch (*s) {
@@ -119,6 +207,7 @@ esc_q(register char *d, register const char *s, register STRLEN slen)
        case '\\':
            *d = '\\';
            ++d; ++ret;
+            /* FALLTHROUGH */
        default:
            *d = *s;
            ++d; ++s; --slen;
@@ -128,8 +217,9 @@ esc_q(register char *d, register const char *s, register STRLEN slen)
     return ret;
 }
 
+/* this function is also misused for implementing $Useqq */
 static I32
-esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen)
+esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq)
 {
     char *r, *rstart;
     const char *s = src;
@@ -144,14 +234,21 @@ esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen)
     STRLEN qq_escapables = 0;  /* " $ @ will need a \ in "" strings.  */
     STRLEN normal = 0;
     int increment;
+    UV next;
 
     /* this will need EBCDICification */
-    for (s = src; s < send; s += increment) {
-        const UV k = utf8_to_uvchr((U8*)s, NULL);
+    for (s = src; s < send; do_utf8 ? s += increment : s++) {
+        const UV k = do_utf8 ? utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL) : *(U8*)s;
 
         /* check for invalid utf8 */
         increment = (k == 0 && *s != '\0') ? 1 : UTF8SKIP(s);
 
+       /* this is only used to check if the next character is an
+        * ASCII digit, which are invariant, so if the following collects
+        * a UTF-8 start byte it does no harm
+        */
+       next = (s + increment >= send ) ? 0 : *(U8*)(s+increment);
+
 #ifdef EBCDIC
        if (!isprint(k) || k > 256) {
 #else
@@ -165,6 +262,17 @@ esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen)
                 k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
 #endif
                 );
+#ifndef EBCDIC
+       } else if (useqq &&
+           /* we can't use the short form like '\0' if followed by a digit */
+                   (((k >= 7 && k <= 10) || k == 12 || k == 13 || k == 27)
+                 || (k < 8 && (next < '0' || next > '9')))) {
+           grow += 2;
+       } else if (useqq && k <= 31 && (next < '0' || next > '9')) {
+           grow += 3;
+       } else if (useqq && (k <= 31 || k >= 127)) {
+           grow += 4;
+#endif
         } else if (k == '\\') {
             backslashes++;
         } else if (k == '\'') {
@@ -175,7 +283,7 @@ esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen)
             normal++;
         }
     }
-    if (grow) {
+    if (grow || useqq) {
         /* We have something needing hex. 3 is ""\0 */
         sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
                + 2*qq_escapables + normal);
@@ -183,8 +291,8 @@ esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen)
 
         *r++ = '"';
 
-        for (s = src; s < send; s += UTF8SKIP(s)) {
-            const UV k = utf8_to_uvchr((U8*)s, NULL);
+        for (s = src; s < send; do_utf8 ? s += UTF8SKIP(s) : s++) {
+            const UV k = do_utf8 ? utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL) : *(U8*)s;
 
             if (k == '"' || k == '\\' || k == '$' || k == '@') {
                 *r++ = '\\';
@@ -194,7 +302,44 @@ esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen)
 #ifdef EBCDIC
              if (isprint(k) && k < 256)
 #else
-             if (k < 0x80)
+             if (useqq && (k <= 31 || k == 127 || (!do_utf8 && k > 127))) {
+               bool next_is_digit;
+
+               *r++ = '\\';
+               switch (k) {
+               case 7:  *r++ = 'a'; break;
+               case 8:  *r++ = 'b'; break;
+               case 9:  *r++ = 't'; break;
+               case 10: *r++ = 'n'; break;
+               case 12: *r++ = 'f'; break;
+               case 13: *r++ = 'r'; break;
+               case 27: *r++ = 'e'; break;
+               default:
+                   increment = (k == 0 && *s != '\0') ? 1 : UTF8SKIP(s);
+
+                   /* only ASCII digits matter here, which are invariant,
+                    * since we only encode characters \377 and under, or
+                    * \x177 and under for a unicode string
+                    */
+                   next = (s+increment < send) ? *(U8*)(s+increment) : 0;
+                   next_is_digit = next >= '0' && next <= '9';
+
+                   /* faster than
+                    * r = r + my_sprintf(r, "%o", k);
+                    */
+                   if (k <= 7 && !next_is_digit) {
+                       *r++ = (char)k + '0';
+                   } else if (k <= 63 && !next_is_digit) {
+                       *r++ = (char)(k>>3) + '0';
+                       *r++ = (char)(k&7) + '0';
+                   } else {
+                       *r++ = (char)(k>>6) + '0';
+                       *r++ = (char)((k&63)>>3) + '0';
+                       *r++ = (char)(k&7) + '0';
+                   }
+               }
+           }
+           else if (k < 0x80)
 #endif
                 *r++ = (char)k;
             else {
@@ -234,7 +379,7 @@ static SV *
 sv_x(pTHX_ SV *sv, const char *str, STRLEN len, I32 n)
 {
     if (!sv)
-       sv = newSVpvn("", 0);
+       sv = newSVpvs("");
 #ifdef DEBUGGING
     else
        assert(SvTYPE(sv) >= SVt_PV);
@@ -267,10 +412,11 @@ static I32
 DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
        AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad,
        SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity,
-       I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys)
+       I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys,
+        int use_sparse_seen_hash, I32 useqq, IV maxrecurse)
 {
     char tmpbuf[128];
-    U32 i;
+    Size_t i;
     char *c, *r, *realpack;
 #ifdef DD_USE_OLD_ID_FORMAT
     char id[128];
@@ -317,7 +463,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
        {
            dSP; ENTER; SAVETMPS; PUSHMARK(sp);
            XPUSHs(val); PUTBACK;
-           i = perl_call_method(SvPVX_const(freezer), G_EVAL|G_VOID);
+           i = perl_call_method(SvPVX_const(freezer), G_EVAL|G_VOID|G_DISCARD);
            SPAGAIN;
            if (SvTRUE(ERRSV))
                warn("WARNING(Freezer method call failed): %"SVf"", ERRSV);
@@ -352,13 +498,13 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
                        SV *postentry;
                        
                        if (realtype == SVt_PVHV)
-                           sv_catpvn(retval, "{}", 2);
+                           sv_catpvs(retval, "{}");
                        else if (realtype == SVt_PVAV)
-                           sv_catpvn(retval, "[]", 2);
+                           sv_catpvs(retval, "[]");
                        else
-                           sv_catpvn(retval, "do{my $o}", 9);
+                           sv_catpvs(retval, "do{my $o}");
                        postentry = newSVpvn(name, namelen);
-                       sv_catpvn(postentry, " = ", 3);
+                       sv_catpvs(postentry, " = ");
                        sv_catsv(postentry, othername);
                        av_push(postav, postentry);
                    }
@@ -371,9 +517,9 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
                            }
                            else {
                                sv_catpvn(retval, name, 1);
-                               sv_catpvn(retval, "{", 1);
+                               sv_catpvs(retval, "{");
                                sv_catsv(retval, othername);
-                               sv_catpvn(retval, "}", 1);
+                               sv_catpvs(retval, "}");
                            }
                        }
                        else
@@ -393,11 +539,11 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
            else {   /* store our name and continue */
                SV *namesv;
                if (name[0] == '@' || name[0] == '%') {
-                   namesv = newSVpvn("\\", 1);
+                   namesv = newSVpvs("\\");
                    sv_catpvn(namesv, name, namelen);
                }
                else if (realtype == SVt_PVCV && name[0] == '*') {
-                   namesv = newSVpvn("\\", 2);
+                   namesv = newSVpvs("\\");
                    sv_catpvn(namesv, name, namelen);
                    (SvPVX(namesv))[1] = '&';
                }
@@ -438,17 +584,21 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
        if (!purity && maxdepth > 0 && *levelp >= maxdepth) {
            STRLEN vallen;
            const char * const valstr = SvPV(val,vallen);
-           sv_catpvn(retval, "'", 1);
+           sv_catpvs(retval, "'");
            sv_catpvn(retval, valstr, vallen);
-           sv_catpvn(retval, "'", 1);
+           sv_catpvs(retval, "'");
            return 1;
        }
 
+       if (maxrecurse > 0 && *levelp >= maxrecurse) {
+           croak("Recursion limit of %" IVdf " exceeded", maxrecurse);
+       }
+
        if (realpack && !no_bless) {                            /* we have a blessed ref */
            STRLEN blesslen;
            const char * const blessstr = SvPV(bless, blesslen);
            sv_catpvn(retval, blessstr, blesslen);
-           sv_catpvn(retval, "( ", 2);
+           sv_catpvs(retval, "( ");
            if (indent >= 2) {
                blesspad = apad;
                apad = newSVsv(apad);
@@ -462,18 +612,58 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
         if (is_regex) 
         {
             STRLEN rlen;
-           const char *rval = SvPV(val, rlen);
-           const char *slash = strchr(rval, '/');
-           sv_catpvn(retval, "qr/", 3);
-           while (slash) {
+           SV *sv_pattern = NULL;
+           SV *sv_flags = NULL;
+           CV *re_pattern_cv;
+           const char *rval;
+           const char *rend;
+           const char *slash;
+
+           if ((re_pattern_cv = get_cv("re::regexp_pattern", 0))) {
+             dSP;
+             I32 count;
+             ENTER;
+             SAVETMPS;
+             PUSHMARK(SP);
+             XPUSHs(val);
+             PUTBACK;
+             count = call_sv((SV*)re_pattern_cv, G_ARRAY);
+             SPAGAIN;
+             if (count >= 2) {
+               sv_flags = POPs;
+               sv_pattern = POPs;
+               SvREFCNT_inc(sv_flags);
+               SvREFCNT_inc(sv_pattern);
+             }
+             PUTBACK;
+             FREETMPS;
+             LEAVE;
+             if (sv_pattern) {
+               sv_2mortal(sv_pattern);
+               sv_2mortal(sv_flags);
+             }
+           }
+           else {
+             sv_pattern = val;
+           }
+           assert(sv_pattern);
+           rval = SvPV(sv_pattern, rlen);
+           rend = rval+rlen;
+           slash = rval;
+           sv_catpvs(retval, "qr/");
+           for (;slash < rend; slash++) {
+             if (*slash == '\\') { ++slash; continue; }
+             if (*slash == '/') {    
                sv_catpvn(retval, rval, slash-rval);
-               sv_catpvn(retval, "\\/", 2);
+               sv_catpvs(retval, "\\/");
                rlen -= slash-rval+1;
                rval = slash+1;
-               slash = strchr(rval, '/');
+             }
            }
            sv_catpvn(retval, rval, rlen);
-           sv_catpvn(retval, "/", 1);
+           sv_catpvs(retval, "/");
+           if (sv_flags)
+             sv_catsv(retval, sv_flags);
        } 
         else if (
 #if PERL_VERSION < 9
@@ -482,41 +672,44 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
                realtype <= SVt_PVMG
 #endif
        ) {                          /* scalar ref */
-           SV * const namesv = newSVpvn("${", 2);
+           SV * const namesv = newSVpvs("${");
            sv_catpvn(namesv, name, namelen);
-           sv_catpvn(namesv, "}", 1);
+           sv_catpvs(namesv, "}");
            if (realpack) {                                  /* blessed */
-               sv_catpvn(retval, "do{\\(my $o = ", 13);
+               sv_catpvs(retval, "do{\\(my $o = ");
                DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
                        postav, levelp, indent, pad, xpad, apad, sep, pair,
                        freezer, toaster, purity, deepcopy, quotekeys, bless,
-                       maxdepth, sortkeys);
-               sv_catpvn(retval, ")}", 2);
+                       maxdepth, sortkeys, use_sparse_seen_hash, useqq,
+                       maxrecurse);
+               sv_catpvs(retval, ")}");
            }                                                /* plain */
            else {
-               sv_catpvn(retval, "\\", 1);
+               sv_catpvs(retval, "\\");
                DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
                        postav, levelp, indent, pad, xpad, apad, sep, pair,
                        freezer, toaster, purity, deepcopy, quotekeys, bless,
-                       maxdepth, sortkeys);
+                       maxdepth, sortkeys, use_sparse_seen_hash, useqq,
+                       maxrecurse);
            }
            SvREFCNT_dec(namesv);
        }
        else if (realtype == SVt_PVGV) {                     /* glob ref */
-           SV * const namesv = newSVpvn("*{", 2);
+           SV * const namesv = newSVpvs("*{");
            sv_catpvn(namesv, name, namelen);
-           sv_catpvn(namesv, "}", 1);
-           sv_catpvn(retval, "\\", 1);
+           sv_catpvs(namesv, "}");
+           sv_catpvs(retval, "\\");
            DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
                    postav, levelp,     indent, pad, xpad, apad, sep, pair,
                    freezer, toaster, purity, deepcopy, quotekeys, bless,
-                   maxdepth, sortkeys);
+                   maxdepth, sortkeys, use_sparse_seen_hash, useqq,
+                   maxrecurse);
            SvREFCNT_dec(namesv);
        }
        else if (realtype == SVt_PVAV) {
            SV *totpad;
-           I32 ix = 0;
-           const I32 ixmax = av_len((AV *)ival);
+           SSize_t ix = 0;
+           const SSize_t ixmax = av_len((AV *)ival);
        
            SV * const ixsv = newSViv(0);
            /* allowing for a 24 char wide array index */
@@ -524,11 +717,11 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
            (void)strcpy(iname, name);
            inamelen = namelen;
            if (name[0] == '@') {
-               sv_catpvn(retval, "(", 1);
+               sv_catpvs(retval, "(");
                iname[0] = '$';
            }
            else {
-               sv_catpvn(retval, "[", 1);
+               sv_catpvs(retval, "[");
                /* omit "->" in $foo{bar}->[0], but not in ${$foo}->[0] */
                /*if (namelen > 0
                    && name[namelen-1] != ']' && name[namelen-1] != '}'
@@ -575,7 +768,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
                if (indent >= 3) {
                    sv_catsv(retval, totpad);
                    sv_catsv(retval, ipad);
-                   sv_catpvn(retval, "#", 1);
+                   sv_catpvs(retval, "#");
                    sv_catsv(retval, ixsv);
                }
                sv_catsv(retval, totpad);
@@ -583,9 +776,10 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
                DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav,
                        levelp, indent, pad, xpad, apad, sep, pair,
                        freezer, toaster, purity, deepcopy, quotekeys, bless,
-                       maxdepth, sortkeys);
+                       maxdepth, sortkeys, use_sparse_seen_hash,
+                       useqq, maxrecurse);
                if (ix < ixmax)
-                   sv_catpvn(retval, ",", 1);
+                   sv_catpvs(retval, ",");
            }
            if (ixmax >= 0) {
                SV * const opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), (*levelp)-1);
@@ -594,9 +788,9 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
                SvREFCNT_dec(opad);
            }
            if (name[0] == '@')
-               sv_catpvn(retval, ")", 1);
+               sv_catpvs(retval, ")");
            else
-               sv_catpvn(retval, "]", 1);
+               sv_catpvs(retval, "]");
            SvREFCNT_dec(ixsv);
            SvREFCNT_dec(totpad);
            Safefree(iname);
@@ -604,7 +798,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
        else if (realtype == SVt_PVHV) {
            SV *totpad, *newapad;
            SV *sname;
-           HE *entry;
+           HE *entry = NULL;
            char *key;
            I32 klen;
            SV *hval;
@@ -612,11 +806,11 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
        
            SV * const iname = newSVpvn(name, namelen);
            if (name[0] == '%') {
-               sv_catpvn(retval, "(", 1);
+               sv_catpvs(retval, "(");
                (SvPVX(iname))[0] = '$';
            }
            else {
-               sv_catpvn(retval, "{", 1);
+               sv_catpvs(retval, "{");
                /* omit "->" in $foo[0]->{bar}, but not in ${$foo}->{bar} */
                if ((namelen > 0
                     && name[namelen-1] != ']' && name[namelen-1] != '}')
@@ -624,16 +818,16 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
                        && (name[1] == '{'
                            || (name[0] == '\\' && name[2] == '{'))))
                {
-                   sv_catpvn(iname, "->", 2);
+                   sv_catpvs(iname, "->");
                }
            }
            if (name[0] == '*' && name[namelen-1] == '}' && namelen >= 8 &&
                (instr(name+namelen-8, "{SCALAR}") ||
                 instr(name+namelen-7, "{ARRAY}") ||
                 instr(name+namelen-6, "{HASH}"))) {
-               sv_catpvn(iname, "->", 2);
+               sv_catpvs(iname, "->");
            }
-           sv_catpvn(iname, "{", 1);
+           sv_catpvs(iname, "{");
            totpad = newSVsv(sep);
            sv_catsv(totpad, pad);
            sv_catsv(totpad, apad);
@@ -642,25 +836,34 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
            if (sortkeys) {
                if (sortkeys == &PL_sv_yes) {
 #if PERL_VERSION < 8
-                    sortkeys = sv_2mortal(newSVpvn("Data::Dumper::_sortkeys", 23));
+                    sortkeys = sv_2mortal(newSVpvs("Data::Dumper::_sortkeys"));
 #else
                    keys = newAV();
                    (void)hv_iterinit((HV*)ival);
                    while ((entry = hv_iternext((HV*)ival))) {
                        sv = hv_iterkeysv(entry);
-                       SvREFCNT_inc(sv);
+                       (void)SvREFCNT_inc(sv);
                        av_push(keys, sv);
                    }
-# ifdef USE_LOCALE_NUMERIC
-                   sortsv(AvARRAY(keys), 
-                          av_len(keys)+1, 
-                          IN_LOCALE ? Perl_sv_cmp_locale : Perl_sv_cmp);
-# else
-                   sortsv(AvARRAY(keys), 
-                          av_len(keys)+1, 
-                          Perl_sv_cmp);
+# ifdef USE_LOCALE_COLLATE
+#       ifdef IN_LC     /* Use this if available */
+                    if (IN_LC(LC_COLLATE))
+#       else
+                    if (IN_LOCALE)
+#       endif
+                    {
+                        sortsv(AvARRAY(keys),
+                          av_len(keys)+1,
+                           Perl_sv_cmp_locale);
+                    }
+                    else
 # endif
 #endif
+                    {
+                        sortsv(AvARRAY(keys),
+                          av_len(keys)+1,
+                           Perl_sv_cmp);
+                    }
                }
                if (sortkeys != &PL_sv_yes) {
                    dSP; ENTER; SAVETMPS; PUSHMARK(sp);
@@ -693,13 +896,13 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
                bool do_utf8 = FALSE;
 
                if (sortkeys) {
-                   if (!(keys && (I32)i <= av_len(keys))) break;
+                   if (!(keys && (SSize_t)i <= av_len(keys))) break;
                } else {
                    if (!(entry = hv_iternext((HV *)ival))) break;
                }
 
                if (i)
-                   sv_catpvn(retval, ",", 1);
+                   sv_catpvs(retval, ",");
 
                if (sortkeys) {
                    char *key;
@@ -721,31 +924,27 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
 
                 sv_catsv(retval, totpad);
                 sv_catsv(retval, ipad);
-                /* old logic was first to check utf8 flag, and if utf8 always
+                /* The (very)
+                   old logic was first to check utf8 flag, and if utf8 always
                    call esc_q_utf8.  This caused test to break under -Mutf8,
                    because there even strings like 'c' have utf8 flag on.
                    Hence with quotekeys == 0 the XS code would still '' quote
                    them based on flags, whereas the perl code would not,
                    based on regexps.
-                   The perl code is correct.
-                   needs_quote() decides that anything that isn't a valid
-                   perl identifier needs to be quoted, hence only correctly
-                   formed strings with no characters outside [A-Za-z0-9_:]
-                   won't need quoting.  None of those characters are used in
-                   the byte encoding of utf8, so anything with utf8
-                   encoded characters in will need quoting. Hence strings
-                   with utf8 encoded characters in will end up inside do_utf8
-                   just like before, but now strings with utf8 flag set but
-                   only ascii characters will end up in the unquoted section.
-
-                   There should also be less tests for the (probably currently)
-                   more common doesn't need quoting case.
-                   The code is also smaller (22044 vs 22260) because I've been
-                   able to pull the common logic out to both sides.  */
-                if (quotekeys || needs_quote(key,keylen)) {
-                    if (do_utf8) {
+
+                   The old logic checked that the string was a valid
+                   perl glob name (foo::bar), which isn't safe under
+                   strict, and differs from the perl code which only
+                   accepts simple identifiers.
+
+                   With the fix for [perl #120384] I chose to make
+                   their handling of key quoting compatible between XS
+                   and perl.
+                 */
+                if (quotekeys || key_needs_quote(key,keylen)) {
+                    if (do_utf8 || useqq) {
                         STRLEN ocur = SvCUR(retval);
-                        nlen = esc_q_utf8(aTHX_ retval, key, klen);
+                        nlen = esc_q_utf8(aTHX_ retval, key, klen, do_utf8, useqq);
                         nkey = SvPVX(retval) + ocur;
                     }
                     else {
@@ -770,7 +969,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
                }
                 sname = newSVsv(iname);
                 sv_catpvn(sname, nkey, nlen);
-                sv_catpvn(sname, "}", 1);
+                sv_catpvs(sname, "}");
 
                sv_catsv(retval, pair);
                if (indent >= 2) {
@@ -790,7 +989,8 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
                DD_dump(aTHX_ hval, SvPVX_const(sname), SvCUR(sname), retval, seenhv,
                        postav, levelp, indent, pad, xpad, newapad, sep, pair,
                        freezer, toaster, purity, deepcopy, quotekeys, bless,
-                       maxdepth, sortkeys);
+                       maxdepth, sortkeys, use_sparse_seen_hash, useqq,
+                       maxrecurse);
                SvREFCNT_dec(sname);
                Safefree(nkey_buffer);
                if (indent >= 2)
@@ -803,14 +1003,14 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
                SvREFCNT_dec(opad);
            }
            if (name[0] == '%')
-               sv_catpvn(retval, ")", 1);
+               sv_catpvs(retval, ")");
            else
-               sv_catpvn(retval, "}", 1);
+               sv_catpvs(retval, "}");
            SvREFCNT_dec(iname);
            SvREFCNT_dec(totpad);
        }
        else if (realtype == SVt_PVCV) {
-           sv_catpvn(retval, "sub { \"DUMMY\" }", 15);
+           sv_catpvs(retval, "sub { \"DUMMY\" }");
            if (purity)
                warn("Encountered CODE ref, using dummy placeholder");
        }
@@ -826,7 +1026,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
                SvREFCNT_dec(apad);
                apad = blesspad;
            }
-           sv_catpvn(retval, ", '", 3);
+           sv_catpvs(retval, ", '");
 
            plen = strlen(realpack);
            pticks = num_q(realpack, plen);
@@ -845,11 +1045,11 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
            else {
                sv_catpvn(retval, realpack, strlen(realpack));
            }
-           sv_catpvn(retval, "' )", 3);
+           sv_catpvs(retval, "' )");
            if (toaster && SvPOK(toaster) && SvCUR(toaster)) {
-               sv_catpvn(retval, "->", 2);
+               sv_catpvs(retval, "->");
                sv_catsv(retval, toaster);
-               sv_catpvn(retval, "()", 2);
+               sv_catpvs(retval, "()");
            }
        }
        SvREFCNT_dec(ipad);
@@ -874,14 +1074,21 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
                if ((svp = av_fetch(seenentry, 0, FALSE)) && (othername = *svp)
                    && (svp = av_fetch(seenentry, 2, FALSE)) && *svp && SvIV(*svp) > 0)
                {
-                   sv_catpvn(retval, "${", 2);
+                   sv_catpvs(retval, "${");
                    sv_catsv(retval, othername);
-                   sv_catpvn(retval, "}", 1);
+                   sv_catpvs(retval, "}");
                    return 1;
                }
            }
-           else if (val != &PL_sv_undef) {
-               SV * const namesv = newSVpvn("\\", 1);
+            /* If we're allowed to keep only a sparse "seen" hash
+             * (IOW, the user does not expect it to contain everything
+             * after the dump, then only store in seen hash if the SV
+             * ref count is larger than 1. If it's 1, then we know that
+             * there is no other reference, duh. This is an optimization.
+             * Note that we'd have to check for weak-refs, too, but this is
+             * already the branch for non-refs only. */
+           else if (val != &PL_sv_undef && (!use_sparse_seen_hash || SvREFCNT(val) > 1)) {
+               SV * const namesv = newSVpvs("\\");
                sv_catpvn(namesv, name, namelen);
                seenentry = newAV();
                av_push(seenentry, namesv);
@@ -918,16 +1125,21 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
            if(i) ++c, --i;                     /* just get the name */
            if (i >= 6 && strncmp(c, "main::", 6) == 0) {
                c += 4;
-               if (i == 6) i = 0; else i -= 4;
+#if PERL_VERSION < 7
+               if (i == 6 || (i == 7 && c[6] == '\0'))
+#else
+               if (i == 6)
+#endif
+                   i = 0; else i -= 4;
            }
-           if (needs_quote(c,i)) {
+            if (globname_needs_quote(c,i)) {
 #ifdef GvNAMEUTF8
              if (GvNAMEUTF8(val)) {
                sv_grow(retval, SvCUR(retval)+2);
                r = SvPVX(retval)+SvCUR(retval);
                r[0] = '*'; r[1] = '{';
                SvCUR_set(retval, SvCUR(retval)+2);
-               esc_q_utf8(aTHX_ retval, c, i);
+               esc_q_utf8(aTHX_ retval, c, i, 1, useqq);
                sv_grow(retval, SvCUR(retval)+2);
                r = SvPVX(retval)+SvCUR(retval);
                r[0] = '}'; r[1] = '\0';
@@ -957,8 +1169,8 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
                static const char* const entries[] = { "{SCALAR}", "{ARRAY}", "{HASH}" };
                static const STRLEN sizes[] = { 8, 7, 6 };
                SV *e;
-               SV * const nname = newSVpvn("", 0);
-               SV * const newapad = newSVpvn("", 0);
+               SV * const nname = newSVpvs("");
+               SV * const newapad = newSVpvs("");
                GV * const gv = (GV*)val;
                I32 j;
                
@@ -975,7 +1187,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
                        
                        sv_setsv(nname, postentry);
                        sv_catpvn(nname, entries[j], sizes[j]);
-                       sv_catpvn(postentry, " = ", 3);
+                       sv_catpvs(postentry, " = ");
                        av_push(postav, postentry);
                        e = newRV_inc(e);
                        
@@ -987,7 +1199,8 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
                                seenhv, postav, &nlevel, indent, pad, xpad,
                                newapad, sep, pair, freezer, toaster, purity,
                                deepcopy, quotekeys, bless, maxdepth, 
-                               sortkeys);
+                               sortkeys, use_sparse_seen_hash, useqq,
+                               maxrecurse);
                        SvREFCNT_dec(e);
                    }
                }
@@ -997,11 +1210,11 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
            }
        }
        else if (val == &PL_sv_undef || !SvOK(val)) {
-           sv_catpvn(retval, "undef", 5);
+           sv_catpvs(retval, "undef");
        }
 #ifdef SvVOK
        else if (SvMAGICAL(val) && (mg = mg_find(val, 'V'))) {
-# ifndef PL_vtbl_vstring
+# if !defined(PL_vtbl_vstring) && PERL_VERSION < 17
            SV * const vecsv = sv_newmortal();
 #  if PERL_VERSION < 10
            scan_vstring(mg->mg_ptr, vecsv);
@@ -1013,11 +1226,20 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
            sv_catpvn(retval, (const char *)mg->mg_ptr, mg->mg_len);
        }
 #endif
+
        else {
         integer_came_from_string:
-           c = SvPV(val, i);
-           if (DO_UTF8(val))
-               i += esc_q_utf8(aTHX_ retval, c, i);
+            c = SvPV(val, i);
+            /* the pure perl and XS non-qq outputs have historically been
+             * different in this case, but for useqq, let's try to match
+             * the pure perl code.
+             * see [perl #74798]
+             */
+            if (useqq && safe_decimal_number(c, i)) {
+                sv_catsv(retval, val);
+            }
+           else if (DO_UTF8(val) || useqq)
+               i += esc_q_utf8(aTHX_ retval, c, i, DO_UTF8(val), useqq);
            else {
                sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */
                r = SvPVX(retval) + SvCUR(retval);
@@ -1048,7 +1270,7 @@ MODULE = Data::Dumper             PACKAGE = Data::Dumper         PREFIX = Data_Dumper_
 #
 # This is the exact equivalent of Dump.  Well, almost. The things that are
 # different as of now (due to Laziness):
-#   * doesn't do double-quotes yet.
+#   * doesn't deparse yet.'
 #
 
 void
@@ -1062,13 +1284,16 @@ Data_Dumper_Dumpxs(href, ...)
            HV *seenhv = NULL;
            AV *postav, *todumpav, *namesav;
            I32 level = 0;
-           I32 indent, terse, i, imax, postlen;
+           I32 indent, terse, useqq;
+           SSize_t i, imax, postlen;
            SV **svp;
            SV *val, *name, *pad, *xpad, *apad, *sep, *pair, *varname;
            SV *freezer, *toaster, *bless, *sortkeys;
            I32 purity, deepcopy, quotekeys, maxdepth = 0;
+           IV maxrecurse = 1000;
            char tmpbuf[1024];
            I32 gimme = GIMME;
+            int use_sparse_seen_hash = 0;
 
            if (!SvROK(href)) {         /* call new to get an object first */
                if (items < 2)
@@ -1078,10 +1303,11 @@ Data_Dumper_Dumpxs(href, ...)
                SAVETMPS;
                
                PUSHMARK(sp);
-               XPUSHs(href);
-               XPUSHs(sv_2mortal(newSVsv(ST(1))));
+                EXTEND(SP, 3); /* 3 == max of all branches below */
+               PUSHs(href);
+               PUSHs(sv_2mortal(newSVsv(ST(1))));
                if (items >= 3)
-                   XPUSHs(sv_2mortal(newSVsv(ST(2))));
+                   PUSHs(sv_2mortal(newSVsv(ST(2))));
                PUTBACK;
                i = perl_call_method("new", G_SCALAR);
                SPAGAIN;
@@ -1101,16 +1327,20 @@ Data_Dumper_Dumpxs(href, ...)
                = freezer = toaster = bless = sortkeys = &PL_sv_undef;
            name = sv_newmortal();
            indent = 2;
-           terse = purity = deepcopy = 0;
+           terse = purity = deepcopy = useqq = 0;
            quotekeys = 1;
        
-           retval = newSVpvn("", 0);
+           retval = newSVpvs("");
            if (SvROK(href)
                && (hv = (HV*)SvRV((SV*)href))
                && SvTYPE(hv) == SVt_PVHV)              {
 
                if ((svp = hv_fetch(hv, "seen", 4, FALSE)) && SvROK(*svp))
                    seenhv = (HV*)SvRV(*svp);
+                else
+                    use_sparse_seen_hash = 1;
+               if ((svp = hv_fetch(hv, "noseen", 6, FALSE)))
+                   use_sparse_seen_hash = (SvOK(*svp) && SvIV(*svp) != 0);
                if ((svp = hv_fetch(hv, "todump", 6, FALSE)) && SvROK(*svp))
                    todumpav = (AV*)SvRV(*svp);
                if ((svp = hv_fetch(hv, "names", 5, FALSE)) && SvROK(*svp))
@@ -1121,10 +1351,8 @@ Data_Dumper_Dumpxs(href, ...)
                    purity = SvIV(*svp);
                if ((svp = hv_fetch(hv, "terse", 5, FALSE)))
                    terse = SvTRUE(*svp);
-#if 0 /* useqq currently unused */
                if ((svp = hv_fetch(hv, "useqq", 5, FALSE)))
                    useqq = SvTRUE(*svp);
-#endif
                if ((svp = hv_fetch(hv, "pad", 3, FALSE)))
                    pad = *svp;
                if ((svp = hv_fetch(hv, "xpad", 4, FALSE)))
@@ -1149,6 +1377,8 @@ Data_Dumper_Dumpxs(href, ...)
                    bless = *svp;
                if ((svp = hv_fetch(hv, "maxdepth", 8, FALSE)))
                    maxdepth = SvIV(*svp);
+               if ((svp = hv_fetch(hv, "maxrecurse", 10, FALSE)))
+                   maxrecurse = SvIV(*svp);
                if ((svp = hv_fetch(hv, "sortkeys", 8, FALSE))) {
                    sortkeys = *svp;
                    if (! SvTRUE(sortkeys))
@@ -1166,7 +1396,7 @@ Data_Dumper_Dumpxs(href, ...)
                    imax = av_len(todumpav);
                else
                    imax = -1;
-               valstr = newSVpvn("",0);
+               valstr = newSVpvs("");
                for (i = 0; i <= imax; ++i) {
                    SV *newapad;
                
@@ -1228,7 +1458,8 @@ Data_Dumper_Dumpxs(href, ...)
                    DD_dump(aTHX_ val, SvPVX_const(name), SvCUR(name), valstr, seenhv,
                            postav, &level, indent, pad, xpad, newapad, sep, pair,
                            freezer, toaster, purity, deepcopy, quotekeys,
-                           bless, maxdepth, sortkeys);
+                           bless, maxdepth, sortkeys, use_sparse_seen_hash,
+                           useqq, maxrecurse);
                    SPAGAIN;
                
                    if (indent >= 2 && !terse)
@@ -1238,13 +1469,13 @@ Data_Dumper_Dumpxs(href, ...)
                    if (postlen >= 0 || !terse) {
                        sv_insert(valstr, 0, 0, " = ", 3);
                        sv_insert(valstr, 0, 0, SvPVX_const(name), SvCUR(name));
-                       sv_catpvn(valstr, ";", 1);
+                       sv_catpvs(valstr, ";");
                    }
                    sv_catsv(retval, pad);
                    sv_catsv(retval, valstr);
                    sv_catsv(retval, sep);
                    if (postlen >= 0) {
-                       I32 i;
+                       SSize_t i;
                        sv_catsv(retval, pad);
                        for (i = 0; i <= postlen; ++i) {
                            SV *elem;
@@ -1252,20 +1483,20 @@ Data_Dumper_Dumpxs(href, ...)
                            if (svp && (elem = *svp)) {
                                sv_catsv(retval, elem);
                                if (i < postlen) {
-                                   sv_catpvn(retval, ";", 1);
+                                   sv_catpvs(retval, ";");
                                    sv_catsv(retval, sep);
                                    sv_catsv(retval, pad);
                                }
                            }
                        }
-                       sv_catpvn(retval, ";", 1);
+                       sv_catpvs(retval, ";");
                            sv_catsv(retval, sep);
                    }
                    sv_setpvn(valstr, "", 0);
                    if (gimme == G_ARRAY) {
                        XPUSHs(sv_2mortal(retval));
                        if (i < imax)   /* not the last time thro ? */
-                           retval = newSVpvn("",0);
+                           retval = newSVpvs("");
                    }
                }
                SvREFCNT_dec(postav);