cf0717e435b039103dab85e55ba57584cc5d3d7a
[perl.git] / dist / Data-Dumper / Dumper.xs
1 #define PERL_NO_GET_CONTEXT
2 #include "EXTERN.h"
3 #include "perl.h"
4 #include "XSUB.h"
5 #ifdef USE_PPPORT_H
6 #  define NEED_my_snprintf
7 #  define NEED_sv_2pv_flags
8 #  include "ppport.h"
9 #endif
10
11 #if PERL_VERSION < 8
12 #  define DD_USE_OLD_ID_FORMAT
13 #endif
14
15 static I32 num_q (const char *s, STRLEN slen);
16 static I32 esc_q (char *dest, const char *src, STRLEN slen);
17 static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen);
18 static I32 needs_quote(register const char *s);
19 static SV *sv_x (pTHX_ SV *sv, const char *str, STRLEN len, I32 n);
20 static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
21                     HV *seenhv, AV *postav, I32 *levelp, I32 indent,
22                     SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair,
23                     SV *freezer, SV *toaster,
24                     I32 purity, I32 deepcopy, I32 quotekeys, SV *bless,
25                     I32 maxdepth, SV *sortkeys);
26
27 #ifndef HvNAME_get
28 #define HvNAME_get HvNAME
29 #endif
30
31 #if PERL_VERSION <= 6 /* Perl 5.6 and earlier */
32
33 # ifdef EBCDIC
34 #  define UNI_TO_NATIVE(ch) (((ch) > 255) ? (ch) : ASCII_TO_NATIVE(ch))
35 # else
36 #  define UNI_TO_NATIVE(ch) (ch)
37 # endif
38
39 UV
40 Perl_utf8_to_uvchr(pTHX_ U8 *s, STRLEN *retlen)
41 {
42     const UV uv = utf8_to_uv(s, UTF8_MAXLEN, retlen,
43                     ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
44     return UNI_TO_NATIVE(uv);
45 }
46
47 # if !defined(PERL_IMPLICIT_CONTEXT)
48 #  define utf8_to_uvchr      Perl_utf8_to_uvchr
49 # else
50 #  define utf8_to_uvchr(a,b) Perl_utf8_to_uvchr(aTHX_ a,b)
51 # endif
52
53 #endif /* PERL_VERSION <= 6 */
54
55 /* Changes in 5.7 series mean that now IOK is only set if scalar is
56    precisely integer but in 5.6 and earlier we need to do a more
57    complex test  */
58 #if PERL_VERSION <= 6
59 #define DD_is_integer(sv) (SvIOK(sv) && (SvIsUV(val) ? SvUV(sv) == SvNV(sv) : SvIV(sv) == SvNV(sv)))
60 #else
61 #define DD_is_integer(sv) SvIOK(sv)
62 #endif
63
64 /* does a string need to be protected? */
65 static I32
66 needs_quote(register const char *s)
67 {
68 TOP:
69     if (s[0] == ':') {
70         if (*++s) {
71             if (*s++ != ':')
72                 return 1;
73         }
74         else
75             return 1;
76     }
77     if (isIDFIRST(*s)) {
78         while (*++s)
79             if (!isALNUM(*s)) {
80                 if (*s == ':')
81                     goto TOP;
82                 else
83                     return 1;
84             }
85     }
86     else
87         return 1;
88     return 0;
89 }
90
91 /* count the number of "'"s and "\"s in string */
92 static I32
93 num_q(register const char *s, register STRLEN slen)
94 {
95     register I32 ret = 0;
96
97     while (slen > 0) {
98         if (*s == '\'' || *s == '\\')
99             ++ret;
100         ++s;
101         --slen;
102     }
103     return ret;
104 }
105
106
107 /* returns number of chars added to escape "'"s and "\"s in s */
108 /* slen number of characters in s will be escaped */
109 /* destination must be long enough for additional chars */
110 static I32
111 esc_q(register char *d, register const char *s, register STRLEN slen)
112 {
113     register I32 ret = 0;
114
115     while (slen > 0) {
116         switch (*s) {
117         case '\'':
118         case '\\':
119             *d = '\\';
120             ++d; ++ret;
121         default:
122             *d = *s;
123             ++d; ++s; --slen;
124             break;
125         }
126     }
127     return ret;
128 }
129
130 static I32
131 esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen)
132 {
133     char *r, *rstart;
134     const char *s = src;
135     const char * const send = src + slen;
136     STRLEN j, cur = SvCUR(sv);
137     /* Could count 128-255 and 256+ in two variables, if we want to
138        be like &qquote and make a distinction.  */
139     STRLEN grow = 0;    /* bytes needed to represent chars 128+ */
140     /* STRLEN topbit_grow = 0;  bytes needed to represent chars 128-255 */
141     STRLEN backslashes = 0;
142     STRLEN single_quotes = 0;
143     STRLEN qq_escapables = 0;   /* " $ @ will need a \ in "" strings.  */
144     STRLEN normal = 0;
145     int increment;
146
147     /* this will need EBCDICification */
148     for (s = src; s < send; s += increment) {
149         const UV k = utf8_to_uvchr((U8*)s, NULL);
150
151         /* check for invalid utf8 */
152         increment = (k == 0 && *s != '\0') ? 1 : UTF8SKIP(s);
153
154 #ifdef EBCDIC
155         if (!isprint(k) || k > 256) {
156 #else
157         if (k > 127) {
158 #endif
159             /* 4: \x{} then count the number of hex digits.  */
160             grow += 4 + (k <= 0xFF ? 2 : k <= 0xFFF ? 3 : k <= 0xFFFF ? 4 :
161 #if UVSIZE == 4
162                 8 /* We may allocate a bit more than the minimum here.  */
163 #else
164                 k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
165 #endif
166                 );
167         } else if (k == '\\') {
168             backslashes++;
169         } else if (k == '\'') {
170             single_quotes++;
171         } else if (k == '"' || k == '$' || k == '@') {
172             qq_escapables++;
173         } else {
174             normal++;
175         }
176     }
177     if (grow) {
178         /* We have something needing hex. 3 is ""\0 */
179         sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
180                 + 2*qq_escapables + normal);
181         rstart = r = SvPVX(sv) + cur;
182
183         *r++ = '"';
184
185         for (s = src; s < send; s += UTF8SKIP(s)) {
186             const UV k = utf8_to_uvchr((U8*)s, NULL);
187
188             if (k == '"' || k == '\\' || k == '$' || k == '@') {
189                 *r++ = '\\';
190                 *r++ = (char)k;
191             }
192             else
193 #ifdef EBCDIC
194               if (isprint(k) && k < 256)
195 #else
196               if (k < 0x80)
197 #endif
198                 *r++ = (char)k;
199             else {
200 #if PERL_VERSION < 10
201                 sprintf(r, "\\x{%"UVxf"}", k);
202                 r += strlen(r);
203                 /* my_sprintf is not supported by ppport.h */
204 #else
205                 r = r + my_sprintf(r, "\\x{%"UVxf"}", k);
206 #endif
207             }
208         }
209         *r++ = '"';
210     } else {
211         /* Single quotes.  */
212         sv_grow(sv, cur + 3 + 2*backslashes + 2*single_quotes
213                 + qq_escapables + normal);
214         rstart = r = SvPVX(sv) + cur;
215         *r++ = '\'';
216         for (s = src; s < send; s ++) {
217             const char k = *s;
218             if (k == '\'' || k == '\\')
219                 *r++ = '\\';
220             *r++ = k;
221         }
222         *r++ = '\'';
223     }
224     *r = '\0';
225     j = r - rstart;
226     SvCUR_set(sv, cur + j);
227
228     return j;
229 }
230
231 /* append a repeated string to an SV */
232 static SV *
233 sv_x(pTHX_ SV *sv, const char *str, STRLEN len, I32 n)
234 {
235     if (!sv)
236         sv = newSVpvn("", 0);
237 #ifdef DEBUGGING
238     else
239         assert(SvTYPE(sv) >= SVt_PV);
240 #endif
241
242     if (n > 0) {
243         SvGROW(sv, len*n + SvCUR(sv) + 1);
244         if (len == 1) {
245             char * const start = SvPVX(sv) + SvCUR(sv);
246             SvCUR_set(sv, SvCUR(sv) + n);
247             start[n] = '\0';
248             while (n > 0)
249                 start[--n] = str[0];
250         }
251         else
252             while (n > 0) {
253                 sv_catpvn(sv, str, len);
254                 --n;
255             }
256     }
257     return sv;
258 }
259
260 /*
261  * This ought to be split into smaller functions. (it is one long function since
262  * it exactly parallels the perl version, which was one long thing for
263  * efficiency raisins.)  Ugggh!
264  */
265 static I32
266 DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
267         AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad,
268         SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity,
269         I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys)
270 {
271     char tmpbuf[128];
272     U32 i;
273     char *c, *r, *realpack;
274 #ifdef DD_USE_OLD_ID_FORMAT
275     char id[128];
276 #else
277     UV id_buffer;
278     char *const id = (char *)&id_buffer;
279 #endif
280     SV **svp;
281     SV *sv, *ipad, *ival;
282     SV *blesspad = Nullsv;
283     AV *seenentry = NULL;
284     char *iname;
285     STRLEN inamelen, idlen = 0;
286     U32 realtype;
287     bool no_bless = 0; /* when a qr// is blessed into Regexp we dont want to bless it.
288                           in later perls we should actually check the classname of the 
289                           engine. this gets tricky as it involves lexical issues that arent so
290                           easy to resolve */
291     bool is_regex = 0; /* we are dumping a regex, we need to know this before we bless */
292
293     if (!val)
294         return 0;
295
296     /* If the ouput buffer has less than some arbitrary amount of space
297        remaining, then enlarge it. For the test case (25M of output),
298        *1.1 was slower, *2.0 was the same, so the first guess of 1.5 is
299         deemed to be good enough.  */
300     if (SvTYPE(retval) >= SVt_PV && (SvLEN(retval) - SvCUR(retval)) < 42) {
301         sv_grow(retval, SvCUR(retval) * 3 / 2);
302     }
303
304     realtype = SvTYPE(val);
305
306     if (SvGMAGICAL(val))
307         mg_get(val);
308     if (SvROK(val)) {
309
310         /* If a freeze method is provided and the object has it, call
311            it.  Warn on errors. */
312         if (SvOBJECT(SvRV(val)) && freezer &&
313             SvPOK(freezer) && SvCUR(freezer) &&
314             gv_fetchmeth(SvSTASH(SvRV(val)), SvPVX_const(freezer), 
315                          SvCUR(freezer), -1) != NULL)
316         {
317             dSP; ENTER; SAVETMPS; PUSHMARK(sp);
318             XPUSHs(val); PUTBACK;
319             i = perl_call_method(SvPVX_const(freezer), G_EVAL|G_VOID);
320             SPAGAIN;
321             if (SvTRUE(ERRSV))
322                 warn("WARNING(Freezer method call failed): %"SVf"", ERRSV);
323             PUTBACK; FREETMPS; LEAVE;
324         }
325         
326         ival = SvRV(val);
327         realtype = SvTYPE(ival);
328 #ifdef DD_USE_OLD_ID_FORMAT
329         idlen = my_snprintf(id, sizeof(id), "0x%"UVxf, PTR2UV(ival));
330 #else
331         id_buffer = PTR2UV(ival);
332         idlen = sizeof(id_buffer);
333 #endif
334         if (SvOBJECT(ival))
335             realpack = HvNAME_get(SvSTASH(ival));
336         else
337             realpack = NULL;
338
339         /* if it has a name, we need to either look it up, or keep a tab
340          * on it so we know when we hit it later
341          */
342         if (namelen) {
343             if ((svp = hv_fetch(seenhv, id, idlen, FALSE))
344                 && (sv = *svp) && SvROK(sv) && (seenentry = (AV*)SvRV(sv)))
345             {
346                 SV *othername;
347                 if ((svp = av_fetch(seenentry, 0, FALSE))
348                     && (othername = *svp))
349                 {
350                     if (purity && *levelp > 0) {
351                         SV *postentry;
352                         
353                         if (realtype == SVt_PVHV)
354                             sv_catpvn(retval, "{}", 2);
355                         else if (realtype == SVt_PVAV)
356                             sv_catpvn(retval, "[]", 2);
357                         else
358                             sv_catpvn(retval, "do{my $o}", 9);
359                         postentry = newSVpvn(name, namelen);
360                         sv_catpvn(postentry, " = ", 3);
361                         sv_catsv(postentry, othername);
362                         av_push(postav, postentry);
363                     }
364                     else {
365                         if (name[0] == '@' || name[0] == '%') {
366                             if ((SvPVX_const(othername))[0] == '\\' &&
367                                 (SvPVX_const(othername))[1] == name[0]) {
368                                 sv_catpvn(retval, SvPVX_const(othername)+1,
369                                           SvCUR(othername)-1);
370                             }
371                             else {
372                                 sv_catpvn(retval, name, 1);
373                                 sv_catpvn(retval, "{", 1);
374                                 sv_catsv(retval, othername);
375                                 sv_catpvn(retval, "}", 1);
376                             }
377                         }
378                         else
379                             sv_catsv(retval, othername);
380                     }
381                     return 1;
382                 }
383                 else {
384 #ifdef DD_USE_OLD_ID_FORMAT
385                     warn("ref name not found for %s", id);
386 #else
387                     warn("ref name not found for 0x%"UVxf, PTR2UV(ival));
388 #endif
389                     return 0;
390                 }
391             }
392             else {   /* store our name and continue */
393                 SV *namesv;
394                 if (name[0] == '@' || name[0] == '%') {
395                     namesv = newSVpvn("\\", 1);
396                     sv_catpvn(namesv, name, namelen);
397                 }
398                 else if (realtype == SVt_PVCV && name[0] == '*') {
399                     namesv = newSVpvn("\\", 2);
400                     sv_catpvn(namesv, name, namelen);
401                     (SvPVX(namesv))[1] = '&';
402                 }
403                 else
404                     namesv = newSVpvn(name, namelen);
405                 seenentry = newAV();
406                 av_push(seenentry, namesv);
407                 (void)SvREFCNT_inc(val);
408                 av_push(seenentry, val);
409                 (void)hv_store(seenhv, id, idlen,
410                                newRV_inc((SV*)seenentry), 0);
411                 SvREFCNT_dec(seenentry);
412             }
413         }
414         /* regexps dont have to be blessed into package "Regexp"
415          * they can be blessed into any package. 
416          */
417 #if PERL_VERSION < 8
418         if (realpack && *realpack == 'R' && strEQ(realpack, "Regexp")) 
419 #elif PERL_VERSION < 11
420         if (realpack && realtype == SVt_PVMG && mg_find(ival, PERL_MAGIC_qr))
421 #else        
422         if (realpack && realtype == SVt_REGEXP) 
423 #endif
424         {
425             is_regex = 1;
426             if (strEQ(realpack, "Regexp")) 
427                 no_bless = 1;
428             else
429                 no_bless = 0;
430         }
431
432         /* If purity is not set and maxdepth is set, then check depth:
433          * if we have reached maximum depth, return the string
434          * representation of the thing we are currently examining
435          * at this depth (i.e., 'Foo=ARRAY(0xdeadbeef)').
436          */
437         if (!purity && maxdepth > 0 && *levelp >= maxdepth) {
438             STRLEN vallen;
439             const char * const valstr = SvPV(val,vallen);
440             sv_catpvn(retval, "'", 1);
441             sv_catpvn(retval, valstr, vallen);
442             sv_catpvn(retval, "'", 1);
443             return 1;
444         }
445
446         if (realpack && !no_bless) {                            /* we have a blessed ref */
447             STRLEN blesslen;
448             const char * const blessstr = SvPV(bless, blesslen);
449             sv_catpvn(retval, blessstr, blesslen);
450             sv_catpvn(retval, "( ", 2);
451             if (indent >= 2) {
452                 blesspad = apad;
453                 apad = newSVsv(apad);
454                 sv_x(aTHX_ apad, " ", 1, blesslen+2);
455             }
456         }
457
458         (*levelp)++;
459         ipad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp);
460
461         if (is_regex) 
462         {
463             STRLEN rlen;
464             const char *rval = SvPV(val, rlen);
465             const char *slash = strchr(rval, '/');
466             sv_catpvn(retval, "qr/", 3);
467             while (slash) {
468                 sv_catpvn(retval, rval, slash-rval);
469                 sv_catpvn(retval, "\\/", 2);
470                 rlen -= slash-rval+1;
471                 rval = slash+1;
472                 slash = strchr(rval, '/');
473             }
474             sv_catpvn(retval, rval, rlen);
475             sv_catpvn(retval, "/", 1);
476         } 
477         else if (
478 #if PERL_VERSION < 9
479                 realtype <= SVt_PVBM
480 #else
481                 realtype <= SVt_PVMG
482 #endif
483         ) {                          /* scalar ref */
484             SV * const namesv = newSVpvn("${", 2);
485             sv_catpvn(namesv, name, namelen);
486             sv_catpvn(namesv, "}", 1);
487             if (realpack) {                                  /* blessed */
488                 sv_catpvn(retval, "do{\\(my $o = ", 13);
489                 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
490                         postav, levelp, indent, pad, xpad, apad, sep, pair,
491                         freezer, toaster, purity, deepcopy, quotekeys, bless,
492                         maxdepth, sortkeys);
493                 sv_catpvn(retval, ")}", 2);
494             }                                                /* plain */
495             else {
496                 sv_catpvn(retval, "\\", 1);
497                 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
498                         postav, levelp, indent, pad, xpad, apad, sep, pair,
499                         freezer, toaster, purity, deepcopy, quotekeys, bless,
500                         maxdepth, sortkeys);
501             }
502             SvREFCNT_dec(namesv);
503         }
504         else if (realtype == SVt_PVGV) {                     /* glob ref */
505             SV * const namesv = newSVpvn("*{", 2);
506             sv_catpvn(namesv, name, namelen);
507             sv_catpvn(namesv, "}", 1);
508             sv_catpvn(retval, "\\", 1);
509             DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
510                     postav, levelp,     indent, pad, xpad, apad, sep, pair,
511                     freezer, toaster, purity, deepcopy, quotekeys, bless,
512                     maxdepth, sortkeys);
513             SvREFCNT_dec(namesv);
514         }
515         else if (realtype == SVt_PVAV) {
516             SV *totpad;
517             I32 ix = 0;
518             const I32 ixmax = av_len((AV *)ival);
519         
520             SV * const ixsv = newSViv(0);
521             /* allowing for a 24 char wide array index */
522             New(0, iname, namelen+28, char);
523             (void)strcpy(iname, name);
524             inamelen = namelen;
525             if (name[0] == '@') {
526                 sv_catpvn(retval, "(", 1);
527                 iname[0] = '$';
528             }
529             else {
530                 sv_catpvn(retval, "[", 1);
531                 /* omit "->" in $foo{bar}->[0], but not in ${$foo}->[0] */
532                 /*if (namelen > 0
533                     && name[namelen-1] != ']' && name[namelen-1] != '}'
534                     && (namelen < 4 || (name[1] != '{' && name[2] != '{')))*/
535                 if ((namelen > 0
536                      && name[namelen-1] != ']' && name[namelen-1] != '}')
537                     || (namelen > 4
538                         && (name[1] == '{'
539                             || (name[0] == '\\' && name[2] == '{'))))
540                 {
541                     iname[inamelen++] = '-'; iname[inamelen++] = '>';
542                     iname[inamelen] = '\0';
543                 }
544             }
545             if (iname[0] == '*' && iname[inamelen-1] == '}' && inamelen >= 8 &&
546                 (instr(iname+inamelen-8, "{SCALAR}") ||
547                  instr(iname+inamelen-7, "{ARRAY}") ||
548                  instr(iname+inamelen-6, "{HASH}"))) {
549                 iname[inamelen++] = '-'; iname[inamelen++] = '>';
550             }
551             iname[inamelen++] = '['; iname[inamelen] = '\0';
552             totpad = newSVsv(sep);
553             sv_catsv(totpad, pad);
554             sv_catsv(totpad, apad);
555
556             for (ix = 0; ix <= ixmax; ++ix) {
557                 STRLEN ilen;
558                 SV *elem;
559                 svp = av_fetch((AV*)ival, ix, FALSE);
560                 if (svp)
561                     elem = *svp;
562                 else
563                     elem = &PL_sv_undef;
564                 
565                 ilen = inamelen;
566                 sv_setiv(ixsv, ix);
567 #if PERL_VERSION < 10
568                 (void) sprintf(iname+ilen, "%"IVdf, (IV)ix);
569                 ilen = strlen(iname);
570 #else
571                 ilen = ilen + my_sprintf(iname+ilen, "%"IVdf, (IV)ix);
572 #endif
573                 iname[ilen++] = ']'; iname[ilen] = '\0';
574                 if (indent >= 3) {
575                     sv_catsv(retval, totpad);
576                     sv_catsv(retval, ipad);
577                     sv_catpvn(retval, "#", 1);
578                     sv_catsv(retval, ixsv);
579                 }
580                 sv_catsv(retval, totpad);
581                 sv_catsv(retval, ipad);
582                 DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav,
583                         levelp, indent, pad, xpad, apad, sep, pair,
584                         freezer, toaster, purity, deepcopy, quotekeys, bless,
585                         maxdepth, sortkeys);
586                 if (ix < ixmax)
587                     sv_catpvn(retval, ",", 1);
588             }
589             if (ixmax >= 0) {
590                 SV * const opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), (*levelp)-1);
591                 sv_catsv(retval, totpad);
592                 sv_catsv(retval, opad);
593                 SvREFCNT_dec(opad);
594             }
595             if (name[0] == '@')
596                 sv_catpvn(retval, ")", 1);
597             else
598                 sv_catpvn(retval, "]", 1);
599             SvREFCNT_dec(ixsv);
600             SvREFCNT_dec(totpad);
601             Safefree(iname);
602         }
603         else if (realtype == SVt_PVHV) {
604             SV *totpad, *newapad;
605             SV *sname;
606             HE *entry;
607             char *key;
608             I32 klen;
609             SV *hval;
610             AV *keys = NULL;
611         
612             SV * const iname = newSVpvn(name, namelen);
613             if (name[0] == '%') {
614                 sv_catpvn(retval, "(", 1);
615                 (SvPVX(iname))[0] = '$';
616             }
617             else {
618                 sv_catpvn(retval, "{", 1);
619                 /* omit "->" in $foo[0]->{bar}, but not in ${$foo}->{bar} */
620                 if ((namelen > 0
621                      && name[namelen-1] != ']' && name[namelen-1] != '}')
622                     || (namelen > 4
623                         && (name[1] == '{'
624                             || (name[0] == '\\' && name[2] == '{'))))
625                 {
626                     sv_catpvn(iname, "->", 2);
627                 }
628             }
629             if (name[0] == '*' && name[namelen-1] == '}' && namelen >= 8 &&
630                 (instr(name+namelen-8, "{SCALAR}") ||
631                  instr(name+namelen-7, "{ARRAY}") ||
632                  instr(name+namelen-6, "{HASH}"))) {
633                 sv_catpvn(iname, "->", 2);
634             }
635             sv_catpvn(iname, "{", 1);
636             totpad = newSVsv(sep);
637             sv_catsv(totpad, pad);
638             sv_catsv(totpad, apad);
639         
640             /* If requested, get a sorted/filtered array of hash keys */
641             if (sortkeys) {
642                 if (sortkeys == &PL_sv_yes) {
643 #if PERL_VERSION < 8
644                     sortkeys = sv_2mortal(newSVpvn("Data::Dumper::_sortkeys", 23));
645 #else
646                     keys = newAV();
647                     (void)hv_iterinit((HV*)ival);
648                     while ((entry = hv_iternext((HV*)ival))) {
649                         sv = hv_iterkeysv(entry);
650                         SvREFCNT_inc(sv);
651                         av_push(keys, sv);
652                     }
653 # ifdef USE_LOCALE_NUMERIC
654                     sortsv(AvARRAY(keys), 
655                            av_len(keys)+1, 
656                            IN_LOCALE ? Perl_sv_cmp_locale : Perl_sv_cmp);
657 # else
658                     sortsv(AvARRAY(keys), 
659                            av_len(keys)+1, 
660                            Perl_sv_cmp);
661 # endif
662 #endif
663                 }
664                 if (sortkeys != &PL_sv_yes) {
665                     dSP; ENTER; SAVETMPS; PUSHMARK(sp);
666                     XPUSHs(sv_2mortal(newRV_inc(ival))); PUTBACK;
667                     i = perl_call_sv(sortkeys, G_SCALAR | G_EVAL);
668                     SPAGAIN;
669                     if (i) {
670                         sv = POPs;
671                         if (SvROK(sv) && (SvTYPE(SvRV(sv)) == SVt_PVAV))
672                             keys = (AV*)SvREFCNT_inc(SvRV(sv));
673                     }
674                     if (! keys)
675                         warn("Sortkeys subroutine did not return ARRAYREF\n");
676                     PUTBACK; FREETMPS; LEAVE;
677                 }
678                 if (keys)
679                     sv_2mortal((SV*)keys);
680             }
681             else
682                 (void)hv_iterinit((HV*)ival);
683
684             /* foreach (keys %hash) */
685             for (i = 0; 1; i++) {
686                 char *nkey;
687                 char *nkey_buffer = NULL;
688                 I32 nticks = 0;
689                 SV* keysv;
690                 STRLEN keylen;
691                 I32 nlen;
692                 bool do_utf8 = FALSE;
693
694                if (sortkeys) {
695                    if (!(keys && (I32)i <= av_len(keys))) break;
696                } else {
697                    if (!(entry = hv_iternext((HV *)ival))) break;
698                }
699
700                 if (i)
701                     sv_catpvn(retval, ",", 1);
702
703                 if (sortkeys) {
704                     char *key;
705                     svp = av_fetch(keys, i, FALSE);
706                     keysv = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
707                     key = SvPV(keysv, keylen);
708                     svp = hv_fetch((HV*)ival, key,
709                                    SvUTF8(keysv) ? -(I32)keylen : (I32)keylen, 0);
710                     hval = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
711                 }
712                 else {
713                     keysv = hv_iterkeysv(entry);
714                     hval = hv_iterval((HV*)ival, entry);
715                 }
716
717                 key = SvPV(keysv, keylen);
718                 do_utf8 = DO_UTF8(keysv);
719                 klen = keylen;
720
721                 sv_catsv(retval, totpad);
722                 sv_catsv(retval, ipad);
723                 /* old logic was first to check utf8 flag, and if utf8 always
724                    call esc_q_utf8.  This caused test to break under -Mutf8,
725                    because there even strings like 'c' have utf8 flag on.
726                    Hence with quotekeys == 0 the XS code would still '' quote
727                    them based on flags, whereas the perl code would not,
728                    based on regexps.
729                    The perl code is correct.
730                    needs_quote() decides that anything that isn't a valid
731                    perl identifier needs to be quoted, hence only correctly
732                    formed strings with no characters outside [A-Za-z0-9_:]
733                    won't need quoting.  None of those characters are used in
734                    the byte encoding of utf8, so anything with utf8
735                    encoded characters in will need quoting. Hence strings
736                    with utf8 encoded characters in will end up inside do_utf8
737                    just like before, but now strings with utf8 flag set but
738                    only ascii characters will end up in the unquoted section.
739
740                    There should also be less tests for the (probably currently)
741                    more common doesn't need quoting case.
742                    The code is also smaller (22044 vs 22260) because I've been
743                    able to pull the common logic out to both sides.  */
744                 if (quotekeys || needs_quote(key)) {
745                     if (do_utf8) {
746                         STRLEN ocur = SvCUR(retval);
747                         nlen = esc_q_utf8(aTHX_ retval, key, klen);
748                         nkey = SvPVX(retval) + ocur;
749                     }
750                     else {
751                         nticks = num_q(key, klen);
752                         New(0, nkey_buffer, klen+nticks+3, char);
753                         nkey = nkey_buffer;
754                         nkey[0] = '\'';
755                         if (nticks)
756                             klen += esc_q(nkey+1, key, klen);
757                         else
758                             (void)Copy(key, nkey+1, klen, char);
759                         nkey[++klen] = '\'';
760                         nkey[++klen] = '\0';
761                         nlen = klen;
762                         sv_catpvn(retval, nkey, klen);
763                     }
764                 }
765                 else {
766                     nkey = key;
767                     nlen = klen;
768                     sv_catpvn(retval, nkey, klen);
769                 }
770                 sname = newSVsv(iname);
771                 sv_catpvn(sname, nkey, nlen);
772                 sv_catpvn(sname, "}", 1);
773
774                 sv_catsv(retval, pair);
775                 if (indent >= 2) {
776                     char *extra;
777                     I32 elen = 0;
778                     newapad = newSVsv(apad);
779                     New(0, extra, klen+4+1, char);
780                     while (elen < (klen+4))
781                         extra[elen++] = ' ';
782                     extra[elen] = '\0';
783                     sv_catpvn(newapad, extra, elen);
784                     Safefree(extra);
785                 }
786                 else
787                     newapad = apad;
788
789                 DD_dump(aTHX_ hval, SvPVX_const(sname), SvCUR(sname), retval, seenhv,
790                         postav, levelp, indent, pad, xpad, newapad, sep, pair,
791                         freezer, toaster, purity, deepcopy, quotekeys, bless,
792                         maxdepth, sortkeys);
793                 SvREFCNT_dec(sname);
794                 Safefree(nkey_buffer);
795                 if (indent >= 2)
796                     SvREFCNT_dec(newapad);
797             }
798             if (i) {
799                 SV *opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp-1);
800                 sv_catsv(retval, totpad);
801                 sv_catsv(retval, opad);
802                 SvREFCNT_dec(opad);
803             }
804             if (name[0] == '%')
805                 sv_catpvn(retval, ")", 1);
806             else
807                 sv_catpvn(retval, "}", 1);
808             SvREFCNT_dec(iname);
809             SvREFCNT_dec(totpad);
810         }
811         else if (realtype == SVt_PVCV) {
812             sv_catpvn(retval, "sub { \"DUMMY\" }", 15);
813             if (purity)
814                 warn("Encountered CODE ref, using dummy placeholder");
815         }
816         else {
817             warn("cannot handle ref type %d", (int)realtype);
818         }
819
820         if (realpack && !no_bless) {  /* free blessed allocs */
821             I32 plen;
822             I32 pticks;
823
824             if (indent >= 2) {
825                 SvREFCNT_dec(apad);
826                 apad = blesspad;
827             }
828             sv_catpvn(retval, ", '", 3);
829
830             plen = strlen(realpack);
831             pticks = num_q(realpack, plen);
832             if (pticks) { /* needs escaping */
833                 char *npack;
834                 char *npack_buffer = NULL;
835
836                 New(0, npack_buffer, plen+pticks+1, char);
837                 npack = npack_buffer;
838                 plen += esc_q(npack, realpack, plen);
839                 npack[plen] = '\0';
840
841                 sv_catpvn(retval, npack, plen);
842                 Safefree(npack_buffer);
843             }
844             else {
845                 sv_catpvn(retval, realpack, strlen(realpack));
846             }
847             sv_catpvn(retval, "' )", 3);
848             if (toaster && SvPOK(toaster) && SvCUR(toaster)) {
849                 sv_catpvn(retval, "->", 2);
850                 sv_catsv(retval, toaster);
851                 sv_catpvn(retval, "()", 2);
852             }
853         }
854         SvREFCNT_dec(ipad);
855         (*levelp)--;
856     }
857     else {
858         STRLEN i;
859         
860         if (namelen) {
861 #ifdef DD_USE_OLD_ID_FORMAT
862             idlen = my_snprintf(id, sizeof(id), "0x%"UVxf, PTR2UV(val));
863 #else
864             id_buffer = PTR2UV(val);
865             idlen = sizeof(id_buffer);
866 #endif
867             if ((svp = hv_fetch(seenhv, id, idlen, FALSE)) &&
868                 (sv = *svp) && SvROK(sv) &&
869                 (seenentry = (AV*)SvRV(sv)))
870             {
871                 SV *othername;
872                 if ((svp = av_fetch(seenentry, 0, FALSE)) && (othername = *svp)
873                     && (svp = av_fetch(seenentry, 2, FALSE)) && *svp && SvIV(*svp) > 0)
874                 {
875                     sv_catpvn(retval, "${", 2);
876                     sv_catsv(retval, othername);
877                     sv_catpvn(retval, "}", 1);
878                     return 1;
879                 }
880             }
881             else if (val != &PL_sv_undef) {
882                 SV * const namesv = newSVpvn("\\", 1);
883                 sv_catpvn(namesv, name, namelen);
884                 seenentry = newAV();
885                 av_push(seenentry, namesv);
886                 av_push(seenentry, newRV_inc(val));
887                 (void)hv_store(seenhv, id, idlen, newRV_inc((SV*)seenentry), 0);
888                 SvREFCNT_dec(seenentry);
889             }
890         }
891
892         if (DD_is_integer(val)) {
893             STRLEN len;
894             if (SvIsUV(val))
895               len = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"UVuf, SvUV(val));
896             else
897               len = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"IVdf, SvIV(val));
898             if (SvPOK(val)) {
899               /* Need to check to see if this is a string such as " 0".
900                  I'm assuming from sprintf isn't going to clash with utf8.
901                  Is this valid on EBCDIC?  */
902               STRLEN pvlen;
903               const char * const pv = SvPV(val, pvlen);
904               if (pvlen != len || memNE(pv, tmpbuf, len))
905                 goto integer_came_from_string;
906             }
907             if (len > 10) {
908               /* Looks like we're on a 64 bit system.  Make it a string so that
909                  if a 32 bit system reads the number it will cope better.  */
910               sv_catpvf(retval, "'%s'", tmpbuf);
911             } else
912               sv_catpvn(retval, tmpbuf, len);
913         }
914         else if (realtype == SVt_PVGV) {/* GLOBs can end up with scribbly names */
915             c = SvPV(val, i);
916             if(i) ++c, --i;                     /* just get the name */
917             if (i >= 6 && strncmp(c, "main::", 6) == 0) {
918                 c += 4;
919                 i -= 4;
920             }
921             if (needs_quote(c)) {
922                 sv_grow(retval, SvCUR(retval)+6+2*i);
923                 r = SvPVX(retval)+SvCUR(retval);
924                 r[0] = '*'; r[1] = '{'; r[2] = '\'';
925                 i += esc_q(r+3, c, i);
926                 i += 3;
927                 r[i++] = '\''; r[i++] = '}';
928                 r[i] = '\0';
929             }
930             else {
931                 sv_grow(retval, SvCUR(retval)+i+2);
932                 r = SvPVX(retval)+SvCUR(retval);
933                 r[0] = '*'; strcpy(r+1, c);
934                 i++;
935             }
936             SvCUR_set(retval, SvCUR(retval)+i);
937
938             if (purity) {
939                 static const char* const entries[] = { "{SCALAR}", "{ARRAY}", "{HASH}" };
940                 static const STRLEN sizes[] = { 8, 7, 6 };
941                 SV *e;
942                 SV * const nname = newSVpvn("", 0);
943                 SV * const newapad = newSVpvn("", 0);
944                 GV * const gv = (GV*)val;
945                 I32 j;
946                 
947                 for (j=0; j<3; j++) {
948                     e = ((j == 0) ? GvSV(gv) : (j == 1) ? (SV*)GvAV(gv) : (SV*)GvHV(gv));
949                     if (!e)
950                         continue;
951                     if (j == 0 && !SvOK(e))
952                         continue;
953
954                     {
955                         I32 nlevel = 0;
956                         SV *postentry = newSVpvn(r,i);
957                         
958                         sv_setsv(nname, postentry);
959                         sv_catpvn(nname, entries[j], sizes[j]);
960                         sv_catpvn(postentry, " = ", 3);
961                         av_push(postav, postentry);
962                         e = newRV_inc(e);
963                         
964                         SvCUR_set(newapad, 0);
965                         if (indent >= 2)
966                             (void)sv_x(aTHX_ newapad, " ", 1, SvCUR(postentry));
967                         
968                         DD_dump(aTHX_ e, SvPVX_const(nname), SvCUR(nname), postentry,
969                                 seenhv, postav, &nlevel, indent, pad, xpad,
970                                 newapad, sep, pair, freezer, toaster, purity,
971                                 deepcopy, quotekeys, bless, maxdepth, 
972                                 sortkeys);
973                         SvREFCNT_dec(e);
974                     }
975                 }
976                 
977                 SvREFCNT_dec(newapad);
978                 SvREFCNT_dec(nname);
979             }
980         }
981         else if (val == &PL_sv_undef || !SvOK(val)) {
982             sv_catpvn(retval, "undef", 5);
983         }
984         else {
985         integer_came_from_string:
986             c = SvPV(val, i);
987             if (DO_UTF8(val))
988                 i += esc_q_utf8(aTHX_ retval, c, i);
989             else {
990                 sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */
991                 r = SvPVX(retval) + SvCUR(retval);
992                 r[0] = '\'';
993                 i += esc_q(r+1, c, i);
994                 ++i;
995                 r[i++] = '\'';
996                 r[i] = '\0';
997                 SvCUR_set(retval, SvCUR(retval)+i);
998             }
999         }
1000     }
1001
1002     if (idlen) {
1003         if (deepcopy)
1004             (void)hv_delete(seenhv, id, idlen, G_DISCARD);
1005         else if (namelen && seenentry) {
1006             SV *mark = *av_fetch(seenentry, 2, TRUE);
1007             sv_setiv(mark,1);
1008         }
1009     }
1010     return 1;
1011 }
1012
1013
1014 MODULE = Data::Dumper           PACKAGE = Data::Dumper         PREFIX = Data_Dumper_
1015
1016 #
1017 # This is the exact equivalent of Dump.  Well, almost. The things that are
1018 # different as of now (due to Laziness):
1019 #   * doesn't do double-quotes yet.
1020 #
1021
1022 void
1023 Data_Dumper_Dumpxs(href, ...)
1024         SV      *href;
1025         PROTOTYPE: $;$$
1026         PPCODE:
1027         {
1028             HV *hv;
1029             SV *retval, *valstr;
1030             HV *seenhv = NULL;
1031             AV *postav, *todumpav, *namesav;
1032             I32 level = 0;
1033             I32 indent, terse, i, imax, postlen;
1034             SV **svp;
1035             SV *val, *name, *pad, *xpad, *apad, *sep, *pair, *varname;
1036             SV *freezer, *toaster, *bless, *sortkeys;
1037             I32 purity, deepcopy, quotekeys, maxdepth = 0;
1038             char tmpbuf[1024];
1039             I32 gimme = GIMME;
1040
1041             if (!SvROK(href)) {         /* call new to get an object first */
1042                 if (items < 2)
1043                     croak("Usage: Data::Dumper::Dumpxs(PACKAGE, VAL_ARY_REF, [NAME_ARY_REF])");
1044                 
1045                 ENTER;
1046                 SAVETMPS;
1047                 
1048                 PUSHMARK(sp);
1049                 XPUSHs(href);
1050                 XPUSHs(sv_2mortal(newSVsv(ST(1))));
1051                 if (items >= 3)
1052                     XPUSHs(sv_2mortal(newSVsv(ST(2))));
1053                 PUTBACK;
1054                 i = perl_call_method("new", G_SCALAR);
1055                 SPAGAIN;
1056                 if (i)
1057                     href = newSVsv(POPs);
1058
1059                 PUTBACK;
1060                 FREETMPS;
1061                 LEAVE;
1062                 if (i)
1063                     (void)sv_2mortal(href);
1064             }
1065
1066             todumpav = namesav = NULL;
1067             seenhv = NULL;
1068             val = pad = xpad = apad = sep = pair = varname
1069                 = freezer = toaster = bless = sortkeys = &PL_sv_undef;
1070             name = sv_newmortal();
1071             indent = 2;
1072             terse = purity = deepcopy = 0;
1073             quotekeys = 1;
1074         
1075             retval = newSVpvn("", 0);
1076             if (SvROK(href)
1077                 && (hv = (HV*)SvRV((SV*)href))
1078                 && SvTYPE(hv) == SVt_PVHV)              {
1079
1080                 if ((svp = hv_fetch(hv, "seen", 4, FALSE)) && SvROK(*svp))
1081                     seenhv = (HV*)SvRV(*svp);
1082                 if ((svp = hv_fetch(hv, "todump", 6, FALSE)) && SvROK(*svp))
1083                     todumpav = (AV*)SvRV(*svp);
1084                 if ((svp = hv_fetch(hv, "names", 5, FALSE)) && SvROK(*svp))
1085                     namesav = (AV*)SvRV(*svp);
1086                 if ((svp = hv_fetch(hv, "indent", 6, FALSE)))
1087                     indent = SvIV(*svp);
1088                 if ((svp = hv_fetch(hv, "purity", 6, FALSE)))
1089                     purity = SvIV(*svp);
1090                 if ((svp = hv_fetch(hv, "terse", 5, FALSE)))
1091                     terse = SvTRUE(*svp);
1092 #if 0 /* useqq currently unused */
1093                 if ((svp = hv_fetch(hv, "useqq", 5, FALSE)))
1094                     useqq = SvTRUE(*svp);
1095 #endif
1096                 if ((svp = hv_fetch(hv, "pad", 3, FALSE)))
1097                     pad = *svp;
1098                 if ((svp = hv_fetch(hv, "xpad", 4, FALSE)))
1099                     xpad = *svp;
1100                 if ((svp = hv_fetch(hv, "apad", 4, FALSE)))
1101                     apad = *svp;
1102                 if ((svp = hv_fetch(hv, "sep", 3, FALSE)))
1103                     sep = *svp;
1104                 if ((svp = hv_fetch(hv, "pair", 4, FALSE)))
1105                     pair = *svp;
1106                 if ((svp = hv_fetch(hv, "varname", 7, FALSE)))
1107                     varname = *svp;
1108                 if ((svp = hv_fetch(hv, "freezer", 7, FALSE)))
1109                     freezer = *svp;
1110                 if ((svp = hv_fetch(hv, "toaster", 7, FALSE)))
1111                     toaster = *svp;
1112                 if ((svp = hv_fetch(hv, "deepcopy", 8, FALSE)))
1113                     deepcopy = SvTRUE(*svp);
1114                 if ((svp = hv_fetch(hv, "quotekeys", 9, FALSE)))
1115                     quotekeys = SvTRUE(*svp);
1116                 if ((svp = hv_fetch(hv, "bless", 5, FALSE)))
1117                     bless = *svp;
1118                 if ((svp = hv_fetch(hv, "maxdepth", 8, FALSE)))
1119                     maxdepth = SvIV(*svp);
1120                 if ((svp = hv_fetch(hv, "sortkeys", 8, FALSE))) {
1121                     sortkeys = *svp;
1122                     if (! SvTRUE(sortkeys))
1123                         sortkeys = NULL;
1124                     else if (! (SvROK(sortkeys) &&
1125                                 SvTYPE(SvRV(sortkeys)) == SVt_PVCV) )
1126                     {
1127                         /* flag to use qsortsv() for sorting hash keys */       
1128                         sortkeys = &PL_sv_yes; 
1129                     }
1130                 }
1131                 postav = newAV();
1132
1133                 if (todumpav)
1134                     imax = av_len(todumpav);
1135                 else
1136                     imax = -1;
1137                 valstr = newSVpvn("",0);
1138                 for (i = 0; i <= imax; ++i) {
1139                     SV *newapad;
1140                 
1141                     av_clear(postav);
1142                     if ((svp = av_fetch(todumpav, i, FALSE)))
1143                         val = *svp;
1144                     else
1145                         val = &PL_sv_undef;
1146                     if ((svp = av_fetch(namesav, i, TRUE))) {
1147                         sv_setsv(name, *svp);
1148                         if (SvOK(*svp) && !SvPOK(*svp))
1149                             (void)SvPV_nolen_const(name);
1150                     }
1151                     else
1152                         (void)SvOK_off(name);
1153                 
1154                     if (SvPOK(name)) {
1155                         if ((SvPVX_const(name))[0] == '*') {
1156                             if (SvROK(val)) {
1157                                 switch (SvTYPE(SvRV(val))) {
1158                                 case SVt_PVAV:
1159                                     (SvPVX(name))[0] = '@';
1160                                     break;
1161                                 case SVt_PVHV:
1162                                     (SvPVX(name))[0] = '%';
1163                                     break;
1164                                 case SVt_PVCV:
1165                                     (SvPVX(name))[0] = '*';
1166                                     break;
1167                                 default:
1168                                     (SvPVX(name))[0] = '$';
1169                                     break;
1170                                 }
1171                             }
1172                             else
1173                                 (SvPVX(name))[0] = '$';
1174                         }
1175                         else if ((SvPVX_const(name))[0] != '$')
1176                             sv_insert(name, 0, 0, "$", 1);
1177                     }
1178                     else {
1179                         STRLEN nchars;
1180                         sv_setpvn(name, "$", 1);
1181                         sv_catsv(name, varname);
1182                         nchars = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"IVdf, (IV)(i+1));
1183                         sv_catpvn(name, tmpbuf, nchars);
1184                     }
1185                 
1186                     if (indent >= 2 && !terse) {
1187                         SV * const tmpsv = sv_x(aTHX_ NULL, " ", 1, SvCUR(name)+3);
1188                         newapad = newSVsv(apad);
1189                         sv_catsv(newapad, tmpsv);
1190                         SvREFCNT_dec(tmpsv);
1191                     }
1192                     else
1193                         newapad = apad;
1194                 
1195                     PUTBACK;
1196                     DD_dump(aTHX_ val, SvPVX_const(name), SvCUR(name), valstr, seenhv,
1197                             postav, &level, indent, pad, xpad, newapad, sep, pair,
1198                             freezer, toaster, purity, deepcopy, quotekeys,
1199                             bless, maxdepth, sortkeys);
1200                     SPAGAIN;
1201                 
1202                     if (indent >= 2 && !terse)
1203                         SvREFCNT_dec(newapad);
1204
1205                     postlen = av_len(postav);
1206                     if (postlen >= 0 || !terse) {
1207                         sv_insert(valstr, 0, 0, " = ", 3);
1208                         sv_insert(valstr, 0, 0, SvPVX_const(name), SvCUR(name));
1209                         sv_catpvn(valstr, ";", 1);
1210                     }
1211                     sv_catsv(retval, pad);
1212                     sv_catsv(retval, valstr);
1213                     sv_catsv(retval, sep);
1214                     if (postlen >= 0) {
1215                         I32 i;
1216                         sv_catsv(retval, pad);
1217                         for (i = 0; i <= postlen; ++i) {
1218                             SV *elem;
1219                             svp = av_fetch(postav, i, FALSE);
1220                             if (svp && (elem = *svp)) {
1221                                 sv_catsv(retval, elem);
1222                                 if (i < postlen) {
1223                                     sv_catpvn(retval, ";", 1);
1224                                     sv_catsv(retval, sep);
1225                                     sv_catsv(retval, pad);
1226                                 }
1227                             }
1228                         }
1229                         sv_catpvn(retval, ";", 1);
1230                             sv_catsv(retval, sep);
1231                     }
1232                     sv_setpvn(valstr, "", 0);
1233                     if (gimme == G_ARRAY) {
1234                         XPUSHs(sv_2mortal(retval));
1235                         if (i < imax)   /* not the last time thro ? */
1236                             retval = newSVpvn("",0);
1237                     }
1238                 }
1239                 SvREFCNT_dec(postav);
1240                 SvREFCNT_dec(valstr);
1241             }
1242             else
1243                 croak("Call to new() method failed to return HASH ref");
1244             if (gimme == G_SCALAR)
1245                 XPUSHs(sv_2mortal(retval));
1246         }