e3867a183830adf28fdfa1eecf658c74208551a0
[perl.git] / ext / Data-Dumper / Dumper.xs
1 #define PERL_NO_GET_CONTEXT
2 #include "EXTERN.h"
3 #include "perl.h"
4 #include "XSUB.h"
5 #ifdef USE_PPPORT_H
6 #  define NEED_my_snprintf
7 #  define NEED_sv_2pv_flags
8 #  include "ppport.h"
9 #endif
10
11 #if PERL_VERSION < 8
12 #  define DD_USE_OLD_ID_FORMAT
13 #endif
14
15 static I32 num_q (const char *s, STRLEN slen);
16 static I32 esc_q (char *dest, const char *src, STRLEN slen);
17 static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen);
18 static I32 needs_quote(register const char *s);
19 static SV *sv_x (pTHX_ SV *sv, const char *str, STRLEN len, I32 n);
20 static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
21                     HV *seenhv, AV *postav, I32 *levelp, I32 indent,
22                     SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair,
23                     SV *freezer, SV *toaster,
24                     I32 purity, I32 deepcopy, I32 quotekeys, SV *bless,
25                     I32 maxdepth, SV *sortkeys);
26
27 #ifndef HvNAME_get
28 #define HvNAME_get HvNAME
29 #endif
30
31 #if PERL_VERSION <= 6 /* Perl 5.6 and earlier */
32
33 # ifdef EBCDIC
34 #  define UNI_TO_NATIVE(ch) (((ch) > 255) ? (ch) : ASCII_TO_NATIVE(ch))
35 # else
36 #  define UNI_TO_NATIVE(ch) (ch)
37 # endif
38
39 UV
40 Perl_utf8_to_uvchr(pTHX_ U8 *s, STRLEN *retlen)
41 {
42     const UV uv = utf8_to_uv(s, UTF8_MAXLEN, retlen,
43                     ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
44     return UNI_TO_NATIVE(uv);
45 }
46
47 # if !defined(PERL_IMPLICIT_CONTEXT)
48 #  define utf8_to_uvchr      Perl_utf8_to_uvchr
49 # else
50 #  define utf8_to_uvchr(a,b) Perl_utf8_to_uvchr(aTHX_ a,b)
51 # endif
52
53 #endif /* PERL_VERSION <= 6 */
54
55 /* Changes in 5.7 series mean that now IOK is only set if scalar is
56    precisely integer but in 5.6 and earlier we need to do a more
57    complex test  */
58 #if PERL_VERSION <= 6
59 #define DD_is_integer(sv) (SvIOK(sv) && (SvIsUV(val) ? SvUV(sv) == SvNV(sv) : SvIV(sv) == SvNV(sv)))
60 #else
61 #define DD_is_integer(sv) SvIOK(sv)
62 #endif
63
64 /* does a string need to be protected? */
65 static I32
66 needs_quote(register const char *s)
67 {
68 TOP:
69     if (s[0] == ':') {
70         if (*++s) {
71             if (*s++ != ':')
72                 return 1;
73         }
74         else
75             return 1;
76     }
77     if (isIDFIRST(*s)) {
78         while (*++s)
79             if (!isALNUM(*s)) {
80                 if (*s == ':')
81                     goto TOP;
82                 else
83                     return 1;
84             }
85     }
86     else
87         return 1;
88     return 0;
89 }
90
91 /* count the number of "'"s and "\"s in string */
92 static I32
93 num_q(register const char *s, register STRLEN slen)
94 {
95     register I32 ret = 0;
96
97     while (slen > 0) {
98         if (*s == '\'' || *s == '\\')
99             ++ret;
100         ++s;
101         --slen;
102     }
103     return ret;
104 }
105
106
107 /* returns number of chars added to escape "'"s and "\"s in s */
108 /* slen number of characters in s will be escaped */
109 /* destination must be long enough for additional chars */
110 static I32
111 esc_q(register char *d, register const char *s, register STRLEN slen)
112 {
113     register I32 ret = 0;
114
115     while (slen > 0) {
116         switch (*s) {
117         case '\'':
118         case '\\':
119             *d = '\\';
120             ++d; ++ret;
121         default:
122             *d = *s;
123             ++d; ++s; --slen;
124             break;
125         }
126     }
127     return ret;
128 }
129
130 static I32
131 esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen)
132 {
133     char *r, *rstart;
134     const char *s = src;
135     const char * const send = src + slen;
136     STRLEN j, cur = SvCUR(sv);
137     /* Could count 128-255 and 256+ in two variables, if we want to
138        be like &qquote and make a distinction.  */
139     STRLEN grow = 0;    /* bytes needed to represent chars 128+ */
140     /* STRLEN topbit_grow = 0;  bytes needed to represent chars 128-255 */
141     STRLEN backslashes = 0;
142     STRLEN single_quotes = 0;
143     STRLEN qq_escapables = 0;   /* " $ @ will need a \ in "" strings.  */
144     STRLEN normal = 0;
145
146     /* this will need EBCDICification */
147     for (s = src; s < send; s += UTF8SKIP(s)) {
148         const UV k = utf8_to_uvchr((U8*)s, NULL);
149
150 #ifdef EBCDIC
151         if (!isprint(k) || k > 256) {
152 #else
153         if (k > 127) {
154 #endif
155             /* 4: \x{} then count the number of hex digits.  */
156             grow += 4 + (k <= 0xFF ? 2 : k <= 0xFFF ? 3 : k <= 0xFFFF ? 4 :
157 #if UVSIZE == 4
158                 8 /* We may allocate a bit more than the minimum here.  */
159 #else
160                 k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
161 #endif
162                 );
163         } else if (k == '\\') {
164             backslashes++;
165         } else if (k == '\'') {
166             single_quotes++;
167         } else if (k == '"' || k == '$' || k == '@') {
168             qq_escapables++;
169         } else {
170             normal++;
171         }
172     }
173     if (grow) {
174         /* We have something needing hex. 3 is ""\0 */
175         sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
176                 + 2*qq_escapables + normal);
177         rstart = r = SvPVX(sv) + cur;
178
179         *r++ = '"';
180
181         for (s = src; s < send; s += UTF8SKIP(s)) {
182             const UV k = utf8_to_uvchr((U8*)s, NULL);
183
184             if (k == '"' || k == '\\' || k == '$' || k == '@') {
185                 *r++ = '\\';
186                 *r++ = (char)k;
187             }
188             else
189 #ifdef EBCDIC
190               if (isprint(k) && k < 256)
191 #else
192               if (k < 0x80)
193 #endif
194                 *r++ = (char)k;
195             else {
196 #if PERL_VERSION < 10
197                 sprintf(r, "\\x{%"UVxf"}", k);
198                 r += strlen(r);
199                 /* my_sprintf is not supported by ppport.h */
200 #else
201                 r = r + my_sprintf(r, "\\x{%"UVxf"}", k);
202 #endif
203             }
204         }
205         *r++ = '"';
206     } else {
207         /* Single quotes.  */
208         sv_grow(sv, cur + 3 + 2*backslashes + 2*single_quotes
209                 + qq_escapables + normal);
210         rstart = r = SvPVX(sv) + cur;
211         *r++ = '\'';
212         for (s = src; s < send; s ++) {
213             const char k = *s;
214             if (k == '\'' || k == '\\')
215                 *r++ = '\\';
216             *r++ = k;
217         }
218         *r++ = '\'';
219     }
220     *r = '\0';
221     j = r - rstart;
222     SvCUR_set(sv, cur + j);
223
224     return j;
225 }
226
227 /* append a repeated string to an SV */
228 static SV *
229 sv_x(pTHX_ SV *sv, const char *str, STRLEN len, I32 n)
230 {
231     if (!sv)
232         sv = newSVpvn("", 0);
233 #ifdef DEBUGGING
234     else
235         assert(SvTYPE(sv) >= SVt_PV);
236 #endif
237
238     if (n > 0) {
239         SvGROW(sv, len*n + SvCUR(sv) + 1);
240         if (len == 1) {
241             char * const start = SvPVX(sv) + SvCUR(sv);
242             SvCUR_set(sv, SvCUR(sv) + n);
243             start[n] = '\0';
244             while (n > 0)
245                 start[--n] = str[0];
246         }
247         else
248             while (n > 0) {
249                 sv_catpvn(sv, str, len);
250                 --n;
251             }
252     }
253     return sv;
254 }
255
256 /*
257  * This ought to be split into smaller functions. (it is one long function since
258  * it exactly parallels the perl version, which was one long thing for
259  * efficiency raisins.)  Ugggh!
260  */
261 static I32
262 DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
263         AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad,
264         SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity,
265         I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys)
266 {
267     char tmpbuf[128];
268     U32 i;
269     char *c, *r, *realpack;
270 #ifdef DD_USE_OLD_ID_FORMAT
271     char id[128];
272 #else
273     UV id_buffer;
274     char *const id = (char *)&id_buffer;
275 #endif
276     SV **svp;
277     SV *sv, *ipad, *ival;
278     SV *blesspad = Nullsv;
279     AV *seenentry = NULL;
280     char *iname;
281     STRLEN inamelen, idlen = 0;
282     U32 realtype;
283     bool no_bless = 0; /* when a qr// is blessed into Regexp we dont want to bless it.
284                           in later perls we should actually check the classname of the 
285                           engine. this gets tricky as it involves lexical issues that arent so
286                           easy to resolve */
287     bool is_regex = 0; /* we are dumping a regex, we need to know this before we bless */
288
289     if (!val)
290         return 0;
291
292     /* If the ouput buffer has less than some arbitary amount of space
293        remaining, then enlarge it. For the test case (25M of output),
294        *1.1 was slower, *2.0 was the same, so the first guess of 1.5 is
295         deemed to be good enough.  */
296     if (SvTYPE(retval) >= SVt_PV && (SvLEN(retval) - SvCUR(retval)) < 42) {
297         sv_grow(retval, SvCUR(retval) * 3 / 2);
298     }
299
300     realtype = SvTYPE(val);
301
302     if (SvGMAGICAL(val))
303         mg_get(val);
304     if (SvROK(val)) {
305
306         /* If a freeze method is provided and the object has it, call
307            it.  Warn on errors. */
308         if (SvOBJECT(SvRV(val)) && freezer &&
309             SvPOK(freezer) && SvCUR(freezer) &&
310             gv_fetchmeth(SvSTASH(SvRV(val)), SvPVX_const(freezer), 
311                          SvCUR(freezer), -1) != NULL)
312         {
313             dSP; ENTER; SAVETMPS; PUSHMARK(sp);
314             XPUSHs(val); PUTBACK;
315             i = perl_call_method(SvPVX_const(freezer), G_EVAL|G_VOID);
316             SPAGAIN;
317             if (SvTRUE(ERRSV))
318                 warn("WARNING(Freezer method call failed): %"SVf"", ERRSV);
319             PUTBACK; FREETMPS; LEAVE;
320         }
321         
322         ival = SvRV(val);
323         realtype = SvTYPE(ival);
324 #ifdef DD_USE_OLD_ID_FORMAT
325         idlen = my_snprintf(id, sizeof(id), "0x%"UVxf, PTR2UV(ival));
326 #else
327         id_buffer = PTR2UV(ival);
328         idlen = sizeof(id_buffer);
329 #endif
330         if (SvOBJECT(ival))
331             realpack = HvNAME_get(SvSTASH(ival));
332         else
333             realpack = NULL;
334
335         /* if it has a name, we need to either look it up, or keep a tab
336          * on it so we know when we hit it later
337          */
338         if (namelen) {
339             if ((svp = hv_fetch(seenhv, id, idlen, FALSE))
340                 && (sv = *svp) && SvROK(sv) && (seenentry = (AV*)SvRV(sv)))
341             {
342                 SV *othername;
343                 if ((svp = av_fetch(seenentry, 0, FALSE))
344                     && (othername = *svp))
345                 {
346                     if (purity && *levelp > 0) {
347                         SV *postentry;
348                         
349                         if (realtype == SVt_PVHV)
350                             sv_catpvn(retval, "{}", 2);
351                         else if (realtype == SVt_PVAV)
352                             sv_catpvn(retval, "[]", 2);
353                         else
354                             sv_catpvn(retval, "do{my $o}", 9);
355                         postentry = newSVpvn(name, namelen);
356                         sv_catpvn(postentry, " = ", 3);
357                         sv_catsv(postentry, othername);
358                         av_push(postav, postentry);
359                     }
360                     else {
361                         if (name[0] == '@' || name[0] == '%') {
362                             if ((SvPVX_const(othername))[0] == '\\' &&
363                                 (SvPVX_const(othername))[1] == name[0]) {
364                                 sv_catpvn(retval, SvPVX_const(othername)+1,
365                                           SvCUR(othername)-1);
366                             }
367                             else {
368                                 sv_catpvn(retval, name, 1);
369                                 sv_catpvn(retval, "{", 1);
370                                 sv_catsv(retval, othername);
371                                 sv_catpvn(retval, "}", 1);
372                             }
373                         }
374                         else
375                             sv_catsv(retval, othername);
376                     }
377                     return 1;
378                 }
379                 else {
380 #ifdef DD_USE_OLD_ID_FORMAT
381                     warn("ref name not found for %s", id);
382 #else
383                     warn("ref name not found for 0x%"UVxf, PTR2UV(ival));
384 #endif
385                     return 0;
386                 }
387             }
388             else {   /* store our name and continue */
389                 SV *namesv;
390                 if (name[0] == '@' || name[0] == '%') {
391                     namesv = newSVpvn("\\", 1);
392                     sv_catpvn(namesv, name, namelen);
393                 }
394                 else if (realtype == SVt_PVCV && name[0] == '*') {
395                     namesv = newSVpvn("\\", 2);
396                     sv_catpvn(namesv, name, namelen);
397                     (SvPVX(namesv))[1] = '&';
398                 }
399                 else
400                     namesv = newSVpvn(name, namelen);
401                 seenentry = newAV();
402                 av_push(seenentry, namesv);
403                 (void)SvREFCNT_inc(val);
404                 av_push(seenentry, val);
405                 (void)hv_store(seenhv, id, idlen,
406                                newRV_inc((SV*)seenentry), 0);
407                 SvREFCNT_dec(seenentry);
408             }
409         }
410         /* regexps dont have to be blessed into package "Regexp"
411          * they can be blessed into any package. 
412          */
413 #if PERL_VERSION < 8
414         if (realpack && *realpack == 'R' && strEQ(realpack, "Regexp")) 
415 #elif PERL_VERSION < 11
416         if (realpack && realtype == SVt_PVMG && mg_find(ival, PERL_MAGIC_qr))
417 #else        
418         if (realpack && realtype == SVt_REGEXP) 
419 #endif
420         {
421             is_regex = 1;
422             if (strEQ(realpack, "Regexp")) 
423                 no_bless = 1;
424             else
425                 no_bless = 0;
426         }
427
428         /* If purity is not set and maxdepth is set, then check depth:
429          * if we have reached maximum depth, return the string
430          * representation of the thing we are currently examining
431          * at this depth (i.e., 'Foo=ARRAY(0xdeadbeef)').
432          */
433         if (!purity && maxdepth > 0 && *levelp >= maxdepth) {
434             STRLEN vallen;
435             const char * const valstr = SvPV(val,vallen);
436             sv_catpvn(retval, "'", 1);
437             sv_catpvn(retval, valstr, vallen);
438             sv_catpvn(retval, "'", 1);
439             return 1;
440         }
441
442         if (realpack && !no_bless) {                            /* we have a blessed ref */
443             STRLEN blesslen;
444             const char * const blessstr = SvPV(bless, blesslen);
445             sv_catpvn(retval, blessstr, blesslen);
446             sv_catpvn(retval, "( ", 2);
447             if (indent >= 2) {
448                 blesspad = apad;
449                 apad = newSVsv(apad);
450                 sv_x(aTHX_ apad, " ", 1, blesslen+2);
451             }
452         }
453
454         (*levelp)++;
455         ipad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp);
456
457         if (is_regex) 
458         {
459             STRLEN rlen;
460             const char *rval = SvPV(val, rlen);
461             const char *slash = strchr(rval, '/');
462             sv_catpvn(retval, "qr/", 3);
463             while (slash) {
464                 sv_catpvn(retval, rval, slash-rval);
465                 sv_catpvn(retval, "\\/", 2);
466                 rlen -= slash-rval+1;
467                 rval = slash+1;
468                 slash = strchr(rval, '/');
469             }
470             sv_catpvn(retval, rval, rlen);
471             sv_catpvn(retval, "/", 1);
472         } 
473         else if (
474 #if PERL_VERSION < 9
475                 realtype <= SVt_PVBM
476 #else
477                 realtype <= SVt_PVMG
478 #endif
479         ) {                          /* scalar ref */
480             SV * const namesv = newSVpvn("${", 2);
481             sv_catpvn(namesv, name, namelen);
482             sv_catpvn(namesv, "}", 1);
483             if (realpack) {                                  /* blessed */
484                 sv_catpvn(retval, "do{\\(my $o = ", 13);
485                 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
486                         postav, levelp, indent, pad, xpad, apad, sep, pair,
487                         freezer, toaster, purity, deepcopy, quotekeys, bless,
488                         maxdepth, sortkeys);
489                 sv_catpvn(retval, ")}", 2);
490             }                                                /* plain */
491             else {
492                 sv_catpvn(retval, "\\", 1);
493                 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
494                         postav, levelp, indent, pad, xpad, apad, sep, pair,
495                         freezer, toaster, purity, deepcopy, quotekeys, bless,
496                         maxdepth, sortkeys);
497             }
498             SvREFCNT_dec(namesv);
499         }
500         else if (realtype == SVt_PVGV) {                     /* glob ref */
501             SV * const namesv = newSVpvn("*{", 2);
502             sv_catpvn(namesv, name, namelen);
503             sv_catpvn(namesv, "}", 1);
504             sv_catpvn(retval, "\\", 1);
505             DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
506                     postav, levelp,     indent, pad, xpad, apad, sep, pair,
507                     freezer, toaster, purity, deepcopy, quotekeys, bless,
508                     maxdepth, sortkeys);
509             SvREFCNT_dec(namesv);
510         }
511         else if (realtype == SVt_PVAV) {
512             SV *totpad;
513             I32 ix = 0;
514             const I32 ixmax = av_len((AV *)ival);
515         
516             SV * const ixsv = newSViv(0);
517             /* allowing for a 24 char wide array index */
518             New(0, iname, namelen+28, char);
519             (void)strcpy(iname, name);
520             inamelen = namelen;
521             if (name[0] == '@') {
522                 sv_catpvn(retval, "(", 1);
523                 iname[0] = '$';
524             }
525             else {
526                 sv_catpvn(retval, "[", 1);
527                 /* omit "->" in $foo{bar}->[0], but not in ${$foo}->[0] */
528                 /*if (namelen > 0
529                     && name[namelen-1] != ']' && name[namelen-1] != '}'
530                     && (namelen < 4 || (name[1] != '{' && name[2] != '{')))*/
531                 if ((namelen > 0
532                      && name[namelen-1] != ']' && name[namelen-1] != '}')
533                     || (namelen > 4
534                         && (name[1] == '{'
535                             || (name[0] == '\\' && name[2] == '{'))))
536                 {
537                     iname[inamelen++] = '-'; iname[inamelen++] = '>';
538                     iname[inamelen] = '\0';
539                 }
540             }
541             if (iname[0] == '*' && iname[inamelen-1] == '}' && inamelen >= 8 &&
542                 (instr(iname+inamelen-8, "{SCALAR}") ||
543                  instr(iname+inamelen-7, "{ARRAY}") ||
544                  instr(iname+inamelen-6, "{HASH}"))) {
545                 iname[inamelen++] = '-'; iname[inamelen++] = '>';
546             }
547             iname[inamelen++] = '['; iname[inamelen] = '\0';
548             totpad = newSVsv(sep);
549             sv_catsv(totpad, pad);
550             sv_catsv(totpad, apad);
551
552             for (ix = 0; ix <= ixmax; ++ix) {
553                 STRLEN ilen;
554                 SV *elem;
555                 svp = av_fetch((AV*)ival, ix, FALSE);
556                 if (svp)
557                     elem = *svp;
558                 else
559                     elem = &PL_sv_undef;
560                 
561                 ilen = inamelen;
562                 sv_setiv(ixsv, ix);
563 #if PERL_VERSION < 10
564                 (void) sprintf(iname+ilen, "%"IVdf, (IV)ix);
565                 ilen = strlen(iname);
566 #else
567                 ilen = ilen + my_sprintf(iname+ilen, "%"IVdf, (IV)ix);
568 #endif
569                 iname[ilen++] = ']'; iname[ilen] = '\0';
570                 if (indent >= 3) {
571                     sv_catsv(retval, totpad);
572                     sv_catsv(retval, ipad);
573                     sv_catpvn(retval, "#", 1);
574                     sv_catsv(retval, ixsv);
575                 }
576                 sv_catsv(retval, totpad);
577                 sv_catsv(retval, ipad);
578                 DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav,
579                         levelp, indent, pad, xpad, apad, sep, pair,
580                         freezer, toaster, purity, deepcopy, quotekeys, bless,
581                         maxdepth, sortkeys);
582                 if (ix < ixmax)
583                     sv_catpvn(retval, ",", 1);
584             }
585             if (ixmax >= 0) {
586                 SV * const opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), (*levelp)-1);
587                 sv_catsv(retval, totpad);
588                 sv_catsv(retval, opad);
589                 SvREFCNT_dec(opad);
590             }
591             if (name[0] == '@')
592                 sv_catpvn(retval, ")", 1);
593             else
594                 sv_catpvn(retval, "]", 1);
595             SvREFCNT_dec(ixsv);
596             SvREFCNT_dec(totpad);
597             Safefree(iname);
598         }
599         else if (realtype == SVt_PVHV) {
600             SV *totpad, *newapad;
601             SV *sname;
602             HE *entry;
603             char *key;
604             I32 klen;
605             SV *hval;
606             AV *keys = NULL;
607         
608             SV * const iname = newSVpvn(name, namelen);
609             if (name[0] == '%') {
610                 sv_catpvn(retval, "(", 1);
611                 (SvPVX(iname))[0] = '$';
612             }
613             else {
614                 sv_catpvn(retval, "{", 1);
615                 /* omit "->" in $foo[0]->{bar}, but not in ${$foo}->{bar} */
616                 if ((namelen > 0
617                      && name[namelen-1] != ']' && name[namelen-1] != '}')
618                     || (namelen > 4
619                         && (name[1] == '{'
620                             || (name[0] == '\\' && name[2] == '{'))))
621                 {
622                     sv_catpvn(iname, "->", 2);
623                 }
624             }
625             if (name[0] == '*' && name[namelen-1] == '}' && namelen >= 8 &&
626                 (instr(name+namelen-8, "{SCALAR}") ||
627                  instr(name+namelen-7, "{ARRAY}") ||
628                  instr(name+namelen-6, "{HASH}"))) {
629                 sv_catpvn(iname, "->", 2);
630             }
631             sv_catpvn(iname, "{", 1);
632             totpad = newSVsv(sep);
633             sv_catsv(totpad, pad);
634             sv_catsv(totpad, apad);
635         
636             /* If requested, get a sorted/filtered array of hash keys */
637             if (sortkeys) {
638                 if (sortkeys == &PL_sv_yes) {
639 #if PERL_VERSION < 8
640                     sortkeys = sv_2mortal(newSVpvn("Data::Dumper::_sortkeys", 23));
641 #else
642                     keys = newAV();
643                     (void)hv_iterinit((HV*)ival);
644                     while ((entry = hv_iternext((HV*)ival))) {
645                         sv = hv_iterkeysv(entry);
646                         SvREFCNT_inc(sv);
647                         av_push(keys, sv);
648                     }
649 # ifdef USE_LOCALE_NUMERIC
650                     sortsv(AvARRAY(keys), 
651                            av_len(keys)+1, 
652                            IN_LOCALE ? Perl_sv_cmp_locale : Perl_sv_cmp);
653 # else
654                     sortsv(AvARRAY(keys), 
655                            av_len(keys)+1, 
656                            Perl_sv_cmp);
657 # endif
658 #endif
659                 }
660                 if (sortkeys != &PL_sv_yes) {
661                     dSP; ENTER; SAVETMPS; PUSHMARK(sp);
662                     XPUSHs(sv_2mortal(newRV_inc(ival))); PUTBACK;
663                     i = perl_call_sv(sortkeys, G_SCALAR | G_EVAL);
664                     SPAGAIN;
665                     if (i) {
666                         sv = POPs;
667                         if (SvROK(sv) && (SvTYPE(SvRV(sv)) == SVt_PVAV))
668                             keys = (AV*)SvREFCNT_inc(SvRV(sv));
669                     }
670                     if (! keys)
671                         warn("Sortkeys subroutine did not return ARRAYREF\n");
672                     PUTBACK; FREETMPS; LEAVE;
673                 }
674                 if (keys)
675                     sv_2mortal((SV*)keys);
676             }
677             else
678                 (void)hv_iterinit((HV*)ival);
679
680             /* foreach (keys %hash) */
681             for (i = 0; 1; i++) {
682                 char *nkey;
683                 char *nkey_buffer = NULL;
684                 I32 nticks = 0;
685                 SV* keysv;
686                 STRLEN keylen;
687                 I32 nlen;
688                 bool do_utf8 = FALSE;
689
690                if (sortkeys) {
691                    if (!(keys && (I32)i <= av_len(keys))) break;
692                } else {
693                    if (!(entry = hv_iternext((HV *)ival))) break;
694                }
695
696                 if (i)
697                     sv_catpvn(retval, ",", 1);
698
699                 if (sortkeys) {
700                     char *key;
701                     svp = av_fetch(keys, i, FALSE);
702                     keysv = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
703                     key = SvPV(keysv, keylen);
704                     svp = hv_fetch((HV*)ival, key,
705                                    SvUTF8(keysv) ? -(I32)keylen : keylen, 0);
706                     hval = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
707                 }
708                 else {
709                     keysv = hv_iterkeysv(entry);
710                     hval = hv_iterval((HV*)ival, entry);
711                 }
712
713                 key = SvPV(keysv, keylen);
714                 do_utf8 = DO_UTF8(keysv);
715                 klen = keylen;
716
717                 sv_catsv(retval, totpad);
718                 sv_catsv(retval, ipad);
719                 /* old logic was first to check utf8 flag, and if utf8 always
720                    call esc_q_utf8.  This caused test to break under -Mutf8,
721                    because there even strings like 'c' have utf8 flag on.
722                    Hence with quotekeys == 0 the XS code would still '' quote
723                    them based on flags, whereas the perl code would not,
724                    based on regexps.
725                    The perl code is correct.
726                    needs_quote() decides that anything that isn't a valid
727                    perl identifier needs to be quoted, hence only correctly
728                    formed strings with no characters outside [A-Za-z0-9_:]
729                    won't need quoting.  None of those characters are used in
730                    the byte encoding of utf8, so anything with utf8
731                    encoded characters in will need quoting. Hence strings
732                    with utf8 encoded characters in will end up inside do_utf8
733                    just like before, but now strings with utf8 flag set but
734                    only ascii characters will end up in the unquoted section.
735
736                    There should also be less tests for the (probably currently)
737                    more common doesn't need quoting case.
738                    The code is also smaller (22044 vs 22260) because I've been
739                    able to pull the common logic out to both sides.  */
740                 if (quotekeys || needs_quote(key)) {
741                     if (do_utf8) {
742                         STRLEN ocur = SvCUR(retval);
743                         nlen = esc_q_utf8(aTHX_ retval, key, klen);
744                         nkey = SvPVX(retval) + ocur;
745                     }
746                     else {
747                         nticks = num_q(key, klen);
748                         New(0, nkey_buffer, klen+nticks+3, char);
749                         nkey = nkey_buffer;
750                         nkey[0] = '\'';
751                         if (nticks)
752                             klen += esc_q(nkey+1, key, klen);
753                         else
754                             (void)Copy(key, nkey+1, klen, char);
755                         nkey[++klen] = '\'';
756                         nkey[++klen] = '\0';
757                         nlen = klen;
758                         sv_catpvn(retval, nkey, klen);
759                     }
760                 }
761                 else {
762                     nkey = key;
763                     nlen = klen;
764                     sv_catpvn(retval, nkey, klen);
765                 }
766                 sname = newSVsv(iname);
767                 sv_catpvn(sname, nkey, nlen);
768                 sv_catpvn(sname, "}", 1);
769
770                 sv_catsv(retval, pair);
771                 if (indent >= 2) {
772                     char *extra;
773                     I32 elen = 0;
774                     newapad = newSVsv(apad);
775                     New(0, extra, klen+4+1, char);
776                     while (elen < (klen+4))
777                         extra[elen++] = ' ';
778                     extra[elen] = '\0';
779                     sv_catpvn(newapad, extra, elen);
780                     Safefree(extra);
781                 }
782                 else
783                     newapad = apad;
784
785                 DD_dump(aTHX_ hval, SvPVX_const(sname), SvCUR(sname), retval, seenhv,
786                         postav, levelp, indent, pad, xpad, newapad, sep, pair,
787                         freezer, toaster, purity, deepcopy, quotekeys, bless,
788                         maxdepth, sortkeys);
789                 SvREFCNT_dec(sname);
790                 Safefree(nkey_buffer);
791                 if (indent >= 2)
792                     SvREFCNT_dec(newapad);
793             }
794             if (i) {
795                 SV *opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp-1);
796                 sv_catsv(retval, totpad);
797                 sv_catsv(retval, opad);
798                 SvREFCNT_dec(opad);
799             }
800             if (name[0] == '%')
801                 sv_catpvn(retval, ")", 1);
802             else
803                 sv_catpvn(retval, "}", 1);
804             SvREFCNT_dec(iname);
805             SvREFCNT_dec(totpad);
806         }
807         else if (realtype == SVt_PVCV) {
808             sv_catpvn(retval, "sub { \"DUMMY\" }", 15);
809             if (purity)
810                 warn("Encountered CODE ref, using dummy placeholder");
811         }
812         else {
813             warn("cannot handle ref type %ld", realtype);
814         }
815
816         if (realpack && !no_bless) {  /* free blessed allocs */
817             I32 plen;
818             I32 pticks;
819
820             if (indent >= 2) {
821                 SvREFCNT_dec(apad);
822                 apad = blesspad;
823             }
824             sv_catpvn(retval, ", '", 3);
825
826             plen = strlen(realpack);
827             pticks = num_q(realpack, plen);
828             if (pticks) { /* needs escaping */
829                 char *npack;
830                 char *npack_buffer = NULL;
831
832                 New(0, npack_buffer, plen+pticks+1, char);
833                 npack = npack_buffer;
834                 plen += esc_q(npack, realpack, plen);
835                 npack[plen] = '\0';
836
837                 sv_catpvn(retval, npack, plen);
838                 Safefree(npack_buffer);
839             }
840             else {
841                 sv_catpvn(retval, realpack, strlen(realpack));
842             }
843             sv_catpvn(retval, "' )", 3);
844             if (toaster && SvPOK(toaster) && SvCUR(toaster)) {
845                 sv_catpvn(retval, "->", 2);
846                 sv_catsv(retval, toaster);
847                 sv_catpvn(retval, "()", 2);
848             }
849         }
850         SvREFCNT_dec(ipad);
851         (*levelp)--;
852     }
853     else {
854         STRLEN i;
855         
856         if (namelen) {
857 #ifdef DD_USE_OLD_ID_FORMAT
858             idlen = my_snprintf(id, sizeof(id), "0x%"UVxf, PTR2UV(val));
859 #else
860             id_buffer = PTR2UV(val);
861             idlen = sizeof(id_buffer);
862 #endif
863             if ((svp = hv_fetch(seenhv, id, idlen, FALSE)) &&
864                 (sv = *svp) && SvROK(sv) &&
865                 (seenentry = (AV*)SvRV(sv)))
866             {
867                 SV *othername;
868                 if ((svp = av_fetch(seenentry, 0, FALSE)) && (othername = *svp)
869                     && (svp = av_fetch(seenentry, 2, FALSE)) && *svp && SvIV(*svp) > 0)
870                 {
871                     sv_catpvn(retval, "${", 2);
872                     sv_catsv(retval, othername);
873                     sv_catpvn(retval, "}", 1);
874                     return 1;
875                 }
876             }
877             else if (val != &PL_sv_undef) {
878                 SV * const namesv = newSVpvn("\\", 1);
879                 sv_catpvn(namesv, name, namelen);
880                 seenentry = newAV();
881                 av_push(seenentry, namesv);
882                 av_push(seenentry, newRV_inc(val));
883                 (void)hv_store(seenhv, id, idlen, newRV_inc((SV*)seenentry), 0);
884                 SvREFCNT_dec(seenentry);
885             }
886         }
887
888         if (DD_is_integer(val)) {
889             STRLEN len;
890             if (SvIsUV(val))
891               len = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"UVuf, SvUV(val));
892             else
893               len = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"IVdf, SvIV(val));
894             if (SvPOK(val)) {
895               /* Need to check to see if this is a string such as " 0".
896                  I'm assuming from sprintf isn't going to clash with utf8.
897                  Is this valid on EBCDIC?  */
898               STRLEN pvlen;
899               const char * const pv = SvPV(val, pvlen);
900               if (pvlen != len || memNE(pv, tmpbuf, len))
901                 goto integer_came_from_string;
902             }
903             if (len > 10) {
904               /* Looks like we're on a 64 bit system.  Make it a string so that
905                  if a 32 bit system reads the number it will cope better.  */
906               sv_catpvf(retval, "'%s'", tmpbuf);
907             } else
908               sv_catpvn(retval, tmpbuf, len);
909         }
910         else if (realtype == SVt_PVGV) {/* GLOBs can end up with scribbly names */
911             c = SvPV(val, i);
912             ++c; --i;                   /* just get the name */
913             if (i >= 6 && strncmp(c, "main::", 6) == 0) {
914                 c += 4;
915                 i -= 4;
916             }
917             if (needs_quote(c)) {
918                 sv_grow(retval, SvCUR(retval)+6+2*i);
919                 r = SvPVX(retval)+SvCUR(retval);
920                 r[0] = '*'; r[1] = '{'; r[2] = '\'';
921                 i += esc_q(r+3, c, i);
922                 i += 3;
923                 r[i++] = '\''; r[i++] = '}';
924                 r[i] = '\0';
925             }
926             else {
927                 sv_grow(retval, SvCUR(retval)+i+2);
928                 r = SvPVX(retval)+SvCUR(retval);
929                 r[0] = '*'; strcpy(r+1, c);
930                 i++;
931             }
932             SvCUR_set(retval, SvCUR(retval)+i);
933
934             if (purity) {
935                 static const char* const entries[] = { "{SCALAR}", "{ARRAY}", "{HASH}" };
936                 static const STRLEN sizes[] = { 8, 7, 6 };
937                 SV *e;
938                 SV * const nname = newSVpvn("", 0);
939                 SV * const newapad = newSVpvn("", 0);
940                 GV * const gv = (GV*)val;
941                 I32 j;
942                 
943                 for (j=0; j<3; j++) {
944                     e = ((j == 0) ? GvSV(gv) : (j == 1) ? (SV*)GvAV(gv) : (SV*)GvHV(gv));
945                     if (!e)
946                         continue;
947                     if (j == 0 && !SvOK(e))
948                         continue;
949
950                     {
951                         I32 nlevel = 0;
952                         SV *postentry = newSVpvn(r,i);
953                         
954                         sv_setsv(nname, postentry);
955                         sv_catpvn(nname, entries[j], sizes[j]);
956                         sv_catpvn(postentry, " = ", 3);
957                         av_push(postav, postentry);
958                         e = newRV_inc(e);
959                         
960                         SvCUR_set(newapad, 0);
961                         if (indent >= 2)
962                             (void)sv_x(aTHX_ newapad, " ", 1, SvCUR(postentry));
963                         
964                         DD_dump(aTHX_ e, SvPVX_const(nname), SvCUR(nname), postentry,
965                                 seenhv, postav, &nlevel, indent, pad, xpad,
966                                 newapad, sep, pair, freezer, toaster, purity,
967                                 deepcopy, quotekeys, bless, maxdepth, 
968                                 sortkeys);
969                         SvREFCNT_dec(e);
970                     }
971                 }
972                 
973                 SvREFCNT_dec(newapad);
974                 SvREFCNT_dec(nname);
975             }
976         }
977         else if (val == &PL_sv_undef || !SvOK(val)) {
978             sv_catpvn(retval, "undef", 5);
979         }
980         else {
981         integer_came_from_string:
982             c = SvPV(val, i);
983             if (DO_UTF8(val))
984                 i += esc_q_utf8(aTHX_ retval, c, i);
985             else {
986                 sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */
987                 r = SvPVX(retval) + SvCUR(retval);
988                 r[0] = '\'';
989                 i += esc_q(r+1, c, i);
990                 ++i;
991                 r[i++] = '\'';
992                 r[i] = '\0';
993                 SvCUR_set(retval, SvCUR(retval)+i);
994             }
995         }
996     }
997
998     if (idlen) {
999         if (deepcopy)
1000             (void)hv_delete(seenhv, id, idlen, G_DISCARD);
1001         else if (namelen && seenentry) {
1002             SV *mark = *av_fetch(seenentry, 2, TRUE);
1003             sv_setiv(mark,1);
1004         }
1005     }
1006     return 1;
1007 }
1008
1009
1010 MODULE = Data::Dumper           PACKAGE = Data::Dumper         PREFIX = Data_Dumper_
1011
1012 #
1013 # This is the exact equivalent of Dump.  Well, almost. The things that are
1014 # different as of now (due to Laziness):
1015 #   * doesnt do double-quotes yet.
1016 #
1017
1018 void
1019 Data_Dumper_Dumpxs(href, ...)
1020         SV      *href;
1021         PROTOTYPE: $;$$
1022         PPCODE:
1023         {
1024             HV *hv;
1025             SV *retval, *valstr;
1026             HV *seenhv = NULL;
1027             AV *postav, *todumpav, *namesav;
1028             I32 level = 0;
1029             I32 indent, terse, i, imax, postlen;
1030             SV **svp;
1031             SV *val, *name, *pad, *xpad, *apad, *sep, *pair, *varname;
1032             SV *freezer, *toaster, *bless, *sortkeys;
1033             I32 purity, deepcopy, quotekeys, maxdepth = 0;
1034             char tmpbuf[1024];
1035             I32 gimme = GIMME;
1036
1037             if (!SvROK(href)) {         /* call new to get an object first */
1038                 if (items < 2)
1039                     croak("Usage: Data::Dumper::Dumpxs(PACKAGE, VAL_ARY_REF, [NAME_ARY_REF])");
1040                 
1041                 ENTER;
1042                 SAVETMPS;
1043                 
1044                 PUSHMARK(sp);
1045                 XPUSHs(href);
1046                 XPUSHs(sv_2mortal(newSVsv(ST(1))));
1047                 if (items >= 3)
1048                     XPUSHs(sv_2mortal(newSVsv(ST(2))));
1049                 PUTBACK;
1050                 i = perl_call_method("new", G_SCALAR);
1051                 SPAGAIN;
1052                 if (i)
1053                     href = newSVsv(POPs);
1054
1055                 PUTBACK;
1056                 FREETMPS;
1057                 LEAVE;
1058                 if (i)
1059                     (void)sv_2mortal(href);
1060             }
1061
1062             todumpav = namesav = NULL;
1063             seenhv = NULL;
1064             val = pad = xpad = apad = sep = pair = varname
1065                 = freezer = toaster = bless = sortkeys = &PL_sv_undef;
1066             name = sv_newmortal();
1067             indent = 2;
1068             terse = purity = deepcopy = 0;
1069             quotekeys = 1;
1070         
1071             retval = newSVpvn("", 0);
1072             if (SvROK(href)
1073                 && (hv = (HV*)SvRV((SV*)href))
1074                 && SvTYPE(hv) == SVt_PVHV)              {
1075
1076                 if ((svp = hv_fetch(hv, "seen", 4, FALSE)) && SvROK(*svp))
1077                     seenhv = (HV*)SvRV(*svp);
1078                 if ((svp = hv_fetch(hv, "todump", 6, FALSE)) && SvROK(*svp))
1079                     todumpav = (AV*)SvRV(*svp);
1080                 if ((svp = hv_fetch(hv, "names", 5, FALSE)) && SvROK(*svp))
1081                     namesav = (AV*)SvRV(*svp);
1082                 if ((svp = hv_fetch(hv, "indent", 6, FALSE)))
1083                     indent = SvIV(*svp);
1084                 if ((svp = hv_fetch(hv, "purity", 6, FALSE)))
1085                     purity = SvIV(*svp);
1086                 if ((svp = hv_fetch(hv, "terse", 5, FALSE)))
1087                     terse = SvTRUE(*svp);
1088 #if 0 /* useqq currently unused */
1089                 if ((svp = hv_fetch(hv, "useqq", 5, FALSE)))
1090                     useqq = SvTRUE(*svp);
1091 #endif
1092                 if ((svp = hv_fetch(hv, "pad", 3, FALSE)))
1093                     pad = *svp;
1094                 if ((svp = hv_fetch(hv, "xpad", 4, FALSE)))
1095                     xpad = *svp;
1096                 if ((svp = hv_fetch(hv, "apad", 4, FALSE)))
1097                     apad = *svp;
1098                 if ((svp = hv_fetch(hv, "sep", 3, FALSE)))
1099                     sep = *svp;
1100                 if ((svp = hv_fetch(hv, "pair", 4, FALSE)))
1101                     pair = *svp;
1102                 if ((svp = hv_fetch(hv, "varname", 7, FALSE)))
1103                     varname = *svp;
1104                 if ((svp = hv_fetch(hv, "freezer", 7, FALSE)))
1105                     freezer = *svp;
1106                 if ((svp = hv_fetch(hv, "toaster", 7, FALSE)))
1107                     toaster = *svp;
1108                 if ((svp = hv_fetch(hv, "deepcopy", 8, FALSE)))
1109                     deepcopy = SvTRUE(*svp);
1110                 if ((svp = hv_fetch(hv, "quotekeys", 9, FALSE)))
1111                     quotekeys = SvTRUE(*svp);
1112                 if ((svp = hv_fetch(hv, "bless", 5, FALSE)))
1113                     bless = *svp;
1114                 if ((svp = hv_fetch(hv, "maxdepth", 8, FALSE)))
1115                     maxdepth = SvIV(*svp);
1116                 if ((svp = hv_fetch(hv, "sortkeys", 8, FALSE))) {
1117                     sortkeys = *svp;
1118                     if (! SvTRUE(sortkeys))
1119                         sortkeys = NULL;
1120                     else if (! (SvROK(sortkeys) &&
1121                                 SvTYPE(SvRV(sortkeys)) == SVt_PVCV) )
1122                     {
1123                         /* flag to use qsortsv() for sorting hash keys */       
1124                         sortkeys = &PL_sv_yes; 
1125                     }
1126                 }
1127                 postav = newAV();
1128
1129                 if (todumpav)
1130                     imax = av_len(todumpav);
1131                 else
1132                     imax = -1;
1133                 valstr = newSVpvn("",0);
1134                 for (i = 0; i <= imax; ++i) {
1135                     SV *newapad;
1136                 
1137                     av_clear(postav);
1138                     if ((svp = av_fetch(todumpav, i, FALSE)))
1139                         val = *svp;
1140                     else
1141                         val = &PL_sv_undef;
1142                     if ((svp = av_fetch(namesav, i, TRUE))) {
1143                         sv_setsv(name, *svp);
1144                         if (SvOK(*svp) && !SvPOK(*svp))
1145                             (void)SvPV_nolen_const(name);
1146                     }
1147                     else
1148                         (void)SvOK_off(name);
1149                 
1150                     if (SvPOK(name)) {
1151                         if ((SvPVX_const(name))[0] == '*') {
1152                             if (SvROK(val)) {
1153                                 switch (SvTYPE(SvRV(val))) {
1154                                 case SVt_PVAV:
1155                                     (SvPVX(name))[0] = '@';
1156                                     break;
1157                                 case SVt_PVHV:
1158                                     (SvPVX(name))[0] = '%';
1159                                     break;
1160                                 case SVt_PVCV:
1161                                     (SvPVX(name))[0] = '*';
1162                                     break;
1163                                 default:
1164                                     (SvPVX(name))[0] = '$';
1165                                     break;
1166                                 }
1167                             }
1168                             else
1169                                 (SvPVX(name))[0] = '$';
1170                         }
1171                         else if ((SvPVX_const(name))[0] != '$')
1172                             sv_insert(name, 0, 0, "$", 1);
1173                     }
1174                     else {
1175                         STRLEN nchars;
1176                         sv_setpvn(name, "$", 1);
1177                         sv_catsv(name, varname);
1178                         nchars = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"IVdf, (IV)(i+1));
1179                         sv_catpvn(name, tmpbuf, nchars);
1180                     }
1181                 
1182                     if (indent >= 2) {
1183                         SV * const tmpsv = sv_x(aTHX_ NULL, " ", 1, SvCUR(name)+3);
1184                         newapad = newSVsv(apad);
1185                         sv_catsv(newapad, tmpsv);
1186                         SvREFCNT_dec(tmpsv);
1187                     }
1188                     else
1189                         newapad = apad;
1190                 
1191                     DD_dump(aTHX_ val, SvPVX_const(name), SvCUR(name), valstr, seenhv,
1192                             postav, &level, indent, pad, xpad, newapad, sep, pair,
1193                             freezer, toaster, purity, deepcopy, quotekeys,
1194                             bless, maxdepth, sortkeys);
1195                 
1196                     if (indent >= 2)
1197                         SvREFCNT_dec(newapad);
1198
1199                     postlen = av_len(postav);
1200                     if (postlen >= 0 || !terse) {
1201                         sv_insert(valstr, 0, 0, " = ", 3);
1202                         sv_insert(valstr, 0, 0, SvPVX_const(name), SvCUR(name));
1203                         sv_catpvn(valstr, ";", 1);
1204                     }
1205                     sv_catsv(retval, pad);
1206                     sv_catsv(retval, valstr);
1207                     sv_catsv(retval, sep);
1208                     if (postlen >= 0) {
1209                         I32 i;
1210                         sv_catsv(retval, pad);
1211                         for (i = 0; i <= postlen; ++i) {
1212                             SV *elem;
1213                             svp = av_fetch(postav, i, FALSE);
1214                             if (svp && (elem = *svp)) {
1215                                 sv_catsv(retval, elem);
1216                                 if (i < postlen) {
1217                                     sv_catpvn(retval, ";", 1);
1218                                     sv_catsv(retval, sep);
1219                                     sv_catsv(retval, pad);
1220                                 }
1221                             }
1222                         }
1223                         sv_catpvn(retval, ";", 1);
1224                             sv_catsv(retval, sep);
1225                     }
1226                     sv_setpvn(valstr, "", 0);
1227                     if (gimme == G_ARRAY) {
1228                         XPUSHs(sv_2mortal(retval));
1229                         if (i < imax)   /* not the last time thro ? */
1230                             retval = newSVpvn("",0);
1231                     }
1232                 }
1233                 SvREFCNT_dec(postav);
1234                 SvREFCNT_dec(valstr);
1235             }
1236             else
1237                 croak("Call to new() method failed to return HASH ref");
1238             if (gimme == G_SCALAR)
1239                 XPUSHs(sv_2mortal(retval));
1240         }