make Dumper.xs work with 5.10.x
[perl.git] / ext / Data / Dumper / Dumper.xs
1 #define PERL_NO_GET_CONTEXT
2 #include "EXTERN.h"
3 #include "perl.h"
4 #include "XSUB.h"
5 #ifdef USE_PPPORT_H
6 #  define NEED_my_snprintf
7 #  include "ppport.h"
8 #endif
9
10 #if PERL_VERSION < 6
11 #  define DD_USE_OLD_ID_FORMAT
12 #endif
13
14 static I32 num_q (const char *s, STRLEN slen);
15 static I32 esc_q (char *dest, const char *src, STRLEN slen);
16 static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen);
17 static I32 needs_quote(register const char *s);
18 static SV *sv_x (pTHX_ SV *sv, const char *str, STRLEN len, I32 n);
19 static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
20                     HV *seenhv, AV *postav, I32 *levelp, I32 indent,
21                     SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair,
22                     SV *freezer, SV *toaster,
23                     I32 purity, I32 deepcopy, I32 quotekeys, SV *bless,
24                     I32 maxdepth, SV *sortkeys);
25
26 #ifndef HvNAME_get
27 #define HvNAME_get HvNAME
28 #endif
29
30 #if PERL_VERSION <= 6 /* Perl 5.6 and earlier */
31
32 # ifdef EBCDIC
33 #  define UNI_TO_NATIVE(ch) (((ch) > 255) ? (ch) : ASCII_TO_NATIVE(ch))
34 # else
35 #  define UNI_TO_NATIVE(ch) (ch)
36 # endif
37
38 UV
39 Perl_utf8_to_uvchr(pTHX_ U8 *s, STRLEN *retlen)
40 {
41     const UV uv = utf8_to_uv(s, UTF8_MAXLEN, retlen,
42                     ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
43     return UNI_TO_NATIVE(uv);
44 }
45
46 # if !defined(PERL_IMPLICIT_CONTEXT)
47 #  define utf8_to_uvchr      Perl_utf8_to_uvchr
48 # else
49 #  define utf8_to_uvchr(a,b) Perl_utf8_to_uvchr(aTHX_ a,b)
50 # endif
51
52 #endif /* PERL_VERSION <= 6 */
53
54 /* Changes in 5.7 series mean that now IOK is only set if scalar is
55    precisely integer but in 5.6 and earlier we need to do a more
56    complex test  */
57 #if PERL_VERSION <= 6
58 #define DD_is_integer(sv) (SvIOK(sv) && (SvIsUV(val) ? SvUV(sv) == SvNV(sv) : SvIV(sv) == SvNV(sv)))
59 #else
60 #define DD_is_integer(sv) SvIOK(sv)
61 #endif
62
63 /* does a string need to be protected? */
64 static I32
65 needs_quote(register const char *s)
66 {
67 TOP:
68     if (s[0] == ':') {
69         if (*++s) {
70             if (*s++ != ':')
71                 return 1;
72         }
73         else
74             return 1;
75     }
76     if (isIDFIRST(*s)) {
77         while (*++s)
78             if (!isALNUM(*s)) {
79                 if (*s == ':')
80                     goto TOP;
81                 else
82                     return 1;
83             }
84     }
85     else
86         return 1;
87     return 0;
88 }
89
90 /* count the number of "'"s and "\"s in string */
91 static I32
92 num_q(register const char *s, register STRLEN slen)
93 {
94     register I32 ret = 0;
95
96     while (slen > 0) {
97         if (*s == '\'' || *s == '\\')
98             ++ret;
99         ++s;
100         --slen;
101     }
102     return ret;
103 }
104
105
106 /* returns number of chars added to escape "'"s and "\"s in s */
107 /* slen number of characters in s will be escaped */
108 /* destination must be long enough for additional chars */
109 static I32
110 esc_q(register char *d, register const char *s, register STRLEN slen)
111 {
112     register I32 ret = 0;
113
114     while (slen > 0) {
115         switch (*s) {
116         case '\'':
117         case '\\':
118             *d = '\\';
119             ++d; ++ret;
120         default:
121             *d = *s;
122             ++d; ++s; --slen;
123             break;
124         }
125     }
126     return ret;
127 }
128
129 static I32
130 esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen)
131 {
132     char *r, *rstart;
133     const char *s = src;
134     const char * const send = src + slen;
135     STRLEN j, cur = SvCUR(sv);
136     /* Could count 128-255 and 256+ in two variables, if we want to
137        be like &qquote and make a distinction.  */
138     STRLEN grow = 0;    /* bytes needed to represent chars 128+ */
139     /* STRLEN topbit_grow = 0;  bytes needed to represent chars 128-255 */
140     STRLEN backslashes = 0;
141     STRLEN single_quotes = 0;
142     STRLEN qq_escapables = 0;   /* " $ @ will need a \ in "" strings.  */
143     STRLEN normal = 0;
144
145     /* this will need EBCDICification */
146     for (s = src; s < send; s += UTF8SKIP(s)) {
147         const UV k = utf8_to_uvchr((U8*)s, NULL);
148
149 #ifdef EBCDIC
150         if (!isprint(k) || k > 256) {
151 #else
152         if (k > 127) {
153 #endif
154             /* 4: \x{} then count the number of hex digits.  */
155             grow += 4 + (k <= 0xFF ? 2 : k <= 0xFFF ? 3 : k <= 0xFFFF ? 4 :
156 #if UVSIZE == 4
157                 8 /* We may allocate a bit more than the minimum here.  */
158 #else
159                 k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
160 #endif
161                 );
162         } else if (k == '\\') {
163             backslashes++;
164         } else if (k == '\'') {
165             single_quotes++;
166         } else if (k == '"' || k == '$' || k == '@') {
167             qq_escapables++;
168         } else {
169             normal++;
170         }
171     }
172     if (grow) {
173         /* We have something needing hex. 3 is ""\0 */
174         sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
175                 + 2*qq_escapables + normal);
176         rstart = r = SvPVX(sv) + cur;
177
178         *r++ = '"';
179
180         for (s = src; s < send; s += UTF8SKIP(s)) {
181             const UV k = utf8_to_uvchr((U8*)s, NULL);
182
183             if (k == '"' || k == '\\' || k == '$' || k == '@') {
184                 *r++ = '\\';
185                 *r++ = (char)k;
186             }
187             else
188 #ifdef EBCDIC
189               if (isprint(k) && k < 256)
190 #else
191               if (k < 0x80)
192 #endif
193                 *r++ = (char)k;
194             else {
195 #if PERL_VERSION < 10
196                 sprintf(r, "\\x{%"UVxf"}", k);
197                 r += strlen(r);
198                 /* my_sprintf is not supported by ppport.h */
199 #else
200                 r = r + my_sprintf(r, "\\x{%"UVxf"}", k);
201 #endif
202             }
203         }
204         *r++ = '"';
205     } else {
206         /* Single quotes.  */
207         sv_grow(sv, cur + 3 + 2*backslashes + 2*single_quotes
208                 + qq_escapables + normal);
209         rstart = r = SvPVX(sv) + cur;
210         *r++ = '\'';
211         for (s = src; s < send; s ++) {
212             const char k = *s;
213             if (k == '\'' || k == '\\')
214                 *r++ = '\\';
215             *r++ = k;
216         }
217         *r++ = '\'';
218     }
219     *r = '\0';
220     j = r - rstart;
221     SvCUR_set(sv, cur + j);
222
223     return j;
224 }
225
226 /* append a repeated string to an SV */
227 static SV *
228 sv_x(pTHX_ SV *sv, const char *str, STRLEN len, I32 n)
229 {
230     if (!sv)
231         sv = newSVpvn("", 0);
232 #ifdef DEBUGGING
233     else
234         assert(SvTYPE(sv) >= SVt_PV);
235 #endif
236
237     if (n > 0) {
238         SvGROW(sv, len*n + SvCUR(sv) + 1);
239         if (len == 1) {
240             char * const start = SvPVX(sv) + SvCUR(sv);
241             SvCUR_set(sv, SvCUR(sv) + n);
242             start[n] = '\0';
243             while (n > 0)
244                 start[--n] = str[0];
245         }
246         else
247             while (n > 0) {
248                 sv_catpvn(sv, str, len);
249                 --n;
250             }
251     }
252     return sv;
253 }
254
255 /*
256  * This ought to be split into smaller functions. (it is one long function since
257  * it exactly parallels the perl version, which was one long thing for
258  * efficiency raisins.)  Ugggh!
259  */
260 static I32
261 DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
262         AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad,
263         SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity,
264         I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys)
265 {
266     char tmpbuf[128];
267     U32 i;
268     char *c, *r, *realpack;
269 #ifdef DD_USE_OLD_ID_FORMAT
270     char id[128];
271 #else
272     UV id_buffer;
273     char *const id = (char *)&id_buffer;
274 #endif
275     SV **svp;
276     SV *sv, *ipad, *ival;
277     SV *blesspad = Nullsv;
278     AV *seenentry = NULL;
279     char *iname;
280     STRLEN inamelen, idlen = 0;
281     U32 realtype;
282     bool no_bless = 0; /* when a qr// is blessed into Regexp we dont want to bless it.
283                           in later perls we should actually check the classname of the 
284                           engine. this gets tricky as it involves lexical issues that arent so
285                           easy to resolve */
286     bool is_regex = 0; /* we are dumping a regex, we need to know this before we bless */
287
288     if (!val)
289         return 0;
290
291     /* If the ouput buffer has less than some arbitary amount of space
292        remaining, then enlarge it. For the test case (25M of output),
293        *1.1 was slower, *2.0 was the same, so the first guess of 1.5 is
294         deemed to be good enough.  */
295     if (SvTYPE(retval) >= SVt_PV && (SvLEN(retval) - SvCUR(retval)) < 42) {
296         sv_grow(retval, SvCUR(retval) * 3 / 2);
297     }
298
299     realtype = SvTYPE(val);
300
301     if (SvGMAGICAL(val))
302         mg_get(val);
303     if (SvROK(val)) {
304
305         /* If a freeze method is provided and the object has it, call
306            it.  Warn on errors. */
307         if (SvOBJECT(SvRV(val)) && freezer &&
308             SvPOK(freezer) && SvCUR(freezer) &&
309             gv_fetchmeth(SvSTASH(SvRV(val)), SvPVX_const(freezer), 
310                          SvCUR(freezer), -1) != NULL)
311         {
312             dSP; ENTER; SAVETMPS; PUSHMARK(sp);
313             XPUSHs(val); PUTBACK;
314             i = perl_call_method(SvPVX_const(freezer), G_EVAL|G_VOID);
315             SPAGAIN;
316             if (SvTRUE(ERRSV))
317                 warn("WARNING(Freezer method call failed): %"SVf"", ERRSV);
318             PUTBACK; FREETMPS; LEAVE;
319         }
320         
321         ival = SvRV(val);
322         realtype = SvTYPE(ival);
323 #ifdef DD_USE_OLD_ID_FORMAT
324         idlen = my_snprintf(id, sizeof(id), "0x%"UVxf, PTR2UV(ival));
325 #else
326         id_buffer = PTR2UV(ival);
327         idlen = sizeof(id_buffer);
328 #endif
329         if (SvOBJECT(ival))
330             realpack = HvNAME_get(SvSTASH(ival));
331         else
332             realpack = NULL;
333
334         /* if it has a name, we need to either look it up, or keep a tab
335          * on it so we know when we hit it later
336          */
337         if (namelen) {
338             if ((svp = hv_fetch(seenhv, id, idlen, FALSE))
339                 && (sv = *svp) && SvROK(sv) && (seenentry = (AV*)SvRV(sv)))
340             {
341                 SV *othername;
342                 if ((svp = av_fetch(seenentry, 0, FALSE))
343                     && (othername = *svp))
344                 {
345                     if (purity && *levelp > 0) {
346                         SV *postentry;
347                         
348                         if (realtype == SVt_PVHV)
349                             sv_catpvn(retval, "{}", 2);
350                         else if (realtype == SVt_PVAV)
351                             sv_catpvn(retval, "[]", 2);
352                         else
353                             sv_catpvn(retval, "do{my $o}", 9);
354                         postentry = newSVpvn(name, namelen);
355                         sv_catpvn(postentry, " = ", 3);
356                         sv_catsv(postentry, othername);
357                         av_push(postav, postentry);
358                     }
359                     else {
360                         if (name[0] == '@' || name[0] == '%') {
361                             if ((SvPVX_const(othername))[0] == '\\' &&
362                                 (SvPVX_const(othername))[1] == name[0]) {
363                                 sv_catpvn(retval, SvPVX_const(othername)+1,
364                                           SvCUR(othername)-1);
365                             }
366                             else {
367                                 sv_catpvn(retval, name, 1);
368                                 sv_catpvn(retval, "{", 1);
369                                 sv_catsv(retval, othername);
370                                 sv_catpvn(retval, "}", 1);
371                             }
372                         }
373                         else
374                             sv_catsv(retval, othername);
375                     }
376                     return 1;
377                 }
378                 else {
379 #ifdef DD_USE_OLD_ID_FORMAT
380                     warn("ref name not found for %s", id);
381 #else
382                     warn("ref name not found for 0x%"UVxf, PTR2UV(ival));
383 #endif
384                     return 0;
385                 }
386             }
387             else {   /* store our name and continue */
388                 SV *namesv;
389                 if (name[0] == '@' || name[0] == '%') {
390                     namesv = newSVpvn("\\", 1);
391                     sv_catpvn(namesv, name, namelen);
392                 }
393                 else if (realtype == SVt_PVCV && name[0] == '*') {
394                     namesv = newSVpvn("\\", 2);
395                     sv_catpvn(namesv, name, namelen);
396                     (SvPVX(namesv))[1] = '&';
397                 }
398                 else
399                     namesv = newSVpvn(name, namelen);
400                 seenentry = newAV();
401                 av_push(seenentry, namesv);
402                 (void)SvREFCNT_inc(val);
403                 av_push(seenentry, val);
404                 (void)hv_store(seenhv, id, idlen,
405                                newRV_inc((SV*)seenentry), 0);
406                 SvREFCNT_dec(seenentry);
407             }
408         }
409         /* regexps dont have to be blessed into package "Regexp"
410          * they can be blessed into any package. 
411          */
412 #if PERL_VERSION < 8
413         if (realpack && *realpack == 'R' && strEQ(realpack, "Regexp")) 
414 #elif PERL_VERSION < 11
415         if (realpack && realtype == SVt_PVMG && mg_find(ival, PERL_MAGIC_qr))
416 #else        
417         if (realpack && realtype == SVt_REGEXP) 
418 #endif
419         {
420             is_regex = 1;
421             if (strEQ(realpack, "Regexp")) 
422                 no_bless = 1;
423             else
424                 no_bless = 0;
425         }
426
427         /* If purity is not set and maxdepth is set, then check depth:
428          * if we have reached maximum depth, return the string
429          * representation of the thing we are currently examining
430          * at this depth (i.e., 'Foo=ARRAY(0xdeadbeef)').
431          */
432         if (!purity && maxdepth > 0 && *levelp >= maxdepth) {
433             STRLEN vallen;
434             const char * const valstr = SvPV(val,vallen);
435             sv_catpvn(retval, "'", 1);
436             sv_catpvn(retval, valstr, vallen);
437             sv_catpvn(retval, "'", 1);
438             return 1;
439         }
440
441         if (realpack && !no_bless) {                            /* we have a blessed ref */
442             STRLEN blesslen;
443             const char * const blessstr = SvPV(bless, blesslen);
444             sv_catpvn(retval, blessstr, blesslen);
445             sv_catpvn(retval, "( ", 2);
446             if (indent >= 2) {
447                 blesspad = apad;
448                 apad = newSVsv(apad);
449                 sv_x(aTHX_ apad, " ", 1, blesslen+2);
450             }
451         }
452
453         (*levelp)++;
454         ipad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp);
455
456         if (is_regex) 
457         {
458             STRLEN rlen;
459             const char *rval = SvPV(val, rlen);
460             const char *slash = strchr(rval, '/');
461             sv_catpvn(retval, "qr/", 3);
462             while (slash) {
463                 sv_catpvn(retval, rval, slash-rval);
464                 sv_catpvn(retval, "\\/", 2);
465                 rlen -= slash-rval+1;
466                 rval = slash+1;
467                 slash = strchr(rval, '/');
468             }
469             sv_catpvn(retval, rval, rlen);
470             sv_catpvn(retval, "/", 1);
471         } 
472         else if (
473 #if PERL_VERSION < 9
474                 realtype <= SVt_PVBM
475 #else
476                 realtype <= SVt_PVMG
477 #endif
478         ) {                          /* scalar ref */
479             SV * const namesv = newSVpvn("${", 2);
480             sv_catpvn(namesv, name, namelen);
481             sv_catpvn(namesv, "}", 1);
482             if (realpack) {                                  /* blessed */
483                 sv_catpvn(retval, "do{\\(my $o = ", 13);
484                 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
485                         postav, levelp, indent, pad, xpad, apad, sep, pair,
486                         freezer, toaster, purity, deepcopy, quotekeys, bless,
487                         maxdepth, sortkeys);
488                 sv_catpvn(retval, ")}", 2);
489             }                                                /* plain */
490             else {
491                 sv_catpvn(retval, "\\", 1);
492                 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
493                         postav, levelp, indent, pad, xpad, apad, sep, pair,
494                         freezer, toaster, purity, deepcopy, quotekeys, bless,
495                         maxdepth, sortkeys);
496             }
497             SvREFCNT_dec(namesv);
498         }
499         else if (realtype == SVt_PVGV) {                     /* glob ref */
500             SV * const namesv = newSVpvn("*{", 2);
501             sv_catpvn(namesv, name, namelen);
502             sv_catpvn(namesv, "}", 1);
503             sv_catpvn(retval, "\\", 1);
504             DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
505                     postav, levelp,     indent, pad, xpad, apad, sep, pair,
506                     freezer, toaster, purity, deepcopy, quotekeys, bless,
507                     maxdepth, sortkeys);
508             SvREFCNT_dec(namesv);
509         }
510         else if (realtype == SVt_PVAV) {
511             SV *totpad;
512             I32 ix = 0;
513             const I32 ixmax = av_len((AV *)ival);
514         
515             SV * const ixsv = newSViv(0);
516             /* allowing for a 24 char wide array index */
517             New(0, iname, namelen+28, char);
518             (void)strcpy(iname, name);
519             inamelen = namelen;
520             if (name[0] == '@') {
521                 sv_catpvn(retval, "(", 1);
522                 iname[0] = '$';
523             }
524             else {
525                 sv_catpvn(retval, "[", 1);
526                 /* omit "->" in $foo{bar}->[0], but not in ${$foo}->[0] */
527                 /*if (namelen > 0
528                     && name[namelen-1] != ']' && name[namelen-1] != '}'
529                     && (namelen < 4 || (name[1] != '{' && name[2] != '{')))*/
530                 if ((namelen > 0
531                      && name[namelen-1] != ']' && name[namelen-1] != '}')
532                     || (namelen > 4
533                         && (name[1] == '{'
534                             || (name[0] == '\\' && name[2] == '{'))))
535                 {
536                     iname[inamelen++] = '-'; iname[inamelen++] = '>';
537                     iname[inamelen] = '\0';
538                 }
539             }
540             if (iname[0] == '*' && iname[inamelen-1] == '}' && inamelen >= 8 &&
541                 (instr(iname+inamelen-8, "{SCALAR}") ||
542                  instr(iname+inamelen-7, "{ARRAY}") ||
543                  instr(iname+inamelen-6, "{HASH}"))) {
544                 iname[inamelen++] = '-'; iname[inamelen++] = '>';
545             }
546             iname[inamelen++] = '['; iname[inamelen] = '\0';
547             totpad = newSVsv(sep);
548             sv_catsv(totpad, pad);
549             sv_catsv(totpad, apad);
550
551             for (ix = 0; ix <= ixmax; ++ix) {
552                 STRLEN ilen;
553                 SV *elem;
554                 svp = av_fetch((AV*)ival, ix, FALSE);
555                 if (svp)
556                     elem = *svp;
557                 else
558                     elem = &PL_sv_undef;
559                 
560                 ilen = inamelen;
561                 sv_setiv(ixsv, ix);
562 #if PERL_VERSION < 10
563                 (void) sprintf(iname+ilen, "%"IVdf, (IV)ix);
564                 ilen = strlen(iname);
565 #else
566                 ilen = ilen + my_sprintf(iname+ilen, "%"IVdf, (IV)ix);
567 #endif
568                 iname[ilen++] = ']'; iname[ilen] = '\0';
569                 if (indent >= 3) {
570                     sv_catsv(retval, totpad);
571                     sv_catsv(retval, ipad);
572                     sv_catpvn(retval, "#", 1);
573                     sv_catsv(retval, ixsv);
574                 }
575                 sv_catsv(retval, totpad);
576                 sv_catsv(retval, ipad);
577                 DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav,
578                         levelp, indent, pad, xpad, apad, sep, pair,
579                         freezer, toaster, purity, deepcopy, quotekeys, bless,
580                         maxdepth, sortkeys);
581                 if (ix < ixmax)
582                     sv_catpvn(retval, ",", 1);
583             }
584             if (ixmax >= 0) {
585                 SV * const opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), (*levelp)-1);
586                 sv_catsv(retval, totpad);
587                 sv_catsv(retval, opad);
588                 SvREFCNT_dec(opad);
589             }
590             if (name[0] == '@')
591                 sv_catpvn(retval, ")", 1);
592             else
593                 sv_catpvn(retval, "]", 1);
594             SvREFCNT_dec(ixsv);
595             SvREFCNT_dec(totpad);
596             Safefree(iname);
597         }
598         else if (realtype == SVt_PVHV) {
599             SV *totpad, *newapad;
600             SV *sname;
601             HE *entry;
602             char *key;
603             I32 klen;
604             SV *hval;
605             AV *keys = NULL;
606         
607             SV * const iname = newSVpvn(name, namelen);
608             if (name[0] == '%') {
609                 sv_catpvn(retval, "(", 1);
610                 (SvPVX(iname))[0] = '$';
611             }
612             else {
613                 sv_catpvn(retval, "{", 1);
614                 /* omit "->" in $foo[0]->{bar}, but not in ${$foo}->{bar} */
615                 if ((namelen > 0
616                      && name[namelen-1] != ']' && name[namelen-1] != '}')
617                     || (namelen > 4
618                         && (name[1] == '{'
619                             || (name[0] == '\\' && name[2] == '{'))))
620                 {
621                     sv_catpvn(iname, "->", 2);
622                 }
623             }
624             if (name[0] == '*' && name[namelen-1] == '}' && namelen >= 8 &&
625                 (instr(name+namelen-8, "{SCALAR}") ||
626                  instr(name+namelen-7, "{ARRAY}") ||
627                  instr(name+namelen-6, "{HASH}"))) {
628                 sv_catpvn(iname, "->", 2);
629             }
630             sv_catpvn(iname, "{", 1);
631             totpad = newSVsv(sep);
632             sv_catsv(totpad, pad);
633             sv_catsv(totpad, apad);
634         
635             /* If requested, get a sorted/filtered array of hash keys */
636             if (sortkeys) {
637                 if (sortkeys == &PL_sv_yes) {
638 #if PERL_VERSION < 8
639                     sortkeys = sv_2mortal(newSVpvn("Data::Dumper::_sortkeys", 23));
640 #else
641                     keys = newAV();
642                     (void)hv_iterinit((HV*)ival);
643                     while ((entry = hv_iternext((HV*)ival))) {
644                         sv = hv_iterkeysv(entry);
645                         SvREFCNT_inc(sv);
646                         av_push(keys, sv);
647                     }
648 # ifdef USE_LOCALE_NUMERIC
649                     sortsv(AvARRAY(keys), 
650                            av_len(keys)+1, 
651                            IN_LOCALE ? Perl_sv_cmp_locale : Perl_sv_cmp);
652 # else
653                     sortsv(AvARRAY(keys), 
654                            av_len(keys)+1, 
655                            Perl_sv_cmp);
656 # endif
657 #endif
658                 }
659                 if (sortkeys != &PL_sv_yes) {
660                     dSP; ENTER; SAVETMPS; PUSHMARK(sp);
661                     XPUSHs(sv_2mortal(newRV_inc(ival))); PUTBACK;
662                     i = perl_call_sv(sortkeys, G_SCALAR | G_EVAL);
663                     SPAGAIN;
664                     if (i) {
665                         sv = POPs;
666                         if (SvROK(sv) && (SvTYPE(SvRV(sv)) == SVt_PVAV))
667                             keys = (AV*)SvREFCNT_inc(SvRV(sv));
668                     }
669                     if (! keys)
670                         warn("Sortkeys subroutine did not return ARRAYREF\n");
671                     PUTBACK; FREETMPS; LEAVE;
672                 }
673                 if (keys)
674                     sv_2mortal((SV*)keys);
675             }
676             else
677                 (void)hv_iterinit((HV*)ival);
678
679             /* foreach (keys %hash) */
680             for (i = 0; 1; i++) {
681                 char *nkey;
682                 char *nkey_buffer = NULL;
683                 I32 nticks = 0;
684                 SV* keysv;
685                 STRLEN keylen;
686                 I32 nlen;
687                 bool do_utf8 = FALSE;
688
689                if (sortkeys) {
690                    if (!(keys && (I32)i <= av_len(keys))) break;
691                } else {
692                    if (!(entry = hv_iternext((HV *)ival))) break;
693                }
694
695                 if (i)
696                     sv_catpvn(retval, ",", 1);
697
698                 if (sortkeys) {
699                     char *key;
700                     svp = av_fetch(keys, i, FALSE);
701                     keysv = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
702                     key = SvPV(keysv, keylen);
703                     svp = hv_fetch((HV*)ival, key,
704                                    SvUTF8(keysv) ? -(I32)keylen : keylen, 0);
705                     hval = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
706                 }
707                 else {
708                     keysv = hv_iterkeysv(entry);
709                     hval = hv_iterval((HV*)ival, entry);
710                 }
711
712                 key = SvPV(keysv, keylen);
713                 do_utf8 = DO_UTF8(keysv);
714                 klen = keylen;
715
716                 sv_catsv(retval, totpad);
717                 sv_catsv(retval, ipad);
718                 /* old logic was first to check utf8 flag, and if utf8 always
719                    call esc_q_utf8.  This caused test to break under -Mutf8,
720                    because there even strings like 'c' have utf8 flag on.
721                    Hence with quotekeys == 0 the XS code would still '' quote
722                    them based on flags, whereas the perl code would not,
723                    based on regexps.
724                    The perl code is correct.
725                    needs_quote() decides that anything that isn't a valid
726                    perl identifier needs to be quoted, hence only correctly
727                    formed strings with no characters outside [A-Za-z0-9_:]
728                    won't need quoting.  None of those characters are used in
729                    the byte encoding of utf8, so anything with utf8
730                    encoded characters in will need quoting. Hence strings
731                    with utf8 encoded characters in will end up inside do_utf8
732                    just like before, but now strings with utf8 flag set but
733                    only ascii characters will end up in the unquoted section.
734
735                    There should also be less tests for the (probably currently)
736                    more common doesn't need quoting case.
737                    The code is also smaller (22044 vs 22260) because I've been
738                    able to pull the common logic out to both sides.  */
739                 if (quotekeys || needs_quote(key)) {
740                     if (do_utf8) {
741                         STRLEN ocur = SvCUR(retval);
742                         nlen = esc_q_utf8(aTHX_ retval, key, klen);
743                         nkey = SvPVX(retval) + ocur;
744                     }
745                     else {
746                         nticks = num_q(key, klen);
747                         New(0, nkey_buffer, klen+nticks+3, char);
748                         nkey = nkey_buffer;
749                         nkey[0] = '\'';
750                         if (nticks)
751                             klen += esc_q(nkey+1, key, klen);
752                         else
753                             (void)Copy(key, nkey+1, klen, char);
754                         nkey[++klen] = '\'';
755                         nkey[++klen] = '\0';
756                         nlen = klen;
757                         sv_catpvn(retval, nkey, klen);
758                     }
759                 }
760                 else {
761                     nkey = key;
762                     nlen = klen;
763                     sv_catpvn(retval, nkey, klen);
764                 }
765                 sname = newSVsv(iname);
766                 sv_catpvn(sname, nkey, nlen);
767                 sv_catpvn(sname, "}", 1);
768
769                 sv_catsv(retval, pair);
770                 if (indent >= 2) {
771                     char *extra;
772                     I32 elen = 0;
773                     newapad = newSVsv(apad);
774                     New(0, extra, klen+4+1, char);
775                     while (elen < (klen+4))
776                         extra[elen++] = ' ';
777                     extra[elen] = '\0';
778                     sv_catpvn(newapad, extra, elen);
779                     Safefree(extra);
780                 }
781                 else
782                     newapad = apad;
783
784                 DD_dump(aTHX_ hval, SvPVX_const(sname), SvCUR(sname), retval, seenhv,
785                         postav, levelp, indent, pad, xpad, newapad, sep, pair,
786                         freezer, toaster, purity, deepcopy, quotekeys, bless,
787                         maxdepth, sortkeys);
788                 SvREFCNT_dec(sname);
789                 Safefree(nkey_buffer);
790                 if (indent >= 2)
791                     SvREFCNT_dec(newapad);
792             }
793             if (i) {
794                 SV *opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp-1);
795                 sv_catsv(retval, totpad);
796                 sv_catsv(retval, opad);
797                 SvREFCNT_dec(opad);
798             }
799             if (name[0] == '%')
800                 sv_catpvn(retval, ")", 1);
801             else
802                 sv_catpvn(retval, "}", 1);
803             SvREFCNT_dec(iname);
804             SvREFCNT_dec(totpad);
805         }
806         else if (realtype == SVt_PVCV) {
807             sv_catpvn(retval, "sub { \"DUMMY\" }", 15);
808             if (purity)
809                 warn("Encountered CODE ref, using dummy placeholder");
810         }
811         else {
812             warn("cannot handle ref type %ld", realtype);
813         }
814
815         if (realpack && !no_bless) {  /* free blessed allocs */
816             I32 plen;
817             I32 pticks;
818
819             if (indent >= 2) {
820                 SvREFCNT_dec(apad);
821                 apad = blesspad;
822             }
823             sv_catpvn(retval, ", '", 3);
824
825             plen = strlen(realpack);
826             pticks = num_q(realpack, plen);
827             if (pticks) { /* needs escaping */
828                 char *npack;
829                 char *npack_buffer = NULL;
830
831                 New(0, npack_buffer, plen+pticks+1, char);
832                 npack = npack_buffer;
833                 plen += esc_q(npack, realpack, plen);
834                 npack[plen] = '\0';
835
836                 sv_catpvn(retval, npack, plen);
837                 Safefree(npack_buffer);
838             }
839             else {
840                 sv_catpvn(retval, realpack, strlen(realpack));
841             }
842             sv_catpvn(retval, "' )", 3);
843             if (toaster && SvPOK(toaster) && SvCUR(toaster)) {
844                 sv_catpvn(retval, "->", 2);
845                 sv_catsv(retval, toaster);
846                 sv_catpvn(retval, "()", 2);
847             }
848         }
849         SvREFCNT_dec(ipad);
850         (*levelp)--;
851     }
852     else {
853         STRLEN i;
854         
855         if (namelen) {
856 #ifdef DD_USE_OLD_ID_FORMAT
857             idlen = my_snprintf(id, sizeof(id), "0x%"UVxf, PTR2UV(val));
858 #else
859             id_buffer = PTR2UV(val);
860             idlen = sizeof(id_buffer);
861 #endif
862             if ((svp = hv_fetch(seenhv, id, idlen, FALSE)) &&
863                 (sv = *svp) && SvROK(sv) &&
864                 (seenentry = (AV*)SvRV(sv)))
865             {
866                 SV *othername;
867                 if ((svp = av_fetch(seenentry, 0, FALSE)) && (othername = *svp)
868                     && (svp = av_fetch(seenentry, 2, FALSE)) && *svp && SvIV(*svp) > 0)
869                 {
870                     sv_catpvn(retval, "${", 2);
871                     sv_catsv(retval, othername);
872                     sv_catpvn(retval, "}", 1);
873                     return 1;
874                 }
875             }
876             else if (val != &PL_sv_undef) {
877                 SV * const namesv = newSVpvn("\\", 1);
878                 sv_catpvn(namesv, name, namelen);
879                 seenentry = newAV();
880                 av_push(seenentry, namesv);
881                 av_push(seenentry, newRV_inc(val));
882                 (void)hv_store(seenhv, id, idlen, newRV_inc((SV*)seenentry), 0);
883                 SvREFCNT_dec(seenentry);
884             }
885         }
886
887         if (DD_is_integer(val)) {
888             STRLEN len;
889             if (SvIsUV(val))
890               len = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"UVuf, SvUV(val));
891             else
892               len = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"IVdf, SvIV(val));
893             if (SvPOK(val)) {
894               /* Need to check to see if this is a string such as " 0".
895                  I'm assuming from sprintf isn't going to clash with utf8.
896                  Is this valid on EBCDIC?  */
897               STRLEN pvlen;
898               const char * const pv = SvPV(val, pvlen);
899               if (pvlen != len || memNE(pv, tmpbuf, len))
900                 goto integer_came_from_string;
901             }
902             if (len > 10) {
903               /* Looks like we're on a 64 bit system.  Make it a string so that
904                  if a 32 bit system reads the number it will cope better.  */
905               sv_catpvf(retval, "'%s'", tmpbuf);
906             } else
907               sv_catpvn(retval, tmpbuf, len);
908         }
909         else if (realtype == SVt_PVGV) {/* GLOBs can end up with scribbly names */
910             c = SvPV(val, i);
911             ++c; --i;                   /* just get the name */
912             if (i >= 6 && strncmp(c, "main::", 6) == 0) {
913                 c += 4;
914                 i -= 4;
915             }
916             if (needs_quote(c)) {
917                 sv_grow(retval, SvCUR(retval)+6+2*i);
918                 r = SvPVX(retval)+SvCUR(retval);
919                 r[0] = '*'; r[1] = '{'; r[2] = '\'';
920                 i += esc_q(r+3, c, i);
921                 i += 3;
922                 r[i++] = '\''; r[i++] = '}';
923                 r[i] = '\0';
924             }
925             else {
926                 sv_grow(retval, SvCUR(retval)+i+2);
927                 r = SvPVX(retval)+SvCUR(retval);
928                 r[0] = '*'; strcpy(r+1, c);
929                 i++;
930             }
931             SvCUR_set(retval, SvCUR(retval)+i);
932
933             if (purity) {
934                 static const char* const entries[] = { "{SCALAR}", "{ARRAY}", "{HASH}" };
935                 static const STRLEN sizes[] = { 8, 7, 6 };
936                 SV *e;
937                 SV * const nname = newSVpvn("", 0);
938                 SV * const newapad = newSVpvn("", 0);
939                 GV * const gv = (GV*)val;
940                 I32 j;
941                 
942                 for (j=0; j<3; j++) {
943                     e = ((j == 0) ? GvSV(gv) : (j == 1) ? (SV*)GvAV(gv) : (SV*)GvHV(gv));
944                     if (!e)
945                         continue;
946                     if (j == 0 && !SvOK(e))
947                         continue;
948
949                     {
950                         I32 nlevel = 0;
951                         SV *postentry = newSVpvn(r,i);
952                         
953                         sv_setsv(nname, postentry);
954                         sv_catpvn(nname, entries[j], sizes[j]);
955                         sv_catpvn(postentry, " = ", 3);
956                         av_push(postav, postentry);
957                         e = newRV_inc(e);
958                         
959                         SvCUR_set(newapad, 0);
960                         if (indent >= 2)
961                             (void)sv_x(aTHX_ newapad, " ", 1, SvCUR(postentry));
962                         
963                         DD_dump(aTHX_ e, SvPVX_const(nname), SvCUR(nname), postentry,
964                                 seenhv, postav, &nlevel, indent, pad, xpad,
965                                 newapad, sep, pair, freezer, toaster, purity,
966                                 deepcopy, quotekeys, bless, maxdepth, 
967                                 sortkeys);
968                         SvREFCNT_dec(e);
969                     }
970                 }
971                 
972                 SvREFCNT_dec(newapad);
973                 SvREFCNT_dec(nname);
974             }
975         }
976         else if (val == &PL_sv_undef || !SvOK(val)) {
977             sv_catpvn(retval, "undef", 5);
978         }
979         else {
980         integer_came_from_string:
981             c = SvPV(val, i);
982             if (DO_UTF8(val))
983                 i += esc_q_utf8(aTHX_ retval, c, i);
984             else {
985                 sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */
986                 r = SvPVX(retval) + SvCUR(retval);
987                 r[0] = '\'';
988                 i += esc_q(r+1, c, i);
989                 ++i;
990                 r[i++] = '\'';
991                 r[i] = '\0';
992                 SvCUR_set(retval, SvCUR(retval)+i);
993             }
994         }
995     }
996
997     if (idlen) {
998         if (deepcopy)
999             (void)hv_delete(seenhv, id, idlen, G_DISCARD);
1000         else if (namelen && seenentry) {
1001             SV *mark = *av_fetch(seenentry, 2, TRUE);
1002             sv_setiv(mark,1);
1003         }
1004     }
1005     return 1;
1006 }
1007
1008
1009 MODULE = Data::Dumper           PACKAGE = Data::Dumper         PREFIX = Data_Dumper_
1010
1011 #
1012 # This is the exact equivalent of Dump.  Well, almost. The things that are
1013 # different as of now (due to Laziness):
1014 #   * doesnt do double-quotes yet.
1015 #
1016
1017 void
1018 Data_Dumper_Dumpxs(href, ...)
1019         SV      *href;
1020         PROTOTYPE: $;$$
1021         PPCODE:
1022         {
1023             HV *hv;
1024             SV *retval, *valstr;
1025             HV *seenhv = NULL;
1026             AV *postav, *todumpav, *namesav;
1027             I32 level = 0;
1028             I32 indent, terse, i, imax, postlen;
1029             SV **svp;
1030             SV *val, *name, *pad, *xpad, *apad, *sep, *pair, *varname;
1031             SV *freezer, *toaster, *bless, *sortkeys;
1032             I32 purity, deepcopy, quotekeys, maxdepth = 0;
1033             char tmpbuf[1024];
1034             I32 gimme = GIMME;
1035
1036             if (!SvROK(href)) {         /* call new to get an object first */
1037                 if (items < 2)
1038                     croak("Usage: Data::Dumper::Dumpxs(PACKAGE, VAL_ARY_REF, [NAME_ARY_REF])");
1039                 
1040                 ENTER;
1041                 SAVETMPS;
1042                 
1043                 PUSHMARK(sp);
1044                 XPUSHs(href);
1045                 XPUSHs(sv_2mortal(newSVsv(ST(1))));
1046                 if (items >= 3)
1047                     XPUSHs(sv_2mortal(newSVsv(ST(2))));
1048                 PUTBACK;
1049                 i = perl_call_method("new", G_SCALAR);
1050                 SPAGAIN;
1051                 if (i)
1052                     href = newSVsv(POPs);
1053
1054                 PUTBACK;
1055                 FREETMPS;
1056                 LEAVE;
1057                 if (i)
1058                     (void)sv_2mortal(href);
1059             }
1060
1061             todumpav = namesav = NULL;
1062             seenhv = NULL;
1063             val = pad = xpad = apad = sep = pair = varname
1064                 = freezer = toaster = bless = sortkeys = &PL_sv_undef;
1065             name = sv_newmortal();
1066             indent = 2;
1067             terse = purity = deepcopy = 0;
1068             quotekeys = 1;
1069         
1070             retval = newSVpvn("", 0);
1071             if (SvROK(href)
1072                 && (hv = (HV*)SvRV((SV*)href))
1073                 && SvTYPE(hv) == SVt_PVHV)              {
1074
1075                 if ((svp = hv_fetch(hv, "seen", 4, FALSE)) && SvROK(*svp))
1076                     seenhv = (HV*)SvRV(*svp);
1077                 if ((svp = hv_fetch(hv, "todump", 6, FALSE)) && SvROK(*svp))
1078                     todumpav = (AV*)SvRV(*svp);
1079                 if ((svp = hv_fetch(hv, "names", 5, FALSE)) && SvROK(*svp))
1080                     namesav = (AV*)SvRV(*svp);
1081                 if ((svp = hv_fetch(hv, "indent", 6, FALSE)))
1082                     indent = SvIV(*svp);
1083                 if ((svp = hv_fetch(hv, "purity", 6, FALSE)))
1084                     purity = SvIV(*svp);
1085                 if ((svp = hv_fetch(hv, "terse", 5, FALSE)))
1086                     terse = SvTRUE(*svp);
1087 #if 0 /* useqq currently unused */
1088                 if ((svp = hv_fetch(hv, "useqq", 5, FALSE)))
1089                     useqq = SvTRUE(*svp);
1090 #endif
1091                 if ((svp = hv_fetch(hv, "pad", 3, FALSE)))
1092                     pad = *svp;
1093                 if ((svp = hv_fetch(hv, "xpad", 4, FALSE)))
1094                     xpad = *svp;
1095                 if ((svp = hv_fetch(hv, "apad", 4, FALSE)))
1096                     apad = *svp;
1097                 if ((svp = hv_fetch(hv, "sep", 3, FALSE)))
1098                     sep = *svp;
1099                 if ((svp = hv_fetch(hv, "pair", 4, FALSE)))
1100                     pair = *svp;
1101                 if ((svp = hv_fetch(hv, "varname", 7, FALSE)))
1102                     varname = *svp;
1103                 if ((svp = hv_fetch(hv, "freezer", 7, FALSE)))
1104                     freezer = *svp;
1105                 if ((svp = hv_fetch(hv, "toaster", 7, FALSE)))
1106                     toaster = *svp;
1107                 if ((svp = hv_fetch(hv, "deepcopy", 8, FALSE)))
1108                     deepcopy = SvTRUE(*svp);
1109                 if ((svp = hv_fetch(hv, "quotekeys", 9, FALSE)))
1110                     quotekeys = SvTRUE(*svp);
1111                 if ((svp = hv_fetch(hv, "bless", 5, FALSE)))
1112                     bless = *svp;
1113                 if ((svp = hv_fetch(hv, "maxdepth", 8, FALSE)))
1114                     maxdepth = SvIV(*svp);
1115                 if ((svp = hv_fetch(hv, "sortkeys", 8, FALSE))) {
1116                     sortkeys = *svp;
1117                     if (! SvTRUE(sortkeys))
1118                         sortkeys = NULL;
1119                     else if (! (SvROK(sortkeys) &&
1120                                 SvTYPE(SvRV(sortkeys)) == SVt_PVCV) )
1121                     {
1122                         /* flag to use qsortsv() for sorting hash keys */       
1123                         sortkeys = &PL_sv_yes; 
1124                     }
1125                 }
1126                 postav = newAV();
1127
1128                 if (todumpav)
1129                     imax = av_len(todumpav);
1130                 else
1131                     imax = -1;
1132                 valstr = newSVpvn("",0);
1133                 for (i = 0; i <= imax; ++i) {
1134                     SV *newapad;
1135                 
1136                     av_clear(postav);
1137                     if ((svp = av_fetch(todumpav, i, FALSE)))
1138                         val = *svp;
1139                     else
1140                         val = &PL_sv_undef;
1141                     if ((svp = av_fetch(namesav, i, TRUE))) {
1142                         sv_setsv(name, *svp);
1143                         if (SvOK(*svp) && !SvPOK(*svp))
1144                             (void)SvPV_nolen_const(name);
1145                     }
1146                     else
1147                         (void)SvOK_off(name);
1148                 
1149                     if (SvPOK(name)) {
1150                         if ((SvPVX_const(name))[0] == '*') {
1151                             if (SvROK(val)) {
1152                                 switch (SvTYPE(SvRV(val))) {
1153                                 case SVt_PVAV:
1154                                     (SvPVX(name))[0] = '@';
1155                                     break;
1156                                 case SVt_PVHV:
1157                                     (SvPVX(name))[0] = '%';
1158                                     break;
1159                                 case SVt_PVCV:
1160                                     (SvPVX(name))[0] = '*';
1161                                     break;
1162                                 default:
1163                                     (SvPVX(name))[0] = '$';
1164                                     break;
1165                                 }
1166                             }
1167                             else
1168                                 (SvPVX(name))[0] = '$';
1169                         }
1170                         else if ((SvPVX_const(name))[0] != '$')
1171                             sv_insert(name, 0, 0, "$", 1);
1172                     }
1173                     else {
1174                         STRLEN nchars;
1175                         sv_setpvn(name, "$", 1);
1176                         sv_catsv(name, varname);
1177                         nchars = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"IVdf, (IV)(i+1));
1178                         sv_catpvn(name, tmpbuf, nchars);
1179                     }
1180                 
1181                     if (indent >= 2) {
1182                         SV * const tmpsv = sv_x(aTHX_ NULL, " ", 1, SvCUR(name)+3);
1183                         newapad = newSVsv(apad);
1184                         sv_catsv(newapad, tmpsv);
1185                         SvREFCNT_dec(tmpsv);
1186                     }
1187                     else
1188                         newapad = apad;
1189                 
1190                     DD_dump(aTHX_ val, SvPVX_const(name), SvCUR(name), valstr, seenhv,
1191                             postav, &level, indent, pad, xpad, newapad, sep, pair,
1192                             freezer, toaster, purity, deepcopy, quotekeys,
1193                             bless, maxdepth, sortkeys);
1194                 
1195                     if (indent >= 2)
1196                         SvREFCNT_dec(newapad);
1197
1198                     postlen = av_len(postav);
1199                     if (postlen >= 0 || !terse) {
1200                         sv_insert(valstr, 0, 0, " = ", 3);
1201                         sv_insert(valstr, 0, 0, SvPVX_const(name), SvCUR(name));
1202                         sv_catpvn(valstr, ";", 1);
1203                     }
1204                     sv_catsv(retval, pad);
1205                     sv_catsv(retval, valstr);
1206                     sv_catsv(retval, sep);
1207                     if (postlen >= 0) {
1208                         I32 i;
1209                         sv_catsv(retval, pad);
1210                         for (i = 0; i <= postlen; ++i) {
1211                             SV *elem;
1212                             svp = av_fetch(postav, i, FALSE);
1213                             if (svp && (elem = *svp)) {
1214                                 sv_catsv(retval, elem);
1215                                 if (i < postlen) {
1216                                     sv_catpvn(retval, ";", 1);
1217                                     sv_catsv(retval, sep);
1218                                     sv_catsv(retval, pad);
1219                                 }
1220                             }
1221                         }
1222                         sv_catpvn(retval, ";", 1);
1223                             sv_catsv(retval, sep);
1224                     }
1225                     sv_setpvn(valstr, "", 0);
1226                     if (gimme == G_ARRAY) {
1227                         XPUSHs(sv_2mortal(retval));
1228                         if (i < imax)   /* not the last time thro ? */
1229                             retval = newSVpvn("",0);
1230                     }
1231                 }
1232                 SvREFCNT_dec(postav);
1233                 SvREFCNT_dec(valstr);
1234             }
1235             else
1236                 croak("Call to new() method failed to return HASH ref");
1237             if (gimme == G_SCALAR)
1238                 XPUSHs(sv_2mortal(retval));
1239         }