This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
z/OS: CPAN-ized ext/ and lib/
[perl5.git] / ext / Data / Dumper / Dumper.xs
1 #define PERL_NO_GET_CONTEXT
2 #include "EXTERN.h"
3 #include "perl.h"
4 #include "XSUB.h"
5
6 static I32 num_q (const char *s, STRLEN slen);
7 static I32 esc_q (char *dest, const char *src, STRLEN slen);
8 static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen);
9 static I32 needs_quote(register const char *s);
10 static SV *sv_x (pTHX_ SV *sv, const char *str, STRLEN len, I32 n);
11 static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
12                     HV *seenhv, AV *postav, I32 *levelp, I32 indent,
13                     SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair,
14                     SV *freezer, SV *toaster,
15                     I32 purity, I32 deepcopy, I32 quotekeys, SV *bless,
16                     I32 maxdepth, SV *sortkeys);
17
18 #ifndef HvNAME_get
19 #define HvNAME_get HvNAME
20 #endif
21
22 #if PERL_VERSION <= 6 /* Perl 5.6 and earlier */
23
24 # ifdef EBCDIC
25 #  define UNI_TO_NATIVE(ch) (((ch) > 255) ? (ch) : ASCII_TO_NATIVE(ch))
26 # else
27 #  define UNI_TO_NATIVE(ch) (ch)
28 # endif
29
30 UV
31 Perl_utf8_to_uvchr(pTHX_ U8 *s, STRLEN *retlen)
32 {
33     const UV uv = utf8_to_uv(s, UTF8_MAXLEN, retlen,
34                     ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
35     return UNI_TO_NATIVE(uv);
36 }
37
38 # if !defined(PERL_IMPLICIT_CONTEXT)
39 #  define utf8_to_uvchr      Perl_utf8_to_uvchr
40 # else
41 #  define utf8_to_uvchr(a,b) Perl_utf8_to_uvchr(aTHX_ a,b)
42 # endif
43
44 #endif /* PERL_VERSION <= 6 */
45
46 /* Changes in 5.7 series mean that now IOK is only set if scalar is
47    precisely integer but in 5.6 and earlier we need to do a more
48    complex test  */
49 #if PERL_VERSION <= 6
50 #define DD_is_integer(sv) (SvIOK(sv) && (SvIsUV(val) ? SvUV(sv) == SvNV(sv) : SvIV(sv) == SvNV(sv)))
51 #else
52 #define DD_is_integer(sv) SvIOK(sv)
53 #endif
54
55 /* does a string need to be protected? */
56 static I32
57 needs_quote(register const char *s)
58 {
59 TOP:
60     if (s[0] == ':') {
61         if (*++s) {
62             if (*s++ != ':')
63                 return 1;
64         }
65         else
66             return 1;
67     }
68     if (isIDFIRST(*s)) {
69         while (*++s)
70             if (!isALNUM(*s)) {
71                 if (*s == ':')
72                     goto TOP;
73                 else
74                     return 1;
75             }
76     }
77     else
78         return 1;
79     return 0;
80 }
81
82 /* count the number of "'"s and "\"s in string */
83 static I32
84 num_q(register const char *s, register STRLEN slen)
85 {
86     register I32 ret = 0;
87
88     while (slen > 0) {
89         if (*s == '\'' || *s == '\\')
90             ++ret;
91         ++s;
92         --slen;
93     }
94     return ret;
95 }
96
97
98 /* returns number of chars added to escape "'"s and "\"s in s */
99 /* slen number of characters in s will be escaped */
100 /* destination must be long enough for additional chars */
101 static I32
102 esc_q(register char *d, register const char *s, register STRLEN slen)
103 {
104     register I32 ret = 0;
105
106     while (slen > 0) {
107         switch (*s) {
108         case '\'':
109         case '\\':
110             *d = '\\';
111             ++d; ++ret;
112         default:
113             *d = *s;
114             ++d; ++s; --slen;
115             break;
116         }
117     }
118     return ret;
119 }
120
121 static I32
122 esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen)
123 {
124     char *r, *rstart;
125     const char *s = src;
126     const char * const send = src + slen;
127     STRLEN j, cur = SvCUR(sv);
128     /* Could count 128-255 and 256+ in two variables, if we want to
129        be like &qquote and make a distinction.  */
130     STRLEN grow = 0;    /* bytes needed to represent chars 128+ */
131     /* STRLEN topbit_grow = 0;  bytes needed to represent chars 128-255 */
132     STRLEN backslashes = 0;
133     STRLEN single_quotes = 0;
134     STRLEN qq_escapables = 0;   /* " $ @ will need a \ in "" strings.  */
135     STRLEN normal = 0;
136
137     /* this will need EBCDICification */
138     for (s = src; s < send; s += UTF8SKIP(s)) {
139         const UV k = utf8_to_uvchr((U8*)s, NULL);
140
141 #ifdef EBCDIC
142         if (!isprint(k) || k > 256) {
143 #else
144         if (k > 127) {
145 #endif
146             /* 4: \x{} then count the number of hex digits.  */
147             grow += 4 + (k <= 0xFF ? 2 : k <= 0xFFF ? 3 : k <= 0xFFFF ? 4 :
148 #if UVSIZE == 4
149                 8 /* We may allocate a bit more than the minimum here.  */
150 #else
151                 k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
152 #endif
153                 );
154         } else if (k == '\\') {
155             backslashes++;
156         } else if (k == '\'') {
157             single_quotes++;
158         } else if (k == '"' || k == '$' || k == '@') {
159             qq_escapables++;
160         } else {
161             normal++;
162         }
163     }
164     if (grow) {
165         /* We have something needing hex. 3 is ""\0 */
166         sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
167                 + 2*qq_escapables + normal);
168         rstart = r = SvPVX(sv) + cur;
169
170         *r++ = '"';
171
172         for (s = src; s < send; s += UTF8SKIP(s)) {
173             const UV k = utf8_to_uvchr((U8*)s, NULL);
174
175             if (k == '"' || k == '\\' || k == '$' || k == '@') {
176                 *r++ = '\\';
177                 *r++ = (char)k;
178             }
179             else
180 #ifdef EBCDIC
181               if (isprint(k) && k < 256)
182 #else
183               if (k < 0x80)
184 #endif
185                 *r++ = (char)k;
186             else {
187               /* The return value of sprintf() is unportable.
188                * In modern systems it returns (int) the number of characters,
189                * but in older systems it might return (char*) the original
190                * buffer, or it might even be (void).  The easiest portable
191                * thing to do is probably use sprintf() in void context and
192                * then strlen(buffer) for the length.  The more proper way
193                * would of course be to figure out the prototype of sprintf.
194                * --jhi */
195                 sprintf(r, "\\x{%"UVxf"}", k);
196                 r += strlen(r);
197             }
198         }
199         *r++ = '"';
200     } else {
201         /* Single quotes.  */
202         sv_grow(sv, cur + 3 + 2*backslashes + 2*single_quotes
203                 + qq_escapables + normal);
204         rstart = r = SvPVX(sv) + cur;
205         *r++ = '\'';
206         for (s = src; s < send; s ++) {
207             const char k = *s;
208             if (k == '\'' || k == '\\')
209                 *r++ = '\\';
210             *r++ = k;
211         }
212         *r++ = '\'';
213     }
214     *r = '\0';
215     j = r - rstart;
216     SvCUR_set(sv, cur + j);
217
218     return j;
219 }
220
221 /* append a repeated string to an SV */
222 static SV *
223 sv_x(pTHX_ SV *sv, const char *str, STRLEN len, I32 n)
224 {
225     if (!sv)
226         sv = newSVpvn("", 0);
227 #ifdef DEBUGGING
228     else
229         assert(SvTYPE(sv) >= SVt_PV);
230 #endif
231
232     if (n > 0) {
233         SvGROW(sv, len*n + SvCUR(sv) + 1);
234         if (len == 1) {
235             char * const start = SvPVX(sv) + SvCUR(sv);
236             SvCUR_set(sv, SvCUR(sv) + n);
237             start[n] = '\0';
238             while (n > 0)
239                 start[--n] = str[0];
240         }
241         else
242             while (n > 0) {
243                 sv_catpvn(sv, str, len);
244                 --n;
245             }
246     }
247     return sv;
248 }
249
250 /*
251  * This ought to be split into smaller functions. (it is one long function since
252  * it exactly parallels the perl version, which was one long thing for
253  * efficiency raisins.)  Ugggh!
254  */
255 static I32
256 DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
257         AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad,
258         SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity,
259         I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys)
260 {
261     char tmpbuf[128];
262     U32 i;
263     char *c, *r, *realpack, id[128];
264     SV **svp;
265     SV *sv, *ipad, *ival;
266     SV *blesspad = Nullsv;
267     AV *seenentry = NULL;
268     char *iname;
269     STRLEN inamelen, idlen = 0;
270     U32 realtype;
271
272     if (!val)
273         return 0;
274
275     realtype = SvTYPE(val);
276
277     if (SvGMAGICAL(val))
278         mg_get(val);
279     if (SvROK(val)) {
280
281         /* If a freeze method is provided and the object has it, call
282            it.  Warn on errors. */
283         if (SvOBJECT(SvRV(val)) && freezer &&
284             SvPOK(freezer) && SvCUR(freezer) &&
285             gv_fetchmeth(SvSTASH(SvRV(val)), SvPVX_const(freezer), 
286                          SvCUR(freezer), -1) != NULL)
287         {
288             dSP; ENTER; SAVETMPS; PUSHMARK(sp);
289             XPUSHs(val); PUTBACK;
290             i = perl_call_method(SvPVX_const(freezer), G_EVAL|G_VOID);
291             SPAGAIN;
292             if (SvTRUE(ERRSV))
293                 warn("WARNING(Freezer method call failed): %"SVf"", ERRSV);
294             PUTBACK; FREETMPS; LEAVE;
295         }
296         
297         ival = SvRV(val);
298         realtype = SvTYPE(ival);
299         (void) sprintf(id, "0x%"UVxf, PTR2UV(ival));
300         idlen = strlen(id);
301         if (SvOBJECT(ival))
302             realpack = HvNAME_get(SvSTASH(ival));
303         else
304             realpack = Nullch;
305
306         /* if it has a name, we need to either look it up, or keep a tab
307          * on it so we know when we hit it later
308          */
309         if (namelen) {
310             if ((svp = hv_fetch(seenhv, id, idlen, FALSE))
311                 && (sv = *svp) && SvROK(sv) && (seenentry = (AV*)SvRV(sv)))
312             {
313                 SV *othername;
314                 if ((svp = av_fetch(seenentry, 0, FALSE))
315                     && (othername = *svp))
316                 {
317                     if (purity && *levelp > 0) {
318                         SV *postentry;
319                         
320                         if (realtype == SVt_PVHV)
321                             sv_catpvn(retval, "{}", 2);
322                         else if (realtype == SVt_PVAV)
323                             sv_catpvn(retval, "[]", 2);
324                         else
325                             sv_catpvn(retval, "do{my $o}", 9);
326                         postentry = newSVpvn(name, namelen);
327                         sv_catpvn(postentry, " = ", 3);
328                         sv_catsv(postentry, othername);
329                         av_push(postav, postentry);
330                     }
331                     else {
332                         if (name[0] == '@' || name[0] == '%') {
333                             if ((SvPVX_const(othername))[0] == '\\' &&
334                                 (SvPVX_const(othername))[1] == name[0]) {
335                                 sv_catpvn(retval, SvPVX_const(othername)+1,
336                                           SvCUR(othername)-1);
337                             }
338                             else {
339                                 sv_catpvn(retval, name, 1);
340                                 sv_catpvn(retval, "{", 1);
341                                 sv_catsv(retval, othername);
342                                 sv_catpvn(retval, "}", 1);
343                             }
344                         }
345                         else
346                             sv_catsv(retval, othername);
347                     }
348                     return 1;
349                 }
350                 else {
351                     warn("ref name not found for %s", id);
352                     return 0;
353                 }
354             }
355             else {   /* store our name and continue */
356                 SV *namesv;
357                 if (name[0] == '@' || name[0] == '%') {
358                     namesv = newSVpvn("\\", 1);
359                     sv_catpvn(namesv, name, namelen);
360                 }
361                 else if (realtype == SVt_PVCV && name[0] == '*') {
362                     namesv = newSVpvn("\\", 2);
363                     sv_catpvn(namesv, name, namelen);
364                     (SvPVX(namesv))[1] = '&';
365                 }
366                 else
367                     namesv = newSVpvn(name, namelen);
368                 seenentry = newAV();
369                 av_push(seenentry, namesv);
370                 (void)SvREFCNT_inc(val);
371                 av_push(seenentry, val);
372                 (void)hv_store(seenhv, id, strlen(id),
373                                newRV_inc((SV*)seenentry), 0);
374                 SvREFCNT_dec(seenentry);
375             }
376         }
377
378         if (realpack && *realpack == 'R' && strEQ(realpack, "Regexp")) {
379             STRLEN rlen;
380             const char *rval = SvPV(val, rlen);
381             const char *slash = strchr(rval, '/');
382             sv_catpvn(retval, "qr/", 3);
383             while (slash) {
384                 sv_catpvn(retval, rval, slash-rval);
385                 sv_catpvn(retval, "\\/", 2);
386                 rlen -= slash-rval+1;
387                 rval = slash+1;
388                 slash = strchr(rval, '/');
389             }
390             sv_catpvn(retval, rval, rlen);
391             sv_catpvn(retval, "/", 1);
392             return 1;
393         }
394
395         /* If purity is not set and maxdepth is set, then check depth:
396          * if we have reached maximum depth, return the string
397          * representation of the thing we are currently examining
398          * at this depth (i.e., 'Foo=ARRAY(0xdeadbeef)').
399          */
400         if (!purity && maxdepth > 0 && *levelp >= maxdepth) {
401             STRLEN vallen;
402             const char * const valstr = SvPV(val,vallen);
403             sv_catpvn(retval, "'", 1);
404             sv_catpvn(retval, valstr, vallen);
405             sv_catpvn(retval, "'", 1);
406             return 1;
407         }
408
409         if (realpack) {                         /* we have a blessed ref */
410             STRLEN blesslen;
411             const char * const blessstr = SvPV(bless, blesslen);
412             sv_catpvn(retval, blessstr, blesslen);
413             sv_catpvn(retval, "( ", 2);
414             if (indent >= 2) {
415                 blesspad = apad;
416                 apad = newSVsv(apad);
417                 sv_x(aTHX_ apad, " ", 1, blesslen+2);
418             }
419         }
420
421         (*levelp)++;
422         ipad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp);
423
424         if (realtype <= SVt_PVBM) {                          /* scalar ref */
425             SV * const namesv = newSVpvn("${", 2);
426             sv_catpvn(namesv, name, namelen);
427             sv_catpvn(namesv, "}", 1);
428             if (realpack) {                                  /* blessed */
429                 sv_catpvn(retval, "do{\\(my $o = ", 13);
430                 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
431                         postav, levelp, indent, pad, xpad, apad, sep, pair,
432                         freezer, toaster, purity, deepcopy, quotekeys, bless,
433                         maxdepth, sortkeys);
434                 sv_catpvn(retval, ")}", 2);
435             }                                                /* plain */
436             else {
437                 sv_catpvn(retval, "\\", 1);
438                 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
439                         postav, levelp, indent, pad, xpad, apad, sep, pair,
440                         freezer, toaster, purity, deepcopy, quotekeys, bless,
441                         maxdepth, sortkeys);
442             }
443             SvREFCNT_dec(namesv);
444         }
445         else if (realtype == SVt_PVGV) {                     /* glob ref */
446             SV * const namesv = newSVpvn("*{", 2);
447             sv_catpvn(namesv, name, namelen);
448             sv_catpvn(namesv, "}", 1);
449             sv_catpvn(retval, "\\", 1);
450             DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
451                     postav, levelp,     indent, pad, xpad, apad, sep, pair,
452                     freezer, toaster, purity, deepcopy, quotekeys, bless,
453                     maxdepth, sortkeys);
454             SvREFCNT_dec(namesv);
455         }
456         else if (realtype == SVt_PVAV) {
457             SV *totpad;
458             I32 ix = 0;
459             const I32 ixmax = av_len((AV *)ival);
460         
461             SV * const ixsv = newSViv(0);
462             /* allowing for a 24 char wide array index */
463             New(0, iname, namelen+28, char);
464             (void)strcpy(iname, name);
465             inamelen = namelen;
466             if (name[0] == '@') {
467                 sv_catpvn(retval, "(", 1);
468                 iname[0] = '$';
469             }
470             else {
471                 sv_catpvn(retval, "[", 1);
472                 /* omit "->" in $foo{bar}->[0], but not in ${$foo}->[0] */
473                 /*if (namelen > 0
474                     && name[namelen-1] != ']' && name[namelen-1] != '}'
475                     && (namelen < 4 || (name[1] != '{' && name[2] != '{')))*/
476                 if ((namelen > 0
477                      && name[namelen-1] != ']' && name[namelen-1] != '}')
478                     || (namelen > 4
479                         && (name[1] == '{'
480                             || (name[0] == '\\' && name[2] == '{'))))
481                 {
482                     iname[inamelen++] = '-'; iname[inamelen++] = '>';
483                     iname[inamelen] = '\0';
484                 }
485             }
486             if (iname[0] == '*' && iname[inamelen-1] == '}' && inamelen >= 8 &&
487                 (instr(iname+inamelen-8, "{SCALAR}") ||
488                  instr(iname+inamelen-7, "{ARRAY}") ||
489                  instr(iname+inamelen-6, "{HASH}"))) {
490                 iname[inamelen++] = '-'; iname[inamelen++] = '>';
491             }
492             iname[inamelen++] = '['; iname[inamelen] = '\0';
493             totpad = newSVsv(sep);
494             sv_catsv(totpad, pad);
495             sv_catsv(totpad, apad);
496
497             for (ix = 0; ix <= ixmax; ++ix) {
498                 STRLEN ilen;
499                 SV *elem;
500                 svp = av_fetch((AV*)ival, ix, FALSE);
501                 if (svp)
502                     elem = *svp;
503                 else
504                     elem = &PL_sv_undef;
505                 
506                 ilen = inamelen;
507                 sv_setiv(ixsv, ix);
508                 (void) sprintf(iname+ilen, "%"IVdf, (IV)ix);
509                 ilen = strlen(iname);
510                 iname[ilen++] = ']'; iname[ilen] = '\0';
511                 if (indent >= 3) {
512                     sv_catsv(retval, totpad);
513                     sv_catsv(retval, ipad);
514                     sv_catpvn(retval, "#", 1);
515                     sv_catsv(retval, ixsv);
516                 }
517                 sv_catsv(retval, totpad);
518                 sv_catsv(retval, ipad);
519                 DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav,
520                         levelp, indent, pad, xpad, apad, sep, pair,
521                         freezer, toaster, purity, deepcopy, quotekeys, bless,
522                         maxdepth, sortkeys);
523                 if (ix < ixmax)
524                     sv_catpvn(retval, ",", 1);
525             }
526             if (ixmax >= 0) {
527                 SV * const opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), (*levelp)-1);
528                 sv_catsv(retval, totpad);
529                 sv_catsv(retval, opad);
530                 SvREFCNT_dec(opad);
531             }
532             if (name[0] == '@')
533                 sv_catpvn(retval, ")", 1);
534             else
535                 sv_catpvn(retval, "]", 1);
536             SvREFCNT_dec(ixsv);
537             SvREFCNT_dec(totpad);
538             Safefree(iname);
539         }
540         else if (realtype == SVt_PVHV) {
541             SV *totpad, *newapad;
542             SV *sname;
543             HE *entry;
544             char *key;
545             I32 klen;
546             SV *hval;
547             AV *keys = NULL;
548         
549             SV * const iname = newSVpvn(name, namelen);
550             if (name[0] == '%') {
551                 sv_catpvn(retval, "(", 1);
552                 (SvPVX(iname))[0] = '$';
553             }
554             else {
555                 sv_catpvn(retval, "{", 1);
556                 /* omit "->" in $foo[0]->{bar}, but not in ${$foo}->{bar} */
557                 if ((namelen > 0
558                      && name[namelen-1] != ']' && name[namelen-1] != '}')
559                     || (namelen > 4
560                         && (name[1] == '{'
561                             || (name[0] == '\\' && name[2] == '{'))))
562                 {
563                     sv_catpvn(iname, "->", 2);
564                 }
565             }
566             if (name[0] == '*' && name[namelen-1] == '}' && namelen >= 8 &&
567                 (instr(name+namelen-8, "{SCALAR}") ||
568                  instr(name+namelen-7, "{ARRAY}") ||
569                  instr(name+namelen-6, "{HASH}"))) {
570                 sv_catpvn(iname, "->", 2);
571             }
572             sv_catpvn(iname, "{", 1);
573             totpad = newSVsv(sep);
574             sv_catsv(totpad, pad);
575             sv_catsv(totpad, apad);
576         
577             /* If requested, get a sorted/filtered array of hash keys */
578             if (sortkeys) {
579                 if (sortkeys == &PL_sv_yes) {
580 #if PERL_VERSION < 8
581                     sortkeys = sv_2mortal(newSVpvn("Data::Dumper::_sortkeys", 23));
582 #else
583                     keys = newAV();
584                     (void)hv_iterinit((HV*)ival);
585                     while ((entry = hv_iternext((HV*)ival))) {
586                         sv = hv_iterkeysv(entry);
587                         SvREFCNT_inc(sv);
588                         av_push(keys, sv);
589                     }
590 # ifdef USE_LOCALE_NUMERIC
591                     sortsv(AvARRAY(keys), 
592                            av_len(keys)+1, 
593                            IN_LOCALE ? Perl_sv_cmp_locale : Perl_sv_cmp);
594 # else
595                     sortsv(AvARRAY(keys), 
596                            av_len(keys)+1, 
597                            Perl_sv_cmp);
598 # endif
599 #endif
600                 }
601                 if (sortkeys != &PL_sv_yes) {
602                     dSP; ENTER; SAVETMPS; PUSHMARK(sp);
603                     XPUSHs(sv_2mortal(newRV_inc(ival))); PUTBACK;
604                     i = perl_call_sv(sortkeys, G_SCALAR | G_EVAL);
605                     SPAGAIN;
606                     if (i) {
607                         sv = POPs;
608                         if (SvROK(sv) && (SvTYPE(SvRV(sv)) == SVt_PVAV))
609                             keys = (AV*)SvREFCNT_inc(SvRV(sv));
610                     }
611                     if (! keys)
612                         warn("Sortkeys subroutine did not return ARRAYREF\n");
613                     PUTBACK; FREETMPS; LEAVE;
614                 }
615                 if (keys)
616                     sv_2mortal((SV*)keys);
617             }
618             else
619                 (void)hv_iterinit((HV*)ival);
620
621             /* foreach (keys %hash) */
622             for (i = 0; 1; i++) {
623                 char *nkey;
624                 char *nkey_buffer = NULL;
625                 I32 nticks = 0;
626                 SV* keysv;
627                 STRLEN keylen;
628                 I32 nlen;
629                 bool do_utf8 = FALSE;
630
631                if (sortkeys) {
632                    if (!(keys && (I32)i <= av_len(keys))) break;
633                } else {
634                    if (!(entry = hv_iternext((HV *)ival))) break;
635                }
636
637                 if (i)
638                     sv_catpvn(retval, ",", 1);
639
640                 if (sortkeys) {
641                     char *key;
642                     svp = av_fetch(keys, i, FALSE);
643                     keysv = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
644                     key = SvPV(keysv, keylen);
645                     svp = hv_fetch((HV*)ival, key,
646                                    SvUTF8(keysv) ? -(I32)keylen : keylen, 0);
647                     hval = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
648                 }
649                 else {
650                     keysv = hv_iterkeysv(entry);
651                     hval = hv_iterval((HV*)ival, entry);
652                 }
653
654                 key = SvPV(keysv, keylen);
655                 do_utf8 = DO_UTF8(keysv);
656                 klen = keylen;
657
658                 sv_catsv(retval, totpad);
659                 sv_catsv(retval, ipad);
660                 /* old logic was first to check utf8 flag, and if utf8 always
661                    call esc_q_utf8.  This caused test to break under -Mutf8,
662                    because there even strings like 'c' have utf8 flag on.
663                    Hence with quotekeys == 0 the XS code would still '' quote
664                    them based on flags, whereas the perl code would not,
665                    based on regexps.
666                    The perl code is correct.
667                    needs_quote() decides that anything that isn't a valid
668                    perl identifier needs to be quoted, hence only correctly
669                    formed strings with no characters outside [A-Za-z0-9_:]
670                    won't need quoting.  None of those characters are used in
671                    the byte encoding of utf8, so anything with utf8
672                    encoded characters in will need quoting. Hence strings
673                    with utf8 encoded characters in will end up inside do_utf8
674                    just like before, but now strings with utf8 flag set but
675                    only ascii characters will end up in the unquoted section.
676
677                    There should also be less tests for the (probably currently)
678                    more common doesn't need quoting case.
679                    The code is also smaller (22044 vs 22260) because I've been
680                    able to pull the common logic out to both sides.  */
681                 if (quotekeys || needs_quote(key)) {
682                     if (do_utf8) {
683                         STRLEN ocur = SvCUR(retval);
684                         nlen = esc_q_utf8(aTHX_ retval, key, klen);
685                         nkey = SvPVX(retval) + ocur;
686                     }
687                     else {
688                         nticks = num_q(key, klen);
689                         New(0, nkey_buffer, klen+nticks+3, char);
690                         nkey = nkey_buffer;
691                         nkey[0] = '\'';
692                         if (nticks)
693                             klen += esc_q(nkey+1, key, klen);
694                         else
695                             (void)Copy(key, nkey+1, klen, char);
696                         nkey[++klen] = '\'';
697                         nkey[++klen] = '\0';
698                         nlen = klen;
699                         sv_catpvn(retval, nkey, klen);
700                     }
701                 }
702                 else {
703                     nkey = key;
704                     nlen = klen;
705                     sv_catpvn(retval, nkey, klen);
706                 }
707                 sname = newSVsv(iname);
708                 sv_catpvn(sname, nkey, nlen);
709                 sv_catpvn(sname, "}", 1);
710
711                 sv_catsv(retval, pair);
712                 if (indent >= 2) {
713                     char *extra;
714                     I32 elen = 0;
715                     newapad = newSVsv(apad);
716                     New(0, extra, klen+4+1, char);
717                     while (elen < (klen+4))
718                         extra[elen++] = ' ';
719                     extra[elen] = '\0';
720                     sv_catpvn(newapad, extra, elen);
721                     Safefree(extra);
722                 }
723                 else
724                     newapad = apad;
725
726                 DD_dump(aTHX_ hval, SvPVX_const(sname), SvCUR(sname), retval, seenhv,
727                         postav, levelp, indent, pad, xpad, newapad, sep, pair,
728                         freezer, toaster, purity, deepcopy, quotekeys, bless,
729                         maxdepth, sortkeys);
730                 SvREFCNT_dec(sname);
731                 Safefree(nkey_buffer);
732                 if (indent >= 2)
733                     SvREFCNT_dec(newapad);
734             }
735             if (i) {
736                 SV *opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp-1);
737                 sv_catsv(retval, totpad);
738                 sv_catsv(retval, opad);
739                 SvREFCNT_dec(opad);
740             }
741             if (name[0] == '%')
742                 sv_catpvn(retval, ")", 1);
743             else
744                 sv_catpvn(retval, "}", 1);
745             SvREFCNT_dec(iname);
746             SvREFCNT_dec(totpad);
747         }
748         else if (realtype == SVt_PVCV) {
749             sv_catpvn(retval, "sub { \"DUMMY\" }", 15);
750             if (purity)
751                 warn("Encountered CODE ref, using dummy placeholder");
752         }
753         else {
754             warn("cannot handle ref type %ld", realtype);
755         }
756
757         if (realpack) {  /* free blessed allocs */
758             if (indent >= 2) {
759                 SvREFCNT_dec(apad);
760                 apad = blesspad;
761             }
762             sv_catpvn(retval, ", '", 3);
763             sv_catpvn(retval, realpack, strlen(realpack));
764             sv_catpvn(retval, "' )", 3);
765             if (toaster && SvPOK(toaster) && SvCUR(toaster)) {
766                 sv_catpvn(retval, "->", 2);
767                 sv_catsv(retval, toaster);
768                 sv_catpvn(retval, "()", 2);
769             }
770         }
771         SvREFCNT_dec(ipad);
772         (*levelp)--;
773     }
774     else {
775         STRLEN i;
776         
777         if (namelen) {
778             (void) sprintf(id, "0x%"UVxf, PTR2UV(val));
779             if ((svp = hv_fetch(seenhv, id, (idlen = strlen(id)), FALSE)) &&
780                 (sv = *svp) && SvROK(sv) &&
781                 (seenentry = (AV*)SvRV(sv)))
782             {
783                 SV *othername;
784                 if ((svp = av_fetch(seenentry, 0, FALSE)) && (othername = *svp)
785                     && (svp = av_fetch(seenentry, 2, FALSE)) && *svp && SvIV(*svp) > 0)
786                 {
787                     sv_catpvn(retval, "${", 2);
788                     sv_catsv(retval, othername);
789                     sv_catpvn(retval, "}", 1);
790                     return 1;
791                 }
792             }
793             else if (val != &PL_sv_undef) {
794                 SV * const namesv = newSVpvn("\\", 1);
795                 sv_catpvn(namesv, name, namelen);
796                 seenentry = newAV();
797                 av_push(seenentry, namesv);
798                 av_push(seenentry, newRV_inc(val));
799                 (void)hv_store(seenhv, id, strlen(id), newRV_inc((SV*)seenentry), 0);
800                 SvREFCNT_dec(seenentry);
801             }
802         }
803
804         if (DD_is_integer(val)) {
805             STRLEN len;
806             if (SvIsUV(val))
807               (void) sprintf(tmpbuf, "%"UVuf, SvUV(val));
808             else
809               (void) sprintf(tmpbuf, "%"IVdf, SvIV(val));
810             len = strlen(tmpbuf);
811             if (SvPOK(val)) {
812               /* Need to check to see if this is a string such as " 0".
813                  I'm assuming from sprintf isn't going to clash with utf8.
814                  Is this valid on EBCDIC?  */
815               STRLEN pvlen;
816               const char * const pv = SvPV(val, pvlen);
817               if (pvlen != len || memNE(pv, tmpbuf, len))
818                 goto integer_came_from_string;
819             }
820             if (len > 10) {
821               /* Looks like we're on a 64 bit system.  Make it a string so that
822                  if a 32 bit system reads the number it will cope better.  */
823               sv_catpvf(retval, "'%s'", tmpbuf);
824             } else
825               sv_catpvn(retval, tmpbuf, len);
826         }
827         else if (realtype == SVt_PVGV) {/* GLOBs can end up with scribbly names */
828             c = SvPV(val, i);
829             ++c; --i;                   /* just get the name */
830             if (i >= 6 && strncmp(c, "main::", 6) == 0) {
831                 c += 4;
832                 i -= 4;
833             }
834             if (needs_quote(c)) {
835                 sv_grow(retval, SvCUR(retval)+6+2*i);
836                 r = SvPVX(retval)+SvCUR(retval);
837                 r[0] = '*'; r[1] = '{'; r[2] = '\'';
838                 i += esc_q(r+3, c, i);
839                 i += 3;
840                 r[i++] = '\''; r[i++] = '}';
841                 r[i] = '\0';
842             }
843             else {
844                 sv_grow(retval, SvCUR(retval)+i+2);
845                 r = SvPVX(retval)+SvCUR(retval);
846                 r[0] = '*'; strcpy(r+1, c);
847                 i++;
848             }
849             SvCUR_set(retval, SvCUR(retval)+i);
850
851             if (purity) {
852                 static const char* const entries[] = { "{SCALAR}", "{ARRAY}", "{HASH}" };
853                 static const STRLEN sizes[] = { 8, 7, 6 };
854                 SV *e;
855                 SV * const nname = newSVpvn("", 0);
856                 SV * const newapad = newSVpvn("", 0);
857                 GV * const gv = (GV*)val;
858                 I32 j;
859                 
860                 for (j=0; j<3; j++) {
861                     e = ((j == 0) ? GvSV(gv) : (j == 1) ? (SV*)GvAV(gv) : (SV*)GvHV(gv));
862                     if (!e)
863                         continue;
864                     if (j == 0 && !SvOK(e))
865                         continue;
866
867                     {
868                         I32 nlevel = 0;
869                         SV *postentry = newSVpvn(r,i);
870                         
871                         sv_setsv(nname, postentry);
872                         sv_catpvn(nname, entries[j], sizes[j]);
873                         sv_catpvn(postentry, " = ", 3);
874                         av_push(postav, postentry);
875                         e = newRV_inc(e);
876                         
877                         SvCUR_set(newapad, 0);
878                         if (indent >= 2)
879                             (void)sv_x(aTHX_ newapad, " ", 1, SvCUR(postentry));
880                         
881                         DD_dump(aTHX_ e, SvPVX_const(nname), SvCUR(nname), postentry,
882                                 seenhv, postav, &nlevel, indent, pad, xpad,
883                                 newapad, sep, pair, freezer, toaster, purity,
884                                 deepcopy, quotekeys, bless, maxdepth, 
885                                 sortkeys);
886                         SvREFCNT_dec(e);
887                     }
888                 }
889                 
890                 SvREFCNT_dec(newapad);
891                 SvREFCNT_dec(nname);
892             }
893         }
894         else if (val == &PL_sv_undef || !SvOK(val)) {
895             sv_catpvn(retval, "undef", 5);
896         }
897         else {
898         integer_came_from_string:
899             c = SvPV(val, i);
900             if (DO_UTF8(val))
901                 i += esc_q_utf8(aTHX_ retval, c, i);
902             else {
903                 sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */
904                 r = SvPVX(retval) + SvCUR(retval);
905                 r[0] = '\'';
906                 i += esc_q(r+1, c, i);
907                 ++i;
908                 r[i++] = '\'';
909                 r[i] = '\0';
910                 SvCUR_set(retval, SvCUR(retval)+i);
911             }
912         }
913     }
914
915     if (idlen) {
916         if (deepcopy)
917             (void)hv_delete(seenhv, id, idlen, G_DISCARD);
918         else if (namelen && seenentry) {
919             SV *mark = *av_fetch(seenentry, 2, TRUE);
920             sv_setiv(mark,1);
921         }
922     }
923     return 1;
924 }
925
926
927 MODULE = Data::Dumper           PACKAGE = Data::Dumper         PREFIX = Data_Dumper_
928
929 #
930 # This is the exact equivalent of Dump.  Well, almost. The things that are
931 # different as of now (due to Laziness):
932 #   * doesnt do double-quotes yet.
933 #
934
935 void
936 Data_Dumper_Dumpxs(href, ...)
937         SV      *href;
938         PROTOTYPE: $;$$
939         PPCODE:
940         {
941             HV *hv;
942             SV *retval, *valstr;
943             HV *seenhv = NULL;
944             AV *postav, *todumpav, *namesav;
945             I32 level = 0;
946             I32 indent, terse, i, imax, postlen;
947             SV **svp;
948             SV *val, *name, *pad, *xpad, *apad, *sep, *pair, *varname;
949             SV *freezer, *toaster, *bless, *sortkeys;
950             I32 purity, deepcopy, quotekeys, maxdepth = 0;
951             char tmpbuf[1024];
952             I32 gimme = GIMME;
953
954             if (!SvROK(href)) {         /* call new to get an object first */
955                 if (items < 2)
956                     croak("Usage: Data::Dumper::Dumpxs(PACKAGE, VAL_ARY_REF, [NAME_ARY_REF])");
957                 
958                 ENTER;
959                 SAVETMPS;
960                 
961                 PUSHMARK(sp);
962                 XPUSHs(href);
963                 XPUSHs(sv_2mortal(newSVsv(ST(1))));
964                 if (items >= 3)
965                     XPUSHs(sv_2mortal(newSVsv(ST(2))));
966                 PUTBACK;
967                 i = perl_call_method("new", G_SCALAR);
968                 SPAGAIN;
969                 if (i)
970                     href = newSVsv(POPs);
971
972                 PUTBACK;
973                 FREETMPS;
974                 LEAVE;
975                 if (i)
976                     (void)sv_2mortal(href);
977             }
978
979             todumpav = namesav = NULL;
980             seenhv = NULL;
981             val = pad = xpad = apad = sep = pair = varname
982                 = freezer = toaster = bless = sortkeys = &PL_sv_undef;
983             name = sv_newmortal();
984             indent = 2;
985             terse = purity = deepcopy = 0;
986             quotekeys = 1;
987         
988             retval = newSVpvn("", 0);
989             if (SvROK(href)
990                 && (hv = (HV*)SvRV((SV*)href))
991                 && SvTYPE(hv) == SVt_PVHV)              {
992
993                 if ((svp = hv_fetch(hv, "seen", 4, FALSE)) && SvROK(*svp))
994                     seenhv = (HV*)SvRV(*svp);
995                 if ((svp = hv_fetch(hv, "todump", 6, FALSE)) && SvROK(*svp))
996                     todumpav = (AV*)SvRV(*svp);
997                 if ((svp = hv_fetch(hv, "names", 5, FALSE)) && SvROK(*svp))
998                     namesav = (AV*)SvRV(*svp);
999                 if ((svp = hv_fetch(hv, "indent", 6, FALSE)))
1000                     indent = SvIV(*svp);
1001                 if ((svp = hv_fetch(hv, "purity", 6, FALSE)))
1002                     purity = SvIV(*svp);
1003                 if ((svp = hv_fetch(hv, "terse", 5, FALSE)))
1004                     terse = SvTRUE(*svp);
1005 #if 0 /* useqq currently unused */
1006                 if ((svp = hv_fetch(hv, "useqq", 5, FALSE)))
1007                     useqq = SvTRUE(*svp);
1008 #endif
1009                 if ((svp = hv_fetch(hv, "pad", 3, FALSE)))
1010                     pad = *svp;
1011                 if ((svp = hv_fetch(hv, "xpad", 4, FALSE)))
1012                     xpad = *svp;
1013                 if ((svp = hv_fetch(hv, "apad", 4, FALSE)))
1014                     apad = *svp;
1015                 if ((svp = hv_fetch(hv, "sep", 3, FALSE)))
1016                     sep = *svp;
1017                 if ((svp = hv_fetch(hv, "pair", 4, FALSE)))
1018                     pair = *svp;
1019                 if ((svp = hv_fetch(hv, "varname", 7, FALSE)))
1020                     varname = *svp;
1021                 if ((svp = hv_fetch(hv, "freezer", 7, FALSE)))
1022                     freezer = *svp;
1023                 if ((svp = hv_fetch(hv, "toaster", 7, FALSE)))
1024                     toaster = *svp;
1025                 if ((svp = hv_fetch(hv, "deepcopy", 8, FALSE)))
1026                     deepcopy = SvTRUE(*svp);
1027                 if ((svp = hv_fetch(hv, "quotekeys", 9, FALSE)))
1028                     quotekeys = SvTRUE(*svp);
1029                 if ((svp = hv_fetch(hv, "bless", 5, FALSE)))
1030                     bless = *svp;
1031                 if ((svp = hv_fetch(hv, "maxdepth", 8, FALSE)))
1032                     maxdepth = SvIV(*svp);
1033                 if ((svp = hv_fetch(hv, "sortkeys", 8, FALSE))) {
1034                     sortkeys = *svp;
1035                     if (! SvTRUE(sortkeys))
1036                         sortkeys = NULL;
1037                     else if (! (SvROK(sortkeys) &&
1038                                 SvTYPE(SvRV(sortkeys)) == SVt_PVCV) )
1039                     {
1040                         /* flag to use qsortsv() for sorting hash keys */       
1041                         sortkeys = &PL_sv_yes; 
1042                     }
1043                 }
1044                 postav = newAV();
1045
1046                 if (todumpav)
1047                     imax = av_len(todumpav);
1048                 else
1049                     imax = -1;
1050                 valstr = newSVpvn("",0);
1051                 for (i = 0; i <= imax; ++i) {
1052                     SV *newapad;
1053                 
1054                     av_clear(postav);
1055                     if ((svp = av_fetch(todumpav, i, FALSE)))
1056                         val = *svp;
1057                     else
1058                         val = &PL_sv_undef;
1059                     if ((svp = av_fetch(namesav, i, TRUE))) {
1060                         sv_setsv(name, *svp);
1061                         if (SvOK(*svp) && !SvPOK(*svp))
1062                             (void)SvPV_nolen_const(name);
1063                     }
1064                     else
1065                         (void)SvOK_off(name);
1066                 
1067                     if (SvPOK(name)) {
1068                         if ((SvPVX_const(name))[0] == '*') {
1069                             if (SvROK(val)) {
1070                                 switch (SvTYPE(SvRV(val))) {
1071                                 case SVt_PVAV:
1072                                     (SvPVX(name))[0] = '@';
1073                                     break;
1074                                 case SVt_PVHV:
1075                                     (SvPVX(name))[0] = '%';
1076                                     break;
1077                                 case SVt_PVCV:
1078                                     (SvPVX(name))[0] = '*';
1079                                     break;
1080                                 default:
1081                                     (SvPVX(name))[0] = '$';
1082                                     break;
1083                                 }
1084                             }
1085                             else
1086                                 (SvPVX(name))[0] = '$';
1087                         }
1088                         else if ((SvPVX_const(name))[0] != '$')
1089                             sv_insert(name, 0, 0, "$", 1);
1090                     }
1091                     else {
1092                         STRLEN nchars;
1093                         sv_setpvn(name, "$", 1);
1094                         sv_catsv(name, varname);
1095                         (void) sprintf(tmpbuf, "%"IVdf, (IV)(i+1));
1096                         nchars = strlen(tmpbuf);
1097                         sv_catpvn(name, tmpbuf, nchars);
1098                     }
1099                 
1100                     if (indent >= 2) {
1101                         SV * const tmpsv = sv_x(aTHX_ NULL, " ", 1, SvCUR(name)+3);
1102                         newapad = newSVsv(apad);
1103                         sv_catsv(newapad, tmpsv);
1104                         SvREFCNT_dec(tmpsv);
1105                     }
1106                     else
1107                         newapad = apad;
1108                 
1109                     DD_dump(aTHX_ val, SvPVX_const(name), SvCUR(name), valstr, seenhv,
1110                             postav, &level, indent, pad, xpad, newapad, sep, pair,
1111                             freezer, toaster, purity, deepcopy, quotekeys,
1112                             bless, maxdepth, sortkeys);
1113                 
1114                     if (indent >= 2)
1115                         SvREFCNT_dec(newapad);
1116
1117                     postlen = av_len(postav);
1118                     if (postlen >= 0 || !terse) {
1119                         sv_insert(valstr, 0, 0, " = ", 3);
1120                         sv_insert(valstr, 0, 0, SvPVX_const(name), SvCUR(name));
1121                         sv_catpvn(valstr, ";", 1);
1122                     }
1123                     sv_catsv(retval, pad);
1124                     sv_catsv(retval, valstr);
1125                     sv_catsv(retval, sep);
1126                     if (postlen >= 0) {
1127                         I32 i;
1128                         sv_catsv(retval, pad);
1129                         for (i = 0; i <= postlen; ++i) {
1130                             SV *elem;
1131                             svp = av_fetch(postav, i, FALSE);
1132                             if (svp && (elem = *svp)) {
1133                                 sv_catsv(retval, elem);
1134                                 if (i < postlen) {
1135                                     sv_catpvn(retval, ";", 1);
1136                                     sv_catsv(retval, sep);
1137                                     sv_catsv(retval, pad);
1138                                 }
1139                             }
1140                         }
1141                         sv_catpvn(retval, ";", 1);
1142                             sv_catsv(retval, sep);
1143                     }
1144                     sv_setpvn(valstr, "", 0);
1145                     if (gimme == G_ARRAY) {
1146                         XPUSHs(sv_2mortal(retval));
1147                         if (i < imax)   /* not the last time thro ? */
1148                             retval = newSVpvn("",0);
1149                     }
1150                 }
1151                 SvREFCNT_dec(postav);
1152                 SvREFCNT_dec(valstr);
1153             }
1154             else
1155                 croak("Call to new() method failed to return HASH ref");
1156             if (gimme == G_SCALAR)
1157                 XPUSHs(sv_2mortal(retval));
1158         }