This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
e45550b157f2fe99cb54d16de9e3d0237e6a8d00
[perl5.git] / ext / Data / Dumper / Dumper.xs
1 #define PERL_NO_GET_CONTEXT
2 #include "EXTERN.h"
3 #include "perl.h"
4 #include "XSUB.h"
5 #include "ppport.h"
6
7 static I32 num_q (const char *s, STRLEN slen);
8 static I32 esc_q (char *dest, const char *src, STRLEN slen);
9 static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen);
10 static I32 needs_quote(register const char *s);
11 static SV *sv_x (pTHX_ SV *sv, const char *str, STRLEN len, I32 n);
12 static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
13                     HV *seenhv, AV *postav, I32 *levelp, I32 indent,
14                     SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair,
15                     SV *freezer, SV *toaster,
16                     I32 purity, I32 deepcopy, I32 quotekeys, SV *bless,
17                     I32 maxdepth, SV *sortkeys);
18
19 #ifndef HvNAME_get
20 #define HvNAME_get HvNAME
21 #endif
22
23 #if PERL_VERSION <= 6 /* Perl 5.6 and earlier */
24
25 # ifdef EBCDIC
26 #  define UNI_TO_NATIVE(ch) (((ch) > 255) ? (ch) : ASCII_TO_NATIVE(ch))
27 # else
28 #  define UNI_TO_NATIVE(ch) (ch)
29 # endif
30
31 UV
32 Perl_utf8_to_uvchr(pTHX_ U8 *s, STRLEN *retlen)
33 {
34     const UV uv = utf8_to_uv(s, UTF8_MAXLEN, retlen,
35                     ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
36     return UNI_TO_NATIVE(uv);
37 }
38
39 # if !defined(PERL_IMPLICIT_CONTEXT)
40 #  define utf8_to_uvchr      Perl_utf8_to_uvchr
41 # else
42 #  define utf8_to_uvchr(a,b) Perl_utf8_to_uvchr(aTHX_ a,b)
43 # endif
44
45 #endif /* PERL_VERSION <= 6 */
46
47 /* Changes in 5.7 series mean that now IOK is only set if scalar is
48    precisely integer but in 5.6 and earlier we need to do a more
49    complex test  */
50 #if PERL_VERSION <= 6
51 #define DD_is_integer(sv) (SvIOK(sv) && (SvIsUV(val) ? SvUV(sv) == SvNV(sv) : SvIV(sv) == SvNV(sv)))
52 #else
53 #define DD_is_integer(sv) SvIOK(sv)
54 #endif
55
56 /* does a string need to be protected? */
57 static I32
58 needs_quote(register const char *s)
59 {
60 TOP:
61     if (s[0] == ':') {
62         if (*++s) {
63             if (*s++ != ':')
64                 return 1;
65         }
66         else
67             return 1;
68     }
69     if (isIDFIRST(*s)) {
70         while (*++s)
71             if (!isALNUM(*s)) {
72                 if (*s == ':')
73                     goto TOP;
74                 else
75                     return 1;
76             }
77     }
78     else
79         return 1;
80     return 0;
81 }
82
83 /* count the number of "'"s and "\"s in string */
84 static I32
85 num_q(register const char *s, register STRLEN slen)
86 {
87     register I32 ret = 0;
88
89     while (slen > 0) {
90         if (*s == '\'' || *s == '\\')
91             ++ret;
92         ++s;
93         --slen;
94     }
95     return ret;
96 }
97
98
99 /* returns number of chars added to escape "'"s and "\"s in s */
100 /* slen number of characters in s will be escaped */
101 /* destination must be long enough for additional chars */
102 static I32
103 esc_q(register char *d, register const char *s, register STRLEN slen)
104 {
105     register I32 ret = 0;
106
107     while (slen > 0) {
108         switch (*s) {
109         case '\'':
110         case '\\':
111             *d = '\\';
112             ++d; ++ret;
113         default:
114             *d = *s;
115             ++d; ++s; --slen;
116             break;
117         }
118     }
119     return ret;
120 }
121
122 static I32
123 esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen)
124 {
125     char *r, *rstart;
126     const char *s = src;
127     const char * const send = src + slen;
128     STRLEN j, cur = SvCUR(sv);
129     /* Could count 128-255 and 256+ in two variables, if we want to
130        be like &qquote and make a distinction.  */
131     STRLEN grow = 0;    /* bytes needed to represent chars 128+ */
132     /* STRLEN topbit_grow = 0;  bytes needed to represent chars 128-255 */
133     STRLEN backslashes = 0;
134     STRLEN single_quotes = 0;
135     STRLEN qq_escapables = 0;   /* " $ @ will need a \ in "" strings.  */
136     STRLEN normal = 0;
137
138     /* this will need EBCDICification */
139     for (s = src; s < send; s += UTF8SKIP(s)) {
140         const UV k = utf8_to_uvchr((U8*)s, NULL);
141
142 #ifdef EBCDIC
143         if (!isprint(k) || k > 256) {
144 #else
145         if (k > 127) {
146 #endif
147             /* 4: \x{} then count the number of hex digits.  */
148             grow += 4 + (k <= 0xFF ? 2 : k <= 0xFFF ? 3 : k <= 0xFFFF ? 4 :
149 #if UVSIZE == 4
150                 8 /* We may allocate a bit more than the minimum here.  */
151 #else
152                 k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
153 #endif
154                 );
155         } else if (k == '\\') {
156             backslashes++;
157         } else if (k == '\'') {
158             single_quotes++;
159         } else if (k == '"' || k == '$' || k == '@') {
160             qq_escapables++;
161         } else {
162             normal++;
163         }
164     }
165     if (grow) {
166         /* We have something needing hex. 3 is ""\0 */
167         sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
168                 + 2*qq_escapables + normal);
169         rstart = r = SvPVX(sv) + cur;
170
171         *r++ = '"';
172
173         for (s = src; s < send; s += UTF8SKIP(s)) {
174             const UV k = utf8_to_uvchr((U8*)s, NULL);
175
176             if (k == '"' || k == '\\' || k == '$' || k == '@') {
177                 *r++ = '\\';
178                 *r++ = (char)k;
179             }
180             else
181 #ifdef EBCDIC
182               if (isprint(k) && k < 256)
183 #else
184               if (k < 0x80)
185 #endif
186                 *r++ = (char)k;
187             else {
188                 r = r + my_sprintf(r, "\\x{%"UVxf"}", k);
189             }
190         }
191         *r++ = '"';
192     } else {
193         /* Single quotes.  */
194         sv_grow(sv, cur + 3 + 2*backslashes + 2*single_quotes
195                 + qq_escapables + normal);
196         rstart = r = SvPVX(sv) + cur;
197         *r++ = '\'';
198         for (s = src; s < send; s ++) {
199             const char k = *s;
200             if (k == '\'' || k == '\\')
201                 *r++ = '\\';
202             *r++ = k;
203         }
204         *r++ = '\'';
205     }
206     *r = '\0';
207     j = r - rstart;
208     SvCUR_set(sv, cur + j);
209
210     return j;
211 }
212
213 /* append a repeated string to an SV */
214 static SV *
215 sv_x(pTHX_ SV *sv, const char *str, STRLEN len, I32 n)
216 {
217     if (!sv)
218         sv = newSVpvn("", 0);
219 #ifdef DEBUGGING
220     else
221         assert(SvTYPE(sv) >= SVt_PV);
222 #endif
223
224     if (n > 0) {
225         SvGROW(sv, len*n + SvCUR(sv) + 1);
226         if (len == 1) {
227             char * const start = SvPVX(sv) + SvCUR(sv);
228             SvCUR_set(sv, SvCUR(sv) + n);
229             start[n] = '\0';
230             while (n > 0)
231                 start[--n] = str[0];
232         }
233         else
234             while (n > 0) {
235                 sv_catpvn(sv, str, len);
236                 --n;
237             }
238     }
239     return sv;
240 }
241
242 /*
243  * This ought to be split into smaller functions. (it is one long function since
244  * it exactly parallels the perl version, which was one long thing for
245  * efficiency raisins.)  Ugggh!
246  */
247 static I32
248 DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
249         AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad,
250         SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity,
251         I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys)
252 {
253     char tmpbuf[128];
254     U32 i;
255     char *c, *r, *realpack, id[128];
256     SV **svp;
257     SV *sv, *ipad, *ival;
258     SV *blesspad = Nullsv;
259     AV *seenentry = NULL;
260     char *iname;
261     STRLEN inamelen, idlen = 0;
262     U32 realtype;
263
264     if (!val)
265         return 0;
266
267     realtype = SvTYPE(val);
268
269     if (SvGMAGICAL(val))
270         mg_get(val);
271     if (SvROK(val)) {
272
273         /* If a freeze method is provided and the object has it, call
274            it.  Warn on errors. */
275         if (SvOBJECT(SvRV(val)) && freezer &&
276             SvPOK(freezer) && SvCUR(freezer) &&
277             gv_fetchmeth(SvSTASH(SvRV(val)), SvPVX_const(freezer), 
278                          SvCUR(freezer), -1) != NULL)
279         {
280             dSP; ENTER; SAVETMPS; PUSHMARK(sp);
281             XPUSHs(val); PUTBACK;
282             i = perl_call_method(SvPVX_const(freezer), G_EVAL|G_VOID);
283             SPAGAIN;
284             if (SvTRUE(ERRSV))
285                 warn("WARNING(Freezer method call failed): %"SVf"", ERRSV);
286             PUTBACK; FREETMPS; LEAVE;
287         }
288         
289         ival = SvRV(val);
290         realtype = SvTYPE(ival);
291         idlen = my_snprintf(id, sizeof(id), "0x%"UVxf, PTR2UV(ival));
292         if (SvOBJECT(ival))
293             realpack = HvNAME_get(SvSTASH(ival));
294         else
295             realpack = Nullch;
296
297         /* if it has a name, we need to either look it up, or keep a tab
298          * on it so we know when we hit it later
299          */
300         if (namelen) {
301             if ((svp = hv_fetch(seenhv, id, idlen, FALSE))
302                 && (sv = *svp) && SvROK(sv) && (seenentry = (AV*)SvRV(sv)))
303             {
304                 SV *othername;
305                 if ((svp = av_fetch(seenentry, 0, FALSE))
306                     && (othername = *svp))
307                 {
308                     if (purity && *levelp > 0) {
309                         SV *postentry;
310                         
311                         if (realtype == SVt_PVHV)
312                             sv_catpvn(retval, "{}", 2);
313                         else if (realtype == SVt_PVAV)
314                             sv_catpvn(retval, "[]", 2);
315                         else
316                             sv_catpvn(retval, "do{my $o}", 9);
317                         postentry = newSVpvn(name, namelen);
318                         sv_catpvn(postentry, " = ", 3);
319                         sv_catsv(postentry, othername);
320                         av_push(postav, postentry);
321                     }
322                     else {
323                         if (name[0] == '@' || name[0] == '%') {
324                             if ((SvPVX_const(othername))[0] == '\\' &&
325                                 (SvPVX_const(othername))[1] == name[0]) {
326                                 sv_catpvn(retval, SvPVX_const(othername)+1,
327                                           SvCUR(othername)-1);
328                             }
329                             else {
330                                 sv_catpvn(retval, name, 1);
331                                 sv_catpvn(retval, "{", 1);
332                                 sv_catsv(retval, othername);
333                                 sv_catpvn(retval, "}", 1);
334                             }
335                         }
336                         else
337                             sv_catsv(retval, othername);
338                     }
339                     return 1;
340                 }
341                 else {
342                     warn("ref name not found for %s", id);
343                     return 0;
344                 }
345             }
346             else {   /* store our name and continue */
347                 SV *namesv;
348                 if (name[0] == '@' || name[0] == '%') {
349                     namesv = newSVpvn("\\", 1);
350                     sv_catpvn(namesv, name, namelen);
351                 }
352                 else if (realtype == SVt_PVCV && name[0] == '*') {
353                     namesv = newSVpvn("\\", 2);
354                     sv_catpvn(namesv, name, namelen);
355                     (SvPVX(namesv))[1] = '&';
356                 }
357                 else
358                     namesv = newSVpvn(name, namelen);
359                 seenentry = newAV();
360                 av_push(seenentry, namesv);
361                 (void)SvREFCNT_inc(val);
362                 av_push(seenentry, val);
363                 (void)hv_store(seenhv, id, strlen(id),
364                                newRV_inc((SV*)seenentry), 0);
365                 SvREFCNT_dec(seenentry);
366             }
367         }
368
369         if (realpack && *realpack == 'R' && strEQ(realpack, "Regexp")) {
370             STRLEN rlen;
371             const char *rval = SvPV(val, rlen);
372             const char *slash = strchr(rval, '/');
373             sv_catpvn(retval, "qr/", 3);
374             while (slash) {
375                 sv_catpvn(retval, rval, slash-rval);
376                 sv_catpvn(retval, "\\/", 2);
377                 rlen -= slash-rval+1;
378                 rval = slash+1;
379                 slash = strchr(rval, '/');
380             }
381             sv_catpvn(retval, rval, rlen);
382             sv_catpvn(retval, "/", 1);
383             return 1;
384         }
385
386         /* If purity is not set and maxdepth is set, then check depth:
387          * if we have reached maximum depth, return the string
388          * representation of the thing we are currently examining
389          * at this depth (i.e., 'Foo=ARRAY(0xdeadbeef)').
390          */
391         if (!purity && maxdepth > 0 && *levelp >= maxdepth) {
392             STRLEN vallen;
393             const char * const valstr = SvPV(val,vallen);
394             sv_catpvn(retval, "'", 1);
395             sv_catpvn(retval, valstr, vallen);
396             sv_catpvn(retval, "'", 1);
397             return 1;
398         }
399
400         if (realpack) {                         /* we have a blessed ref */
401             STRLEN blesslen;
402             const char * const blessstr = SvPV(bless, blesslen);
403             sv_catpvn(retval, blessstr, blesslen);
404             sv_catpvn(retval, "( ", 2);
405             if (indent >= 2) {
406                 blesspad = apad;
407                 apad = newSVsv(apad);
408                 sv_x(aTHX_ apad, " ", 1, blesslen+2);
409             }
410         }
411
412         (*levelp)++;
413         ipad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp);
414
415         if (realtype <= SVt_PVBM) {                          /* scalar ref */
416             SV * const namesv = newSVpvn("${", 2);
417             sv_catpvn(namesv, name, namelen);
418             sv_catpvn(namesv, "}", 1);
419             if (realpack) {                                  /* blessed */
420                 sv_catpvn(retval, "do{\\(my $o = ", 13);
421                 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
422                         postav, levelp, indent, pad, xpad, apad, sep, pair,
423                         freezer, toaster, purity, deepcopy, quotekeys, bless,
424                         maxdepth, sortkeys);
425                 sv_catpvn(retval, ")}", 2);
426             }                                                /* plain */
427             else {
428                 sv_catpvn(retval, "\\", 1);
429                 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
430                         postav, levelp, indent, pad, xpad, apad, sep, pair,
431                         freezer, toaster, purity, deepcopy, quotekeys, bless,
432                         maxdepth, sortkeys);
433             }
434             SvREFCNT_dec(namesv);
435         }
436         else if (realtype == SVt_PVGV) {                     /* glob ref */
437             SV * const namesv = newSVpvn("*{", 2);
438             sv_catpvn(namesv, name, namelen);
439             sv_catpvn(namesv, "}", 1);
440             sv_catpvn(retval, "\\", 1);
441             DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
442                     postav, levelp,     indent, pad, xpad, apad, sep, pair,
443                     freezer, toaster, purity, deepcopy, quotekeys, bless,
444                     maxdepth, sortkeys);
445             SvREFCNT_dec(namesv);
446         }
447         else if (realtype == SVt_PVAV) {
448             SV *totpad;
449             I32 ix = 0;
450             const I32 ixmax = av_len((AV *)ival);
451         
452             SV * const ixsv = newSViv(0);
453             /* allowing for a 24 char wide array index */
454             New(0, iname, namelen+28, char);
455             (void)strcpy(iname, name);
456             inamelen = namelen;
457             if (name[0] == '@') {
458                 sv_catpvn(retval, "(", 1);
459                 iname[0] = '$';
460             }
461             else {
462                 sv_catpvn(retval, "[", 1);
463                 /* omit "->" in $foo{bar}->[0], but not in ${$foo}->[0] */
464                 /*if (namelen > 0
465                     && name[namelen-1] != ']' && name[namelen-1] != '}'
466                     && (namelen < 4 || (name[1] != '{' && name[2] != '{')))*/
467                 if ((namelen > 0
468                      && name[namelen-1] != ']' && name[namelen-1] != '}')
469                     || (namelen > 4
470                         && (name[1] == '{'
471                             || (name[0] == '\\' && name[2] == '{'))))
472                 {
473                     iname[inamelen++] = '-'; iname[inamelen++] = '>';
474                     iname[inamelen] = '\0';
475                 }
476             }
477             if (iname[0] == '*' && iname[inamelen-1] == '}' && inamelen >= 8 &&
478                 (instr(iname+inamelen-8, "{SCALAR}") ||
479                  instr(iname+inamelen-7, "{ARRAY}") ||
480                  instr(iname+inamelen-6, "{HASH}"))) {
481                 iname[inamelen++] = '-'; iname[inamelen++] = '>';
482             }
483             iname[inamelen++] = '['; iname[inamelen] = '\0';
484             totpad = newSVsv(sep);
485             sv_catsv(totpad, pad);
486             sv_catsv(totpad, apad);
487
488             for (ix = 0; ix <= ixmax; ++ix) {
489                 STRLEN ilen;
490                 SV *elem;
491                 svp = av_fetch((AV*)ival, ix, FALSE);
492                 if (svp)
493                     elem = *svp;
494                 else
495                     elem = &PL_sv_undef;
496                 
497                 ilen = inamelen;
498                 sv_setiv(ixsv, ix);
499                 ilen = ilen + my_sprintf(iname+ilen, "%"IVdf, (IV)ix);
500                 iname[ilen++] = ']'; iname[ilen] = '\0';
501                 if (indent >= 3) {
502                     sv_catsv(retval, totpad);
503                     sv_catsv(retval, ipad);
504                     sv_catpvn(retval, "#", 1);
505                     sv_catsv(retval, ixsv);
506                 }
507                 sv_catsv(retval, totpad);
508                 sv_catsv(retval, ipad);
509                 DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav,
510                         levelp, indent, pad, xpad, apad, sep, pair,
511                         freezer, toaster, purity, deepcopy, quotekeys, bless,
512                         maxdepth, sortkeys);
513                 if (ix < ixmax)
514                     sv_catpvn(retval, ",", 1);
515             }
516             if (ixmax >= 0) {
517                 SV * const opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), (*levelp)-1);
518                 sv_catsv(retval, totpad);
519                 sv_catsv(retval, opad);
520                 SvREFCNT_dec(opad);
521             }
522             if (name[0] == '@')
523                 sv_catpvn(retval, ")", 1);
524             else
525                 sv_catpvn(retval, "]", 1);
526             SvREFCNT_dec(ixsv);
527             SvREFCNT_dec(totpad);
528             Safefree(iname);
529         }
530         else if (realtype == SVt_PVHV) {
531             SV *totpad, *newapad;
532             SV *sname;
533             HE *entry;
534             char *key;
535             I32 klen;
536             SV *hval;
537             AV *keys = NULL;
538         
539             SV * const iname = newSVpvn(name, namelen);
540             if (name[0] == '%') {
541                 sv_catpvn(retval, "(", 1);
542                 (SvPVX(iname))[0] = '$';
543             }
544             else {
545                 sv_catpvn(retval, "{", 1);
546                 /* omit "->" in $foo[0]->{bar}, but not in ${$foo}->{bar} */
547                 if ((namelen > 0
548                      && name[namelen-1] != ']' && name[namelen-1] != '}')
549                     || (namelen > 4
550                         && (name[1] == '{'
551                             || (name[0] == '\\' && name[2] == '{'))))
552                 {
553                     sv_catpvn(iname, "->", 2);
554                 }
555             }
556             if (name[0] == '*' && name[namelen-1] == '}' && namelen >= 8 &&
557                 (instr(name+namelen-8, "{SCALAR}") ||
558                  instr(name+namelen-7, "{ARRAY}") ||
559                  instr(name+namelen-6, "{HASH}"))) {
560                 sv_catpvn(iname, "->", 2);
561             }
562             sv_catpvn(iname, "{", 1);
563             totpad = newSVsv(sep);
564             sv_catsv(totpad, pad);
565             sv_catsv(totpad, apad);
566         
567             /* If requested, get a sorted/filtered array of hash keys */
568             if (sortkeys) {
569                 if (sortkeys == &PL_sv_yes) {
570 #if PERL_VERSION < 8
571                     sortkeys = sv_2mortal(newSVpvn("Data::Dumper::_sortkeys", 23));
572 #else
573                     keys = newAV();
574                     (void)hv_iterinit((HV*)ival);
575                     while ((entry = hv_iternext((HV*)ival))) {
576                         sv = hv_iterkeysv(entry);
577                         SvREFCNT_inc(sv);
578                         av_push(keys, sv);
579                     }
580 # ifdef USE_LOCALE_NUMERIC
581                     sortsv(AvARRAY(keys), 
582                            av_len(keys)+1, 
583                            IN_LOCALE ? Perl_sv_cmp_locale : Perl_sv_cmp);
584 # else
585                     sortsv(AvARRAY(keys), 
586                            av_len(keys)+1, 
587                            Perl_sv_cmp);
588 # endif
589 #endif
590                 }
591                 if (sortkeys != &PL_sv_yes) {
592                     dSP; ENTER; SAVETMPS; PUSHMARK(sp);
593                     XPUSHs(sv_2mortal(newRV_inc(ival))); PUTBACK;
594                     i = perl_call_sv(sortkeys, G_SCALAR | G_EVAL);
595                     SPAGAIN;
596                     if (i) {
597                         sv = POPs;
598                         if (SvROK(sv) && (SvTYPE(SvRV(sv)) == SVt_PVAV))
599                             keys = (AV*)SvREFCNT_inc(SvRV(sv));
600                     }
601                     if (! keys)
602                         warn("Sortkeys subroutine did not return ARRAYREF\n");
603                     PUTBACK; FREETMPS; LEAVE;
604                 }
605                 if (keys)
606                     sv_2mortal((SV*)keys);
607             }
608             else
609                 (void)hv_iterinit((HV*)ival);
610
611             /* foreach (keys %hash) */
612             for (i = 0; 1; i++) {
613                 char *nkey;
614                 char *nkey_buffer = NULL;
615                 I32 nticks = 0;
616                 SV* keysv;
617                 STRLEN keylen;
618                 I32 nlen;
619                 bool do_utf8 = FALSE;
620
621                if (sortkeys) {
622                    if (!(keys && (I32)i <= av_len(keys))) break;
623                } else {
624                    if (!(entry = hv_iternext((HV *)ival))) break;
625                }
626
627                 if (i)
628                     sv_catpvn(retval, ",", 1);
629
630                 if (sortkeys) {
631                     char *key;
632                     svp = av_fetch(keys, i, FALSE);
633                     keysv = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
634                     key = SvPV(keysv, keylen);
635                     svp = hv_fetch((HV*)ival, key,
636                                    SvUTF8(keysv) ? -(I32)keylen : keylen, 0);
637                     hval = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
638                 }
639                 else {
640                     keysv = hv_iterkeysv(entry);
641                     hval = hv_iterval((HV*)ival, entry);
642                 }
643
644                 key = SvPV(keysv, keylen);
645                 do_utf8 = DO_UTF8(keysv);
646                 klen = keylen;
647
648                 sv_catsv(retval, totpad);
649                 sv_catsv(retval, ipad);
650                 /* old logic was first to check utf8 flag, and if utf8 always
651                    call esc_q_utf8.  This caused test to break under -Mutf8,
652                    because there even strings like 'c' have utf8 flag on.
653                    Hence with quotekeys == 0 the XS code would still '' quote
654                    them based on flags, whereas the perl code would not,
655                    based on regexps.
656                    The perl code is correct.
657                    needs_quote() decides that anything that isn't a valid
658                    perl identifier needs to be quoted, hence only correctly
659                    formed strings with no characters outside [A-Za-z0-9_:]
660                    won't need quoting.  None of those characters are used in
661                    the byte encoding of utf8, so anything with utf8
662                    encoded characters in will need quoting. Hence strings
663                    with utf8 encoded characters in will end up inside do_utf8
664                    just like before, but now strings with utf8 flag set but
665                    only ascii characters will end up in the unquoted section.
666
667                    There should also be less tests for the (probably currently)
668                    more common doesn't need quoting case.
669                    The code is also smaller (22044 vs 22260) because I've been
670                    able to pull the common logic out to both sides.  */
671                 if (quotekeys || needs_quote(key)) {
672                     if (do_utf8) {
673                         STRLEN ocur = SvCUR(retval);
674                         nlen = esc_q_utf8(aTHX_ retval, key, klen);
675                         nkey = SvPVX(retval) + ocur;
676                     }
677                     else {
678                         nticks = num_q(key, klen);
679                         New(0, nkey_buffer, klen+nticks+3, char);
680                         nkey = nkey_buffer;
681                         nkey[0] = '\'';
682                         if (nticks)
683                             klen += esc_q(nkey+1, key, klen);
684                         else
685                             (void)Copy(key, nkey+1, klen, char);
686                         nkey[++klen] = '\'';
687                         nkey[++klen] = '\0';
688                         nlen = klen;
689                         sv_catpvn(retval, nkey, klen);
690                     }
691                 }
692                 else {
693                     nkey = key;
694                     nlen = klen;
695                     sv_catpvn(retval, nkey, klen);
696                 }
697                 sname = newSVsv(iname);
698                 sv_catpvn(sname, nkey, nlen);
699                 sv_catpvn(sname, "}", 1);
700
701                 sv_catsv(retval, pair);
702                 if (indent >= 2) {
703                     char *extra;
704                     I32 elen = 0;
705                     newapad = newSVsv(apad);
706                     New(0, extra, klen+4+1, char);
707                     while (elen < (klen+4))
708                         extra[elen++] = ' ';
709                     extra[elen] = '\0';
710                     sv_catpvn(newapad, extra, elen);
711                     Safefree(extra);
712                 }
713                 else
714                     newapad = apad;
715
716                 DD_dump(aTHX_ hval, SvPVX_const(sname), SvCUR(sname), retval, seenhv,
717                         postav, levelp, indent, pad, xpad, newapad, sep, pair,
718                         freezer, toaster, purity, deepcopy, quotekeys, bless,
719                         maxdepth, sortkeys);
720                 SvREFCNT_dec(sname);
721                 Safefree(nkey_buffer);
722                 if (indent >= 2)
723                     SvREFCNT_dec(newapad);
724             }
725             if (i) {
726                 SV *opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp-1);
727                 sv_catsv(retval, totpad);
728                 sv_catsv(retval, opad);
729                 SvREFCNT_dec(opad);
730             }
731             if (name[0] == '%')
732                 sv_catpvn(retval, ")", 1);
733             else
734                 sv_catpvn(retval, "}", 1);
735             SvREFCNT_dec(iname);
736             SvREFCNT_dec(totpad);
737         }
738         else if (realtype == SVt_PVCV) {
739             sv_catpvn(retval, "sub { \"DUMMY\" }", 15);
740             if (purity)
741                 warn("Encountered CODE ref, using dummy placeholder");
742         }
743         else {
744             warn("cannot handle ref type %ld", realtype);
745         }
746
747         if (realpack) {  /* free blessed allocs */
748             if (indent >= 2) {
749                 SvREFCNT_dec(apad);
750                 apad = blesspad;
751             }
752             sv_catpvn(retval, ", '", 3);
753             sv_catpvn(retval, realpack, strlen(realpack));
754             sv_catpvn(retval, "' )", 3);
755             if (toaster && SvPOK(toaster) && SvCUR(toaster)) {
756                 sv_catpvn(retval, "->", 2);
757                 sv_catsv(retval, toaster);
758                 sv_catpvn(retval, "()", 2);
759             }
760         }
761         SvREFCNT_dec(ipad);
762         (*levelp)--;
763     }
764     else {
765         STRLEN i;
766         
767         if (namelen) {
768             idlen = my_snprintf(id, sizeof(id), "0x%"UVxf, PTR2UV(val));
769             if ((svp = hv_fetch(seenhv, id, idlen, FALSE)) &&
770                 (sv = *svp) && SvROK(sv) &&
771                 (seenentry = (AV*)SvRV(sv)))
772             {
773                 SV *othername;
774                 if ((svp = av_fetch(seenentry, 0, FALSE)) && (othername = *svp)
775                     && (svp = av_fetch(seenentry, 2, FALSE)) && *svp && SvIV(*svp) > 0)
776                 {
777                     sv_catpvn(retval, "${", 2);
778                     sv_catsv(retval, othername);
779                     sv_catpvn(retval, "}", 1);
780                     return 1;
781                 }
782             }
783             else if (val != &PL_sv_undef) {
784                 SV * const namesv = newSVpvn("\\", 1);
785                 sv_catpvn(namesv, name, namelen);
786                 seenentry = newAV();
787                 av_push(seenentry, namesv);
788                 av_push(seenentry, newRV_inc(val));
789                 (void)hv_store(seenhv, id, strlen(id), newRV_inc((SV*)seenentry), 0);
790                 SvREFCNT_dec(seenentry);
791             }
792         }
793
794         if (DD_is_integer(val)) {
795             STRLEN len;
796             if (SvIsUV(val))
797               len = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"UVuf, SvUV(val));
798             else
799               len = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"IVdf, SvIV(val));
800             if (SvPOK(val)) {
801               /* Need to check to see if this is a string such as " 0".
802                  I'm assuming from sprintf isn't going to clash with utf8.
803                  Is this valid on EBCDIC?  */
804               STRLEN pvlen;
805               const char * const pv = SvPV(val, pvlen);
806               if (pvlen != len || memNE(pv, tmpbuf, len))
807                 goto integer_came_from_string;
808             }
809             if (len > 10) {
810               /* Looks like we're on a 64 bit system.  Make it a string so that
811                  if a 32 bit system reads the number it will cope better.  */
812               sv_catpvf(retval, "'%s'", tmpbuf);
813             } else
814               sv_catpvn(retval, tmpbuf, len);
815         }
816         else if (realtype == SVt_PVGV) {/* GLOBs can end up with scribbly names */
817             c = SvPV(val, i);
818             ++c; --i;                   /* just get the name */
819             if (i >= 6 && strncmp(c, "main::", 6) == 0) {
820                 c += 4;
821                 i -= 4;
822             }
823             if (needs_quote(c)) {
824                 sv_grow(retval, SvCUR(retval)+6+2*i);
825                 r = SvPVX(retval)+SvCUR(retval);
826                 r[0] = '*'; r[1] = '{'; r[2] = '\'';
827                 i += esc_q(r+3, c, i);
828                 i += 3;
829                 r[i++] = '\''; r[i++] = '}';
830                 r[i] = '\0';
831             }
832             else {
833                 sv_grow(retval, SvCUR(retval)+i+2);
834                 r = SvPVX(retval)+SvCUR(retval);
835                 r[0] = '*'; strcpy(r+1, c);
836                 i++;
837             }
838             SvCUR_set(retval, SvCUR(retval)+i);
839
840             if (purity) {
841                 static const char* const entries[] = { "{SCALAR}", "{ARRAY}", "{HASH}" };
842                 static const STRLEN sizes[] = { 8, 7, 6 };
843                 SV *e;
844                 SV * const nname = newSVpvn("", 0);
845                 SV * const newapad = newSVpvn("", 0);
846                 GV * const gv = (GV*)val;
847                 I32 j;
848                 
849                 for (j=0; j<3; j++) {
850                     e = ((j == 0) ? GvSV(gv) : (j == 1) ? (SV*)GvAV(gv) : (SV*)GvHV(gv));
851                     if (!e)
852                         continue;
853                     if (j == 0 && !SvOK(e))
854                         continue;
855
856                     {
857                         I32 nlevel = 0;
858                         SV *postentry = newSVpvn(r,i);
859                         
860                         sv_setsv(nname, postentry);
861                         sv_catpvn(nname, entries[j], sizes[j]);
862                         sv_catpvn(postentry, " = ", 3);
863                         av_push(postav, postentry);
864                         e = newRV_inc(e);
865                         
866                         SvCUR_set(newapad, 0);
867                         if (indent >= 2)
868                             (void)sv_x(aTHX_ newapad, " ", 1, SvCUR(postentry));
869                         
870                         DD_dump(aTHX_ e, SvPVX_const(nname), SvCUR(nname), postentry,
871                                 seenhv, postav, &nlevel, indent, pad, xpad,
872                                 newapad, sep, pair, freezer, toaster, purity,
873                                 deepcopy, quotekeys, bless, maxdepth, 
874                                 sortkeys);
875                         SvREFCNT_dec(e);
876                     }
877                 }
878                 
879                 SvREFCNT_dec(newapad);
880                 SvREFCNT_dec(nname);
881             }
882         }
883         else if (val == &PL_sv_undef || !SvOK(val)) {
884             sv_catpvn(retval, "undef", 5);
885         }
886         else {
887         integer_came_from_string:
888             c = SvPV(val, i);
889             if (DO_UTF8(val))
890                 i += esc_q_utf8(aTHX_ retval, c, i);
891             else {
892                 sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */
893                 r = SvPVX(retval) + SvCUR(retval);
894                 r[0] = '\'';
895                 i += esc_q(r+1, c, i);
896                 ++i;
897                 r[i++] = '\'';
898                 r[i] = '\0';
899                 SvCUR_set(retval, SvCUR(retval)+i);
900             }
901         }
902     }
903
904     if (idlen) {
905         if (deepcopy)
906             (void)hv_delete(seenhv, id, idlen, G_DISCARD);
907         else if (namelen && seenentry) {
908             SV *mark = *av_fetch(seenentry, 2, TRUE);
909             sv_setiv(mark,1);
910         }
911     }
912     return 1;
913 }
914
915
916 MODULE = Data::Dumper           PACKAGE = Data::Dumper         PREFIX = Data_Dumper_
917
918 #
919 # This is the exact equivalent of Dump.  Well, almost. The things that are
920 # different as of now (due to Laziness):
921 #   * doesnt do double-quotes yet.
922 #
923
924 void
925 Data_Dumper_Dumpxs(href, ...)
926         SV      *href;
927         PROTOTYPE: $;$$
928         PPCODE:
929         {
930             HV *hv;
931             SV *retval, *valstr;
932             HV *seenhv = NULL;
933             AV *postav, *todumpav, *namesav;
934             I32 level = 0;
935             I32 indent, terse, i, imax, postlen;
936             SV **svp;
937             SV *val, *name, *pad, *xpad, *apad, *sep, *pair, *varname;
938             SV *freezer, *toaster, *bless, *sortkeys;
939             I32 purity, deepcopy, quotekeys, maxdepth = 0;
940             char tmpbuf[1024];
941             I32 gimme = GIMME;
942
943             if (!SvROK(href)) {         /* call new to get an object first */
944                 if (items < 2)
945                     croak("Usage: Data::Dumper::Dumpxs(PACKAGE, VAL_ARY_REF, [NAME_ARY_REF])");
946                 
947                 ENTER;
948                 SAVETMPS;
949                 
950                 PUSHMARK(sp);
951                 XPUSHs(href);
952                 XPUSHs(sv_2mortal(newSVsv(ST(1))));
953                 if (items >= 3)
954                     XPUSHs(sv_2mortal(newSVsv(ST(2))));
955                 PUTBACK;
956                 i = perl_call_method("new", G_SCALAR);
957                 SPAGAIN;
958                 if (i)
959                     href = newSVsv(POPs);
960
961                 PUTBACK;
962                 FREETMPS;
963                 LEAVE;
964                 if (i)
965                     (void)sv_2mortal(href);
966             }
967
968             todumpav = namesav = NULL;
969             seenhv = NULL;
970             val = pad = xpad = apad = sep = pair = varname
971                 = freezer = toaster = bless = sortkeys = &PL_sv_undef;
972             name = sv_newmortal();
973             indent = 2;
974             terse = purity = deepcopy = 0;
975             quotekeys = 1;
976         
977             retval = newSVpvn("", 0);
978             if (SvROK(href)
979                 && (hv = (HV*)SvRV((SV*)href))
980                 && SvTYPE(hv) == SVt_PVHV)              {
981
982                 if ((svp = hv_fetch(hv, "seen", 4, FALSE)) && SvROK(*svp))
983                     seenhv = (HV*)SvRV(*svp);
984                 if ((svp = hv_fetch(hv, "todump", 6, FALSE)) && SvROK(*svp))
985                     todumpav = (AV*)SvRV(*svp);
986                 if ((svp = hv_fetch(hv, "names", 5, FALSE)) && SvROK(*svp))
987                     namesav = (AV*)SvRV(*svp);
988                 if ((svp = hv_fetch(hv, "indent", 6, FALSE)))
989                     indent = SvIV(*svp);
990                 if ((svp = hv_fetch(hv, "purity", 6, FALSE)))
991                     purity = SvIV(*svp);
992                 if ((svp = hv_fetch(hv, "terse", 5, FALSE)))
993                     terse = SvTRUE(*svp);
994 #if 0 /* useqq currently unused */
995                 if ((svp = hv_fetch(hv, "useqq", 5, FALSE)))
996                     useqq = SvTRUE(*svp);
997 #endif
998                 if ((svp = hv_fetch(hv, "pad", 3, FALSE)))
999                     pad = *svp;
1000                 if ((svp = hv_fetch(hv, "xpad", 4, FALSE)))
1001                     xpad = *svp;
1002                 if ((svp = hv_fetch(hv, "apad", 4, FALSE)))
1003                     apad = *svp;
1004                 if ((svp = hv_fetch(hv, "sep", 3, FALSE)))
1005                     sep = *svp;
1006                 if ((svp = hv_fetch(hv, "pair", 4, FALSE)))
1007                     pair = *svp;
1008                 if ((svp = hv_fetch(hv, "varname", 7, FALSE)))
1009                     varname = *svp;
1010                 if ((svp = hv_fetch(hv, "freezer", 7, FALSE)))
1011                     freezer = *svp;
1012                 if ((svp = hv_fetch(hv, "toaster", 7, FALSE)))
1013                     toaster = *svp;
1014                 if ((svp = hv_fetch(hv, "deepcopy", 8, FALSE)))
1015                     deepcopy = SvTRUE(*svp);
1016                 if ((svp = hv_fetch(hv, "quotekeys", 9, FALSE)))
1017                     quotekeys = SvTRUE(*svp);
1018                 if ((svp = hv_fetch(hv, "bless", 5, FALSE)))
1019                     bless = *svp;
1020                 if ((svp = hv_fetch(hv, "maxdepth", 8, FALSE)))
1021                     maxdepth = SvIV(*svp);
1022                 if ((svp = hv_fetch(hv, "sortkeys", 8, FALSE))) {
1023                     sortkeys = *svp;
1024                     if (! SvTRUE(sortkeys))
1025                         sortkeys = NULL;
1026                     else if (! (SvROK(sortkeys) &&
1027                                 SvTYPE(SvRV(sortkeys)) == SVt_PVCV) )
1028                     {
1029                         /* flag to use qsortsv() for sorting hash keys */       
1030                         sortkeys = &PL_sv_yes; 
1031                     }
1032                 }
1033                 postav = newAV();
1034
1035                 if (todumpav)
1036                     imax = av_len(todumpav);
1037                 else
1038                     imax = -1;
1039                 valstr = newSVpvn("",0);
1040                 for (i = 0; i <= imax; ++i) {
1041                     SV *newapad;
1042                 
1043                     av_clear(postav);
1044                     if ((svp = av_fetch(todumpav, i, FALSE)))
1045                         val = *svp;
1046                     else
1047                         val = &PL_sv_undef;
1048                     if ((svp = av_fetch(namesav, i, TRUE))) {
1049                         sv_setsv(name, *svp);
1050                         if (SvOK(*svp) && !SvPOK(*svp))
1051                             (void)SvPV_nolen_const(name);
1052                     }
1053                     else
1054                         (void)SvOK_off(name);
1055                 
1056                     if (SvPOK(name)) {
1057                         if ((SvPVX_const(name))[0] == '*') {
1058                             if (SvROK(val)) {
1059                                 switch (SvTYPE(SvRV(val))) {
1060                                 case SVt_PVAV:
1061                                     (SvPVX(name))[0] = '@';
1062                                     break;
1063                                 case SVt_PVHV:
1064                                     (SvPVX(name))[0] = '%';
1065                                     break;
1066                                 case SVt_PVCV:
1067                                     (SvPVX(name))[0] = '*';
1068                                     break;
1069                                 default:
1070                                     (SvPVX(name))[0] = '$';
1071                                     break;
1072                                 }
1073                             }
1074                             else
1075                                 (SvPVX(name))[0] = '$';
1076                         }
1077                         else if ((SvPVX_const(name))[0] != '$')
1078                             sv_insert(name, 0, 0, "$", 1);
1079                     }
1080                     else {
1081                         STRLEN nchars;
1082                         sv_setpvn(name, "$", 1);
1083                         sv_catsv(name, varname);
1084                         nchars = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"IVdf, (IV)(i+1));
1085                         sv_catpvn(name, tmpbuf, nchars);
1086                     }
1087                 
1088                     if (indent >= 2) {
1089                         SV * const tmpsv = sv_x(aTHX_ NULL, " ", 1, SvCUR(name)+3);
1090                         newapad = newSVsv(apad);
1091                         sv_catsv(newapad, tmpsv);
1092                         SvREFCNT_dec(tmpsv);
1093                     }
1094                     else
1095                         newapad = apad;
1096                 
1097                     DD_dump(aTHX_ val, SvPVX_const(name), SvCUR(name), valstr, seenhv,
1098                             postav, &level, indent, pad, xpad, newapad, sep, pair,
1099                             freezer, toaster, purity, deepcopy, quotekeys,
1100                             bless, maxdepth, sortkeys);
1101                 
1102                     if (indent >= 2)
1103                         SvREFCNT_dec(newapad);
1104
1105                     postlen = av_len(postav);
1106                     if (postlen >= 0 || !terse) {
1107                         sv_insert(valstr, 0, 0, " = ", 3);
1108                         sv_insert(valstr, 0, 0, SvPVX_const(name), SvCUR(name));
1109                         sv_catpvn(valstr, ";", 1);
1110                     }
1111                     sv_catsv(retval, pad);
1112                     sv_catsv(retval, valstr);
1113                     sv_catsv(retval, sep);
1114                     if (postlen >= 0) {
1115                         I32 i;
1116                         sv_catsv(retval, pad);
1117                         for (i = 0; i <= postlen; ++i) {
1118                             SV *elem;
1119                             svp = av_fetch(postav, i, FALSE);
1120                             if (svp && (elem = *svp)) {
1121                                 sv_catsv(retval, elem);
1122                                 if (i < postlen) {
1123                                     sv_catpvn(retval, ";", 1);
1124                                     sv_catsv(retval, sep);
1125                                     sv_catsv(retval, pad);
1126                                 }
1127                             }
1128                         }
1129                         sv_catpvn(retval, ";", 1);
1130                             sv_catsv(retval, sep);
1131                     }
1132                     sv_setpvn(valstr, "", 0);
1133                     if (gimme == G_ARRAY) {
1134                         XPUSHs(sv_2mortal(retval));
1135                         if (i < imax)   /* not the last time thro ? */
1136                             retval = newSVpvn("",0);
1137                     }
1138                 }
1139                 SvREFCNT_dec(postav);
1140                 SvREFCNT_dec(valstr);
1141             }
1142             else
1143                 croak("Call to new() method failed to return HASH ref");
1144             if (gimme == G_SCALAR)
1145                 XPUSHs(sv_2mortal(retval));
1146         }