This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re: [perl #36310] sv_vcatpvfn() fails on some (very limited) situations
[perl5.git] / ext / Data / Dumper / Dumper.xs
1 #define PERL_NO_GET_CONTEXT
2 #include "EXTERN.h"
3 #include "perl.h"
4 #include "XSUB.h"
5
6 static I32 num_q (char *s, STRLEN slen);
7 static I32 esc_q (char *dest, char *src, STRLEN slen);
8 static I32 esc_q_utf8 (pTHX_ SV *sv, char *src, STRLEN slen);
9 static SV *sv_x (pTHX_ SV *sv, const char *str, STRLEN len, I32 n);
10 static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
11                     HV *seenhv, AV *postav, I32 *levelp, I32 indent,
12                     SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair,
13                     SV *freezer, SV *toaster,
14                     I32 purity, I32 deepcopy, I32 quotekeys, SV *bless,
15                     I32 maxdepth, SV *sortkeys);
16
17 #ifndef HvNAME_get
18 #define HvNAME_get HvNAME
19 #endif
20
21 #if PERL_VERSION <= 6 /* Perl 5.6 and earlier */
22
23 # ifdef EBCDIC
24 #  define UNI_TO_NATIVE(ch) (((ch) > 255) ? (ch) : ASCII_TO_NATIVE(ch))
25 # else
26 #  define UNI_TO_NATIVE(ch) (ch)
27 # endif
28
29 UV
30 Perl_utf8_to_uvchr(pTHX_ U8 *s, STRLEN *retlen)
31 {
32     UV uv = utf8_to_uv(s, UTF8_MAXLEN, retlen,
33                     ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
34     return UNI_TO_NATIVE(uv);
35 }
36
37 # if !defined(PERL_IMPLICIT_CONTEXT)
38 #  define utf8_to_uvchr      Perl_utf8_to_uvchr
39 # else
40 #  define utf8_to_uvchr(a,b) Perl_utf8_to_uvchr(aTHX_ a,b)
41 # endif
42
43 #endif /* PERL_VERSION <= 6 */
44
45 /* Changes in 5.7 series mean that now IOK is only set if scalar is
46    precisely integer but in 5.6 and earlier we need to do a more
47    complex test  */
48 #if PERL_VERSION <= 6
49 #define DD_is_integer(sv) (SvIOK(sv) && (SvIsUV(val) ? SvUV(sv) == SvNV(sv) : SvIV(sv) == SvNV(sv)))
50 #else
51 #define DD_is_integer(sv) SvIOK(sv)
52 #endif
53
54 /* does a string need to be protected? */
55 static I32
56 needs_quote(register char *s)
57 {
58 TOP:
59     if (s[0] == ':') {
60         if (*++s) {
61             if (*s++ != ':')
62                 return 1;
63         }
64         else
65             return 1;
66     }
67     if (isIDFIRST(*s)) {
68         while (*++s)
69             if (!isALNUM(*s)) {
70                 if (*s == ':')
71                     goto TOP;
72                 else
73                     return 1;
74             }
75     }
76     else
77         return 1;
78     return 0;
79 }
80
81 /* count the number of "'"s and "\"s in string */
82 static I32
83 num_q(register char *s, register STRLEN slen)
84 {
85     register I32 ret = 0;
86
87     while (slen > 0) {
88         if (*s == '\'' || *s == '\\')
89             ++ret;
90         ++s;
91         --slen;
92     }
93     return ret;
94 }
95
96
97 /* returns number of chars added to escape "'"s and "\"s in s */
98 /* slen number of characters in s will be escaped */
99 /* destination must be long enough for additional chars */
100 static I32
101 esc_q(register char *d, register char *s, register STRLEN slen)
102 {
103     register I32 ret = 0;
104
105     while (slen > 0) {
106         switch (*s) {
107         case '\'':
108         case '\\':
109             *d = '\\';
110             ++d; ++ret;
111         default:
112             *d = *s;
113             ++d; ++s; --slen;
114             break;
115         }
116     }
117     return ret;
118 }
119
120 static I32
121 esc_q_utf8(pTHX_ SV* sv, register char *src, register STRLEN slen)
122 {
123     char *s, *send, *r, *rstart;
124     STRLEN j, cur = SvCUR(sv);
125     /* Could count 128-255 and 256+ in two variables, if we want to
126        be like &qquote and make a distinction.  */
127     STRLEN grow = 0;    /* bytes needed to represent chars 128+ */
128     /* STRLEN topbit_grow = 0;  bytes needed to represent chars 128-255 */
129     STRLEN backslashes = 0;
130     STRLEN single_quotes = 0;
131     STRLEN qq_escapables = 0;   /* " $ @ will need a \ in "" strings.  */
132     STRLEN normal = 0;
133
134     /* this will need EBCDICification */
135     for (s = src, send = src + slen; s < send; s += UTF8SKIP(s)) {
136         UV k = utf8_to_uvchr((U8*)s, NULL);
137
138         if (k > 127) {
139             /* 4: \x{} then count the number of hex digits.  */
140             grow += 4 + (k <= 0xFF ? 2 : k <= 0xFFF ? 3 : k <= 0xFFFF ? 4 :
141 #if UVSIZE == 4
142                 8 /* We may allocate a bit more than the minimum here.  */
143 #else
144                 k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
145 #endif
146                 );
147         } else if (k == '\\') {
148             backslashes++;
149         } else if (k == '\'') {
150             single_quotes++;
151         } else if (k == '"' || k == '$' || k == '@') {
152             qq_escapables++;
153         } else {
154             normal++;
155         }
156     }
157     if (grow) {
158         /* We have something needing hex. 3 is ""\0 */
159         sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
160                 + 2*qq_escapables + normal);
161         rstart = r = SvPVX(sv) + cur;
162
163         *r++ = '"';
164
165         for (s = src; s < send; s += UTF8SKIP(s)) {
166             UV k = utf8_to_uvchr((U8*)s, NULL);
167
168             if (k == '"' || k == '\\' || k == '$' || k == '@') {
169                 *r++ = '\\';
170                 *r++ = (char)k;
171             }
172             else if (k < 0x80)
173                 *r++ = (char)k;
174             else {
175               /* The return value of sprintf() is unportable.
176                * In modern systems it returns (int) the number of characters,
177                * but in older systems it might return (char*) the original
178                * buffer, or it might even be (void).  The easiest portable
179                * thing to do is probably use sprintf() in void context and
180                * then strlen(buffer) for the length.  The more proper way
181                * would of course be to figure out the prototype of sprintf.
182                * --jhi */
183                 sprintf(r, "\\x{%"UVxf"}", k);
184                 r += strlen(r);
185             }
186         }
187         *r++ = '"';
188     } else {
189         /* Single quotes.  */
190         sv_grow(sv, cur + 3 + 2*backslashes + 2*single_quotes
191                 + qq_escapables + normal);
192         rstart = r = SvPVX(sv) + cur;
193         *r++ = '\'';
194         for (s = src; s < send; s ++) {
195             char k = *s;
196             if (k == '\'' || k == '\\')
197                 *r++ = '\\';
198             *r++ = k;
199         }
200         *r++ = '\'';
201     }
202     *r = '\0';
203     j = r - rstart;
204     SvCUR_set(sv, cur + j);
205
206     return j;
207 }
208
209 /* append a repeated string to an SV */
210 static SV *
211 sv_x(pTHX_ SV *sv, const char *str, STRLEN len, I32 n)
212 {
213     if (sv == Nullsv)
214         sv = newSVpvn("", 0);
215     else
216         assert(SvTYPE(sv) >= SVt_PV);
217
218     if (n > 0) {
219         SvGROW(sv, len*n + SvCUR(sv) + 1);
220         if (len == 1) {
221             char *start = SvPVX(sv) + SvCUR(sv);
222             SvCUR_set(sv, SvCUR(sv) + n);
223             start[n] = '\0';
224             while (n > 0)
225                 start[--n] = str[0];
226         }
227         else
228             while (n > 0) {
229                 sv_catpvn(sv, str, len);
230                 --n;
231             }
232     }
233     return sv;
234 }
235
236 /*
237  * This ought to be split into smaller functions. (it is one long function since
238  * it exactly parallels the perl version, which was one long thing for
239  * efficiency raisins.)  Ugggh!
240  */
241 static I32
242 DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
243         AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad,
244         SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity,
245         I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys)
246 {
247     char tmpbuf[128];
248     U32 i;
249     char *c, *r, *realpack, id[128];
250     SV **svp;
251     SV *sv, *ipad, *ival;
252     SV *blesspad = Nullsv;
253     AV *seenentry = Nullav;
254     char *iname;
255     STRLEN inamelen, idlen = 0;
256     U32 realtype;
257
258     if (!val)
259         return 0;
260
261     realtype = SvTYPE(val);
262
263     if (SvGMAGICAL(val))
264         mg_get(val);
265     if (SvROK(val)) {
266
267         /* If a freeze method is provided and the object has it, call
268            it.  Warn on errors. */
269         if (SvOBJECT(SvRV(val)) && freezer &&
270             SvPOK(freezer) && SvCUR(freezer) &&
271             gv_fetchmeth(SvSTASH(SvRV(val)), SvPVX_const(freezer), 
272                          SvCUR(freezer), -1) != NULL)
273         {
274             dSP; ENTER; SAVETMPS; PUSHMARK(sp);
275             XPUSHs(val); PUTBACK;
276             i = perl_call_method(SvPVX_const(freezer), G_EVAL|G_VOID);
277             SPAGAIN;
278             if (SvTRUE(ERRSV))
279                 warn("WARNING(Freezer method call failed): %"SVf"", ERRSV);
280             PUTBACK; FREETMPS; LEAVE;
281         }
282         
283         ival = SvRV(val);
284         realtype = SvTYPE(ival);
285         (void) sprintf(id, "0x%"UVxf, PTR2UV(ival));
286         idlen = strlen(id);
287         if (SvOBJECT(ival))
288             realpack = HvNAME_get(SvSTASH(ival));
289         else
290             realpack = Nullch;
291
292         /* if it has a name, we need to either look it up, or keep a tab
293          * on it so we know when we hit it later
294          */
295         if (namelen) {
296             if ((svp = hv_fetch(seenhv, id, idlen, FALSE))
297                 && (sv = *svp) && SvROK(sv) && (seenentry = (AV*)SvRV(sv)))
298             {
299                 SV *othername;
300                 if ((svp = av_fetch(seenentry, 0, FALSE))
301                     && (othername = *svp))
302                 {
303                     if (purity && *levelp > 0) {
304                         SV *postentry;
305                         
306                         if (realtype == SVt_PVHV)
307                             sv_catpvn(retval, "{}", 2);
308                         else if (realtype == SVt_PVAV)
309                             sv_catpvn(retval, "[]", 2);
310                         else
311                             sv_catpvn(retval, "do{my $o}", 9);
312                         postentry = newSVpvn(name, namelen);
313                         sv_catpvn(postentry, " = ", 3);
314                         sv_catsv(postentry, othername);
315                         av_push(postav, postentry);
316                     }
317                     else {
318                         if (name[0] == '@' || name[0] == '%') {
319                             if ((SvPVX_const(othername))[0] == '\\' &&
320                                 (SvPVX_const(othername))[1] == name[0]) {
321                                 sv_catpvn(retval, SvPVX_const(othername)+1,
322                                           SvCUR(othername)-1);
323                             }
324                             else {
325                                 sv_catpvn(retval, name, 1);
326                                 sv_catpvn(retval, "{", 1);
327                                 sv_catsv(retval, othername);
328                                 sv_catpvn(retval, "}", 1);
329                             }
330                         }
331                         else
332                             sv_catsv(retval, othername);
333                     }
334                     return 1;
335                 }
336                 else {
337                     warn("ref name not found for %s", id);
338                     return 0;
339                 }
340             }
341             else {   /* store our name and continue */
342                 SV *namesv;
343                 if (name[0] == '@' || name[0] == '%') {
344                     namesv = newSVpvn("\\", 1);
345                     sv_catpvn(namesv, name, namelen);
346                 }
347                 else if (realtype == SVt_PVCV && name[0] == '*') {
348                     namesv = newSVpvn("\\", 2);
349                     sv_catpvn(namesv, name, namelen);
350                     (SvPVX(namesv))[1] = '&';
351                 }
352                 else
353                     namesv = newSVpvn(name, namelen);
354                 seenentry = newAV();
355                 av_push(seenentry, namesv);
356                 (void)SvREFCNT_inc(val);
357                 av_push(seenentry, val);
358                 (void)hv_store(seenhv, id, strlen(id),
359                                newRV_inc((SV*)seenentry), 0);
360                 SvREFCNT_dec(seenentry);
361             }
362         }
363
364         if (realpack && *realpack == 'R' && strEQ(realpack, "Regexp")) {
365             STRLEN rlen;
366             char *rval = SvPV(val, rlen);
367             char *slash = strchr(rval, '/');
368             sv_catpvn(retval, "qr/", 3);
369             while (slash) {
370                 sv_catpvn(retval, rval, slash-rval);
371                 sv_catpvn(retval, "\\/", 2);
372                 rlen -= slash-rval+1;
373                 rval = slash+1;
374                 slash = strchr(rval, '/');
375             }
376             sv_catpvn(retval, rval, rlen);
377             sv_catpvn(retval, "/", 1);
378             return 1;
379         }
380
381         /* If purity is not set and maxdepth is set, then check depth:
382          * if we have reached maximum depth, return the string
383          * representation of the thing we are currently examining
384          * at this depth (i.e., 'Foo=ARRAY(0xdeadbeef)').
385          */
386         if (!purity && maxdepth > 0 && *levelp >= maxdepth) {
387             STRLEN vallen;
388             char *valstr = SvPV(val,vallen);
389             sv_catpvn(retval, "'", 1);
390             sv_catpvn(retval, valstr, vallen);
391             sv_catpvn(retval, "'", 1);
392             return 1;
393         }
394
395         if (realpack) {                         /* we have a blessed ref */
396             STRLEN blesslen;
397             char *blessstr = SvPV(bless, blesslen);
398             sv_catpvn(retval, blessstr, blesslen);
399             sv_catpvn(retval, "( ", 2);
400             if (indent >= 2) {
401                 blesspad = apad;
402                 apad = newSVsv(apad);
403                 sv_x(aTHX_ apad, " ", 1, blesslen+2);
404             }
405         }
406
407         (*levelp)++;
408         ipad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp);
409
410         if (realtype <= SVt_PVBM) {                          /* scalar ref */
411             SV *namesv = newSVpvn("${", 2);
412             sv_catpvn(namesv, name, namelen);
413             sv_catpvn(namesv, "}", 1);
414             if (realpack) {                                  /* blessed */
415                 sv_catpvn(retval, "do{\\(my $o = ", 13);
416                 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
417                         postav, levelp, indent, pad, xpad, apad, sep, pair,
418                         freezer, toaster, purity, deepcopy, quotekeys, bless,
419                         maxdepth, sortkeys);
420                 sv_catpvn(retval, ")}", 2);
421             }                                                /* plain */
422             else {
423                 sv_catpvn(retval, "\\", 1);
424                 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
425                         postav, levelp, indent, pad, xpad, apad, sep, pair,
426                         freezer, toaster, purity, deepcopy, quotekeys, bless,
427                         maxdepth, sortkeys);
428             }
429             SvREFCNT_dec(namesv);
430         }
431         else if (realtype == SVt_PVGV) {                     /* glob ref */
432             SV *namesv = newSVpvn("*{", 2);
433             sv_catpvn(namesv, name, namelen);
434             sv_catpvn(namesv, "}", 1);
435             sv_catpvn(retval, "\\", 1);
436             DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
437                     postav, levelp,     indent, pad, xpad, apad, sep, pair,
438                     freezer, toaster, purity, deepcopy, quotekeys, bless,
439                     maxdepth, sortkeys);
440             SvREFCNT_dec(namesv);
441         }
442         else if (realtype == SVt_PVAV) {
443             SV *totpad;
444             I32 ix = 0;
445             I32 ixmax = av_len((AV *)ival);
446         
447             SV *ixsv = newSViv(0);
448             /* allowing for a 24 char wide array index */
449             New(0, iname, namelen+28, char);
450             (void)strcpy(iname, name);
451             inamelen = namelen;
452             if (name[0] == '@') {
453                 sv_catpvn(retval, "(", 1);
454                 iname[0] = '$';
455             }
456             else {
457                 sv_catpvn(retval, "[", 1);
458                 /* omit "->" in $foo{bar}->[0], but not in ${$foo}->[0] */
459                 /*if (namelen > 0
460                     && name[namelen-1] != ']' && name[namelen-1] != '}'
461                     && (namelen < 4 || (name[1] != '{' && name[2] != '{')))*/
462                 if ((namelen > 0
463                      && name[namelen-1] != ']' && name[namelen-1] != '}')
464                     || (namelen > 4
465                         && (name[1] == '{'
466                             || (name[0] == '\\' && name[2] == '{'))))
467                 {
468                     iname[inamelen++] = '-'; iname[inamelen++] = '>';
469                     iname[inamelen] = '\0';
470                 }
471             }
472             if (iname[0] == '*' && iname[inamelen-1] == '}' && inamelen >= 8 &&
473                 (instr(iname+inamelen-8, "{SCALAR}") ||
474                  instr(iname+inamelen-7, "{ARRAY}") ||
475                  instr(iname+inamelen-6, "{HASH}"))) {
476                 iname[inamelen++] = '-'; iname[inamelen++] = '>';
477             }
478             iname[inamelen++] = '['; iname[inamelen] = '\0';
479             totpad = newSVsv(sep);
480             sv_catsv(totpad, pad);
481             sv_catsv(totpad, apad);
482
483             for (ix = 0; ix <= ixmax; ++ix) {
484                 STRLEN ilen;
485                 SV *elem;
486                 svp = av_fetch((AV*)ival, ix, FALSE);
487                 if (svp)
488                     elem = *svp;
489                 else
490                     elem = &PL_sv_undef;
491                 
492                 ilen = inamelen;
493                 sv_setiv(ixsv, ix);
494                 (void) sprintf(iname+ilen, "%"IVdf, (IV)ix);
495                 ilen = strlen(iname);
496                 iname[ilen++] = ']'; iname[ilen] = '\0';
497                 if (indent >= 3) {
498                     sv_catsv(retval, totpad);
499                     sv_catsv(retval, ipad);
500                     sv_catpvn(retval, "#", 1);
501                     sv_catsv(retval, ixsv);
502                 }
503                 sv_catsv(retval, totpad);
504                 sv_catsv(retval, ipad);
505                 DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav,
506                         levelp, indent, pad, xpad, apad, sep, pair,
507                         freezer, toaster, purity, deepcopy, quotekeys, bless,
508                         maxdepth, sortkeys);
509                 if (ix < ixmax)
510                     sv_catpvn(retval, ",", 1);
511             }
512             if (ixmax >= 0) {
513                 SV *opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), (*levelp)-1);
514                 sv_catsv(retval, totpad);
515                 sv_catsv(retval, opad);
516                 SvREFCNT_dec(opad);
517             }
518             if (name[0] == '@')
519                 sv_catpvn(retval, ")", 1);
520             else
521                 sv_catpvn(retval, "]", 1);
522             SvREFCNT_dec(ixsv);
523             SvREFCNT_dec(totpad);
524             Safefree(iname);
525         }
526         else if (realtype == SVt_PVHV) {
527             SV *totpad, *newapad;
528             SV *iname, *sname;
529             HE *entry;
530             char *key;
531             I32 klen;
532             SV *hval;
533             AV *keys = Nullav;
534         
535             iname = newSVpvn(name, namelen);
536             if (name[0] == '%') {
537                 sv_catpvn(retval, "(", 1);
538                 (SvPVX(iname))[0] = '$';
539             }
540             else {
541                 sv_catpvn(retval, "{", 1);
542                 /* omit "->" in $foo[0]->{bar}, but not in ${$foo}->{bar} */
543                 if ((namelen > 0
544                      && name[namelen-1] != ']' && name[namelen-1] != '}')
545                     || (namelen > 4
546                         && (name[1] == '{'
547                             || (name[0] == '\\' && name[2] == '{'))))
548                 {
549                     sv_catpvn(iname, "->", 2);
550                 }
551             }
552             if (name[0] == '*' && name[namelen-1] == '}' && namelen >= 8 &&
553                 (instr(name+namelen-8, "{SCALAR}") ||
554                  instr(name+namelen-7, "{ARRAY}") ||
555                  instr(name+namelen-6, "{HASH}"))) {
556                 sv_catpvn(iname, "->", 2);
557             }
558             sv_catpvn(iname, "{", 1);
559             totpad = newSVsv(sep);
560             sv_catsv(totpad, pad);
561             sv_catsv(totpad, apad);
562         
563             /* If requested, get a sorted/filtered array of hash keys */
564             if (sortkeys) {
565                 if (sortkeys == &PL_sv_yes) {
566 #if PERL_VERSION < 8
567                     sortkeys = sv_2mortal(newSVpvn("Data::Dumper::_sortkeys", 23));
568 #else
569                     keys = newAV();
570                     (void)hv_iterinit((HV*)ival);
571                     while ((entry = hv_iternext((HV*)ival))) {
572                         sv = hv_iterkeysv(entry);
573                         SvREFCNT_inc(sv);
574                         av_push(keys, sv);
575                     }
576 # ifdef USE_LOCALE_NUMERIC
577                     sortsv(AvARRAY(keys), 
578                            av_len(keys)+1, 
579                            IN_LOCALE ? Perl_sv_cmp_locale : Perl_sv_cmp);
580 # else
581                     sortsv(AvARRAY(keys), 
582                            av_len(keys)+1, 
583                            Perl_sv_cmp);
584 # endif
585 #endif
586                 }
587                 if (sortkeys != &PL_sv_yes) {
588                     dSP; ENTER; SAVETMPS; PUSHMARK(sp);
589                     XPUSHs(sv_2mortal(newRV_inc(ival))); PUTBACK;
590                     i = perl_call_sv(sortkeys, G_SCALAR | G_EVAL);
591                     SPAGAIN;
592                     if (i) {
593                         sv = POPs;
594                         if (SvROK(sv) && (SvTYPE(SvRV(sv)) == SVt_PVAV))
595                             keys = (AV*)SvREFCNT_inc(SvRV(sv));
596                     }
597                     if (! keys)
598                         warn("Sortkeys subroutine did not return ARRAYREF\n");
599                     PUTBACK; FREETMPS; LEAVE;
600                 }
601                 if (keys)
602                     sv_2mortal((SV*)keys);
603             }
604             else
605                 (void)hv_iterinit((HV*)ival);
606
607             /* foreach (keys %hash) */
608             for (i = 0; 1; i++) {
609                 char *nkey;
610                 char *nkey_buffer = NULL;
611                 I32 nticks = 0;
612                 SV* keysv;
613                 STRLEN keylen;
614                 I32 nlen;
615                 bool do_utf8 = FALSE;
616
617                 if ((sortkeys && !(keys && (I32)i <= av_len(keys))) ||
618                     !(entry = hv_iternext((HV *)ival)))
619                     break;
620
621                 if (i)
622                     sv_catpvn(retval, ",", 1);
623
624                 if (sortkeys) {
625                     char *key;
626                     svp = av_fetch(keys, i, FALSE);
627                     keysv = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
628                     key = SvPV(keysv, keylen);
629                     svp = hv_fetch((HV*)ival, key,
630                                    SvUTF8(keysv) ? -(I32)keylen : keylen, 0);
631                     hval = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
632                 }
633                 else {
634                     keysv = hv_iterkeysv(entry);
635                     hval = hv_iterval((HV*)ival, entry);
636                 }
637
638                 do_utf8 = DO_UTF8(keysv);
639                 key = SvPV(keysv, keylen);
640                 klen = keylen;
641
642                 sv_catsv(retval, totpad);
643                 sv_catsv(retval, ipad);
644                 /* old logic was first to check utf8 flag, and if utf8 always
645                    call esc_q_utf8.  This caused test to break under -Mutf8,
646                    because there even strings like 'c' have utf8 flag on.
647                    Hence with quotekeys == 0 the XS code would still '' quote
648                    them based on flags, whereas the perl code would not,
649                    based on regexps.
650                    The perl code is correct.
651                    needs_quote() decides that anything that isn't a valid
652                    perl identifier needs to be quoted, hence only correctly
653                    formed strings with no characters outside [A-Za-z0-9_:]
654                    won't need quoting.  None of those characters are used in
655                    the byte encoding of utf8, so anything with utf8
656                    encoded characters in will need quoting. Hence strings
657                    with utf8 encoded characters in will end up inside do_utf8
658                    just like before, but now strings with utf8 flag set but
659                    only ascii characters will end up in the unquoted section.
660
661                    There should also be less tests for the (probably currently)
662                    more common doesn't need quoting case.
663                    The code is also smaller (22044 vs 22260) because I've been
664                    able to pull the common logic out to both sides.  */
665                 if (quotekeys || needs_quote(key)) {
666                     if (do_utf8) {
667                         STRLEN ocur = SvCUR(retval);
668                         nlen = esc_q_utf8(aTHX_ retval, key, klen);
669                         nkey = SvPVX(retval) + ocur;
670                     }
671                     else {
672                         nticks = num_q(key, klen);
673                         New(0, nkey_buffer, klen+nticks+3, char);
674                         nkey = nkey_buffer;
675                         nkey[0] = '\'';
676                         if (nticks)
677                             klen += esc_q(nkey+1, key, klen);
678                         else
679                             (void)Copy(key, nkey+1, klen, char);
680                         nkey[++klen] = '\'';
681                         nkey[++klen] = '\0';
682                         nlen = klen;
683                         sv_catpvn(retval, nkey, klen);
684                     }
685                 }
686                 else {
687                     nkey = key;
688                     nlen = klen;
689                     sv_catpvn(retval, nkey, klen);
690                 }
691                 sname = newSVsv(iname);
692                 sv_catpvn(sname, nkey, nlen);
693                 sv_catpvn(sname, "}", 1);
694
695                 sv_catsv(retval, pair);
696                 if (indent >= 2) {
697                     char *extra;
698                     I32 elen = 0;
699                     newapad = newSVsv(apad);
700                     New(0, extra, klen+4+1, char);
701                     while (elen < (klen+4))
702                         extra[elen++] = ' ';
703                     extra[elen] = '\0';
704                     sv_catpvn(newapad, extra, elen);
705                     Safefree(extra);
706                 }
707                 else
708                     newapad = apad;
709
710                 DD_dump(aTHX_ hval, SvPVX_const(sname), SvCUR(sname), retval, seenhv,
711                         postav, levelp, indent, pad, xpad, newapad, sep, pair,
712                         freezer, toaster, purity, deepcopy, quotekeys, bless,
713                         maxdepth, sortkeys);
714                 SvREFCNT_dec(sname);
715                 Safefree(nkey_buffer);
716                 if (indent >= 2)
717                     SvREFCNT_dec(newapad);
718             }
719             if (i) {
720                 SV *opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp-1);
721                 sv_catsv(retval, totpad);
722                 sv_catsv(retval, opad);
723                 SvREFCNT_dec(opad);
724             }
725             if (name[0] == '%')
726                 sv_catpvn(retval, ")", 1);
727             else
728                 sv_catpvn(retval, "}", 1);
729             SvREFCNT_dec(iname);
730             SvREFCNT_dec(totpad);
731         }
732         else if (realtype == SVt_PVCV) {
733             sv_catpvn(retval, "sub { \"DUMMY\" }", 15);
734             if (purity)
735                 warn("Encountered CODE ref, using dummy placeholder");
736         }
737         else {
738             warn("cannot handle ref type %ld", realtype);
739         }
740
741         if (realpack) {  /* free blessed allocs */
742             if (indent >= 2) {
743                 SvREFCNT_dec(apad);
744                 apad = blesspad;
745             }
746             sv_catpvn(retval, ", '", 3);
747             sv_catpvn(retval, realpack, strlen(realpack));
748             sv_catpvn(retval, "' )", 3);
749             if (toaster && SvPOK(toaster) && SvCUR(toaster)) {
750                 sv_catpvn(retval, "->", 2);
751                 sv_catsv(retval, toaster);
752                 sv_catpvn(retval, "()", 2);
753             }
754         }
755         SvREFCNT_dec(ipad);
756         (*levelp)--;
757     }
758     else {
759         STRLEN i;
760         
761         if (namelen) {
762             (void) sprintf(id, "0x%"UVxf, PTR2UV(val));
763             if ((svp = hv_fetch(seenhv, id, (idlen = strlen(id)), FALSE)) &&
764                 (sv = *svp) && SvROK(sv) &&
765                 (seenentry = (AV*)SvRV(sv)))
766             {
767                 SV *othername;
768                 if ((svp = av_fetch(seenentry, 0, FALSE)) && (othername = *svp)
769                     && (svp = av_fetch(seenentry, 2, FALSE)) && *svp && SvIV(*svp) > 0)
770                 {
771                     sv_catpvn(retval, "${", 2);
772                     sv_catsv(retval, othername);
773                     sv_catpvn(retval, "}", 1);
774                     return 1;
775                 }
776             }
777             else if (val != &PL_sv_undef) {
778                 SV *namesv;
779                 namesv = newSVpvn("\\", 1);
780                 sv_catpvn(namesv, name, namelen);
781                 seenentry = newAV();
782                 av_push(seenentry, namesv);
783                 av_push(seenentry, newRV_inc(val));
784                 (void)hv_store(seenhv, id, strlen(id), newRV_inc((SV*)seenentry), 0);
785                 SvREFCNT_dec(seenentry);
786             }
787         }
788
789         if (DD_is_integer(val)) {
790             STRLEN len;
791             if (SvIsUV(val))
792               (void) sprintf(tmpbuf, "%"UVuf, SvUV(val));
793             else
794               (void) sprintf(tmpbuf, "%"IVdf, SvIV(val));
795             len = strlen(tmpbuf);
796             if (SvPOK(val)) {
797               /* Need to check to see if this is a string such as " 0".
798                  I'm assuming from sprintf isn't going to clash with utf8.
799                  Is this valid on EBCDIC?  */
800               STRLEN pvlen;
801               const char *pv = SvPV(val, pvlen);
802               if (pvlen != len || memNE(pv, tmpbuf, len))
803                 goto integer_came_from_string;
804             }
805             if (len > 10) {
806               /* Looks like we're on a 64 bit system.  Make it a string so that
807                  if a 32 bit system reads the number it will cope better.  */
808               sv_catpvf(retval, "'%s'", tmpbuf);
809             } else
810               sv_catpvn(retval, tmpbuf, len);
811         }
812         else if (realtype == SVt_PVGV) {/* GLOBs can end up with scribbly names */
813             c = SvPV(val, i);
814             ++c; --i;                   /* just get the name */
815             if (i >= 6 && strncmp(c, "main::", 6) == 0) {
816                 c += 4;
817                 i -= 4;
818             }
819             if (needs_quote(c)) {
820                 sv_grow(retval, SvCUR(retval)+6+2*i);
821                 r = SvPVX(retval)+SvCUR(retval);
822                 r[0] = '*'; r[1] = '{'; r[2] = '\'';
823                 i += esc_q(r+3, c, i);
824                 i += 3;
825                 r[i++] = '\''; r[i++] = '}';
826                 r[i] = '\0';
827             }
828             else {
829                 sv_grow(retval, SvCUR(retval)+i+2);
830                 r = SvPVX(retval)+SvCUR(retval);
831                 r[0] = '*'; strcpy(r+1, c);
832                 i++;
833             }
834             SvCUR_set(retval, SvCUR(retval)+i);
835
836             if (purity) {
837                 static const char* const entries[] = { "{SCALAR}", "{ARRAY}", "{HASH}" };
838                 static const STRLEN sizes[] = { 8, 7, 6 };
839                 SV *e;
840                 SV *nname = newSVpvn("", 0);
841                 SV *newapad = newSVpvn("", 0);
842                 GV *gv = (GV*)val;
843                 I32 j;
844                 
845                 for (j=0; j<3; j++) {
846                     e = ((j == 0) ? GvSV(gv) : (j == 1) ? (SV*)GvAV(gv) : (SV*)GvHV(gv));
847                     if (!e)
848                         continue;
849                     if (j == 0 && !SvOK(e))
850                         continue;
851
852                     {
853                         I32 nlevel = 0;
854                         SV *postentry = newSVpvn(r,i);
855                         
856                         sv_setsv(nname, postentry);
857                         sv_catpvn(nname, entries[j], sizes[j]);
858                         sv_catpvn(postentry, " = ", 3);
859                         av_push(postav, postentry);
860                         e = newRV_inc(e);
861                         
862                         SvCUR_set(newapad, 0);
863                         if (indent >= 2)
864                             (void)sv_x(aTHX_ newapad, " ", 1, SvCUR(postentry));
865                         
866                         DD_dump(aTHX_ e, SvPVX_const(nname), SvCUR(nname), postentry,
867                                 seenhv, postav, &nlevel, indent, pad, xpad,
868                                 newapad, sep, pair, freezer, toaster, purity,
869                                 deepcopy, quotekeys, bless, maxdepth, 
870                                 sortkeys);
871                         SvREFCNT_dec(e);
872                     }
873                 }
874                 
875                 SvREFCNT_dec(newapad);
876                 SvREFCNT_dec(nname);
877             }
878         }
879         else if (val == &PL_sv_undef || !SvOK(val)) {
880             sv_catpvn(retval, "undef", 5);
881         }
882         else {
883         integer_came_from_string:
884             c = SvPV(val, i);
885             if (DO_UTF8(val))
886                 i += esc_q_utf8(aTHX_ retval, c, i);
887             else {
888                 sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */
889                 r = SvPVX(retval) + SvCUR(retval);
890                 r[0] = '\'';
891                 i += esc_q(r+1, c, i);
892                 ++i;
893                 r[i++] = '\'';
894                 r[i] = '\0';
895                 SvCUR_set(retval, SvCUR(retval)+i);
896             }
897         }
898     }
899
900     if (idlen) {
901         if (deepcopy)
902             (void)hv_delete(seenhv, id, idlen, G_DISCARD);
903         else if (namelen && seenentry) {
904             SV *mark = *av_fetch(seenentry, 2, TRUE);
905             sv_setiv(mark,1);
906         }
907     }
908     return 1;
909 }
910
911
912 MODULE = Data::Dumper           PACKAGE = Data::Dumper         PREFIX = Data_Dumper_
913
914 #
915 # This is the exact equivalent of Dump.  Well, almost. The things that are
916 # different as of now (due to Laziness):
917 #   * doesnt do double-quotes yet.
918 #
919
920 void
921 Data_Dumper_Dumpxs(href, ...)
922         SV      *href;
923         PROTOTYPE: $;$$
924         PPCODE:
925         {
926             HV *hv;
927             SV *retval, *valstr;
928             HV *seenhv = Nullhv;
929             AV *postav, *todumpav, *namesav;
930             I32 level = 0;
931             I32 indent, terse, i, imax, postlen;
932             SV **svp;
933             SV *val, *name, *pad, *xpad, *apad, *sep, *pair, *varname;
934             SV *freezer, *toaster, *bless, *sortkeys;
935             I32 purity, deepcopy, quotekeys, maxdepth = 0;
936             char tmpbuf[1024];
937             I32 gimme = GIMME;
938
939             if (!SvROK(href)) {         /* call new to get an object first */
940                 if (items < 2)
941                     croak("Usage: Data::Dumper::Dumpxs(PACKAGE, VAL_ARY_REF, [NAME_ARY_REF])");
942                 
943                 ENTER;
944                 SAVETMPS;
945                 
946                 PUSHMARK(sp);
947                 XPUSHs(href);
948                 XPUSHs(sv_2mortal(newSVsv(ST(1))));
949                 if (items >= 3)
950                     XPUSHs(sv_2mortal(newSVsv(ST(2))));
951                 PUTBACK;
952                 i = perl_call_method("new", G_SCALAR);
953                 SPAGAIN;
954                 if (i)
955                     href = newSVsv(POPs);
956
957                 PUTBACK;
958                 FREETMPS;
959                 LEAVE;
960                 if (i)
961                     (void)sv_2mortal(href);
962             }
963
964             todumpav = namesav = Nullav;
965             seenhv = Nullhv;
966             val = pad = xpad = apad = sep = pair = varname
967                 = freezer = toaster = bless = &PL_sv_undef;
968             name = sv_newmortal();
969             indent = 2;
970             terse = purity = deepcopy = 0;
971             quotekeys = 1;
972         
973             retval = newSVpvn("", 0);
974             if (SvROK(href)
975                 && (hv = (HV*)SvRV((SV*)href))
976                 && SvTYPE(hv) == SVt_PVHV)              {
977
978                 if ((svp = hv_fetch(hv, "seen", 4, FALSE)) && SvROK(*svp))
979                     seenhv = (HV*)SvRV(*svp);
980                 if ((svp = hv_fetch(hv, "todump", 6, FALSE)) && SvROK(*svp))
981                     todumpav = (AV*)SvRV(*svp);
982                 if ((svp = hv_fetch(hv, "names", 5, FALSE)) && SvROK(*svp))
983                     namesav = (AV*)SvRV(*svp);
984                 if ((svp = hv_fetch(hv, "indent", 6, FALSE)))
985                     indent = SvIV(*svp);
986                 if ((svp = hv_fetch(hv, "purity", 6, FALSE)))
987                     purity = SvIV(*svp);
988                 if ((svp = hv_fetch(hv, "terse", 5, FALSE)))
989                     terse = SvTRUE(*svp);
990 #if 0 /* useqq currently unused */
991                 if ((svp = hv_fetch(hv, "useqq", 5, FALSE)))
992                     useqq = SvTRUE(*svp);
993 #endif
994                 if ((svp = hv_fetch(hv, "pad", 3, FALSE)))
995                     pad = *svp;
996                 if ((svp = hv_fetch(hv, "xpad", 4, FALSE)))
997                     xpad = *svp;
998                 if ((svp = hv_fetch(hv, "apad", 4, FALSE)))
999                     apad = *svp;
1000                 if ((svp = hv_fetch(hv, "sep", 3, FALSE)))
1001                     sep = *svp;
1002                 if ((svp = hv_fetch(hv, "pair", 4, FALSE)))
1003                     pair = *svp;
1004                 if ((svp = hv_fetch(hv, "varname", 7, FALSE)))
1005                     varname = *svp;
1006                 if ((svp = hv_fetch(hv, "freezer", 7, FALSE)))
1007                     freezer = *svp;
1008                 if ((svp = hv_fetch(hv, "toaster", 7, FALSE)))
1009                     toaster = *svp;
1010                 if ((svp = hv_fetch(hv, "deepcopy", 8, FALSE)))
1011                     deepcopy = SvTRUE(*svp);
1012                 if ((svp = hv_fetch(hv, "quotekeys", 9, FALSE)))
1013                     quotekeys = SvTRUE(*svp);
1014                 if ((svp = hv_fetch(hv, "bless", 5, FALSE)))
1015                     bless = *svp;
1016                 if ((svp = hv_fetch(hv, "maxdepth", 8, FALSE)))
1017                     maxdepth = SvIV(*svp);
1018                 if ((svp = hv_fetch(hv, "sortkeys", 8, FALSE))) {
1019                     sortkeys = *svp;
1020                     if (! SvTRUE(sortkeys))
1021                         sortkeys = NULL;
1022                     else if (! (SvROK(sortkeys) &&
1023                                 SvTYPE(SvRV(sortkeys)) == SVt_PVCV) )
1024                     {
1025                         /* flag to use qsortsv() for sorting hash keys */       
1026                         sortkeys = &PL_sv_yes; 
1027                     }
1028                 }
1029                 postav = newAV();
1030
1031                 if (todumpav)
1032                     imax = av_len(todumpav);
1033                 else
1034                     imax = -1;
1035                 valstr = newSVpvn("",0);
1036                 for (i = 0; i <= imax; ++i) {
1037                     SV *newapad;
1038                 
1039                     av_clear(postav);
1040                     if ((svp = av_fetch(todumpav, i, FALSE)))
1041                         val = *svp;
1042                     else
1043                         val = &PL_sv_undef;
1044                     if ((svp = av_fetch(namesav, i, TRUE)))
1045                         sv_setsv(name, *svp);
1046                     else
1047                         (void)SvOK_off(name);
1048                 
1049                     if (SvOK(name)) {
1050                         if ((SvPVX_const(name))[0] == '*') {
1051                             if (SvROK(val)) {
1052                                 switch (SvTYPE(SvRV(val))) {
1053                                 case SVt_PVAV:
1054                                     (SvPVX(name))[0] = '@';
1055                                     break;
1056                                 case SVt_PVHV:
1057                                     (SvPVX(name))[0] = '%';
1058                                     break;
1059                                 case SVt_PVCV:
1060                                     (SvPVX(name))[0] = '*';
1061                                     break;
1062                                 default:
1063                                     (SvPVX(name))[0] = '$';
1064                                     break;
1065                                 }
1066                             }
1067                             else
1068                                 (SvPVX(name))[0] = '$';
1069                         }
1070                         else if ((SvPVX_const(name))[0] != '$')
1071                             sv_insert(name, 0, 0, "$", 1);
1072                     }
1073                     else {
1074                         STRLEN nchars = 0;
1075                         sv_setpvn(name, "$", 1);
1076                         sv_catsv(name, varname);
1077                         (void) sprintf(tmpbuf, "%"IVdf, (IV)(i+1));
1078                         nchars = strlen(tmpbuf);
1079                         sv_catpvn(name, tmpbuf, nchars);
1080                     }
1081                 
1082                     if (indent >= 2) {
1083                         SV *tmpsv = sv_x(aTHX_ Nullsv, " ", 1, SvCUR(name)+3);
1084                         newapad = newSVsv(apad);
1085                         sv_catsv(newapad, tmpsv);
1086                         SvREFCNT_dec(tmpsv);
1087                     }
1088                     else
1089                         newapad = apad;
1090                 
1091                     DD_dump(aTHX_ val, SvPVX_const(name), SvCUR(name), valstr, seenhv,
1092                             postav, &level, indent, pad, xpad, newapad, sep, pair,
1093                             freezer, toaster, purity, deepcopy, quotekeys,
1094                             bless, maxdepth, sortkeys);
1095                 
1096                     if (indent >= 2)
1097                         SvREFCNT_dec(newapad);
1098
1099                     postlen = av_len(postav);
1100                     if (postlen >= 0 || !terse) {
1101                         sv_insert(valstr, 0, 0, " = ", 3);
1102                         sv_insert(valstr, 0, 0, SvPVX_const(name), SvCUR(name));
1103                         sv_catpvn(valstr, ";", 1);
1104                     }
1105                     sv_catsv(retval, pad);
1106                     sv_catsv(retval, valstr);
1107                     sv_catsv(retval, sep);
1108                     if (postlen >= 0) {
1109                         I32 i;
1110                         sv_catsv(retval, pad);
1111                         for (i = 0; i <= postlen; ++i) {
1112                             SV *elem;
1113                             svp = av_fetch(postav, i, FALSE);
1114                             if (svp && (elem = *svp)) {
1115                                 sv_catsv(retval, elem);
1116                                 if (i < postlen) {
1117                                     sv_catpvn(retval, ";", 1);
1118                                     sv_catsv(retval, sep);
1119                                     sv_catsv(retval, pad);
1120                                 }
1121                             }
1122                         }
1123                         sv_catpvn(retval, ";", 1);
1124                             sv_catsv(retval, sep);
1125                     }
1126                     sv_setpvn(valstr, "", 0);
1127                     if (gimme == G_ARRAY) {
1128                         XPUSHs(sv_2mortal(retval));
1129                         if (i < imax)   /* not the last time thro ? */
1130                             retval = newSVpvn("",0);
1131                     }
1132                 }
1133                 SvREFCNT_dec(postav);
1134                 SvREFCNT_dec(valstr);
1135             }
1136             else
1137                 croak("Call to new() method failed to return HASH ref");
1138             if (gimme == G_SCALAR)
1139                 XPUSHs(sv_2mortal(retval));
1140         }