This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Refactoring to Sv*_set() macros - patch #5
[perl5.git] / ext / Data / Dumper / Dumper.xs
1 #define PERL_NO_GET_CONTEXT
2 #include "EXTERN.h"
3 #include "perl.h"
4 #include "XSUB.h"
5
6 static I32 num_q (char *s, STRLEN slen);
7 static I32 esc_q (char *dest, char *src, STRLEN slen);
8 static I32 esc_q_utf8 (pTHX_ SV *sv, char *src, STRLEN slen);
9 static SV *sv_x (pTHX_ SV *sv, char *str, STRLEN len, I32 n);
10 static I32 DD_dump (pTHX_ SV *val, char *name, STRLEN namelen, SV *retval,
11                     HV *seenhv, AV *postav, I32 *levelp, I32 indent,
12                     SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair,
13                     SV *freezer, SV *toaster,
14                     I32 purity, I32 deepcopy, I32 quotekeys, SV *bless,
15                     I32 maxdepth, SV *sortkeys);
16
17 #if PERL_VERSION <= 6 /* Perl 5.6 and earlier */
18
19 # ifdef EBCDIC
20 #  define UNI_TO_NATIVE(ch) (((ch) > 255) ? (ch) : ASCII_TO_NATIVE(ch))
21 # else
22 #  define UNI_TO_NATIVE(ch) (ch)
23 # endif
24
25 UV
26 Perl_utf8_to_uvchr(pTHX_ U8 *s, STRLEN *retlen)
27 {
28     UV uv = utf8_to_uv(s, UTF8_MAXLEN, retlen,
29                     ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
30     return UNI_TO_NATIVE(uv);
31 }
32
33 # if !defined(PERL_IMPLICIT_CONTEXT)
34 #  define utf8_to_uvchr      Perl_utf8_to_uvchr
35 # else
36 #  define utf8_to_uvchr(a,b) Perl_utf8_to_uvchr(aTHX_ a,b)
37 # endif
38
39 #endif /* PERL_VERSION <= 6 */
40
41 /* Changes in 5.7 series mean that now IOK is only set if scalar is
42    precisely integer but in 5.6 and earlier we need to do a more
43    complex test  */
44 #if PERL_VERSION <= 6
45 #define DD_is_integer(sv) (SvIOK(sv) && (SvIsUV(val) ? SvUV(sv) == SvNV(sv) : SvIV(sv) == SvNV(sv)))
46 #else
47 #define DD_is_integer(sv) SvIOK(sv)
48 #endif
49
50 /* does a string need to be protected? */
51 static I32
52 needs_quote(register char *s)
53 {
54 TOP:
55     if (s[0] == ':') {
56         if (*++s) {
57             if (*s++ != ':')
58                 return 1;
59         }
60         else
61             return 1;
62     }
63     if (isIDFIRST(*s)) {
64         while (*++s)
65             if (!isALNUM(*s)) {
66                 if (*s == ':')
67                     goto TOP;
68                 else
69                     return 1;
70             }
71     }
72     else
73         return 1;
74     return 0;
75 }
76
77 /* count the number of "'"s and "\"s in string */
78 static I32
79 num_q(register char *s, register STRLEN slen)
80 {
81     register I32 ret = 0;
82
83     while (slen > 0) {
84         if (*s == '\'' || *s == '\\')
85             ++ret;
86         ++s;
87         --slen;
88     }
89     return ret;
90 }
91
92
93 /* returns number of chars added to escape "'"s and "\"s in s */
94 /* slen number of characters in s will be escaped */
95 /* destination must be long enough for additional chars */
96 static I32
97 esc_q(register char *d, register char *s, register STRLEN slen)
98 {
99     register I32 ret = 0;
100
101     while (slen > 0) {
102         switch (*s) {
103         case '\'':
104         case '\\':
105             *d = '\\';
106             ++d; ++ret;
107         default:
108             *d = *s;
109             ++d; ++s; --slen;
110             break;
111         }
112     }
113     return ret;
114 }
115
116 static I32
117 esc_q_utf8(pTHX_ SV* sv, register char *src, register STRLEN slen)
118 {
119     char *s, *send, *r, *rstart;
120     STRLEN j, cur = SvCUR(sv);
121     /* Could count 128-255 and 256+ in two variables, if we want to
122        be like &qquote and make a distinction.  */
123     STRLEN grow = 0;    /* bytes needed to represent chars 128+ */
124     /* STRLEN topbit_grow = 0;  bytes needed to represent chars 128-255 */
125     STRLEN backslashes = 0;
126     STRLEN single_quotes = 0;
127     STRLEN qq_escapables = 0;   /* " $ @ will need a \ in "" strings.  */
128     STRLEN normal = 0;
129
130     /* this will need EBCDICification */
131     for (s = src, send = src + slen; s < send; s += UTF8SKIP(s)) {
132         UV k = utf8_to_uvchr((U8*)s, NULL);
133
134         if (k > 127) {
135             /* 4: \x{} then count the number of hex digits.  */
136             grow += 4 + (k <= 0xFF ? 2 : k <= 0xFFF ? 3 : k <= 0xFFFF ? 4 :
137 #if UVSIZE == 4
138                 8 /* We may allocate a bit more than the minimum here.  */
139 #else
140                 k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
141 #endif
142                 );
143         } else if (k == '\\') {
144             backslashes++;
145         } else if (k == '\'') {
146             single_quotes++;
147         } else if (k == '"' || k == '$' || k == '@') {
148             qq_escapables++;
149         } else {
150             normal++;
151         }
152     }
153     if (grow) {
154         /* We have something needing hex. 3 is ""\0 */
155         sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
156                 + 2*qq_escapables + normal);
157         rstart = r = SvPVX(sv) + cur;
158
159         *r++ = '"';
160
161         for (s = src; s < send; s += UTF8SKIP(s)) {
162             UV k = utf8_to_uvchr((U8*)s, NULL);
163
164             if (k == '"' || k == '\\' || k == '$' || k == '@') {
165                 *r++ = '\\';
166                 *r++ = (char)k;
167             }
168             else if (k < 0x80)
169                 *r++ = (char)k;
170             else {
171               /* The return value of sprintf() is unportable.
172                * In modern systems it returns (int) the number of characters,
173                * but in older systems it might return (char*) the original
174                * buffer, or it might even be (void).  The easiest portable
175                * thing to do is probably use sprintf() in void context and
176                * then strlen(buffer) for the length.  The more proper way
177                * would of course be to figure out the prototype of sprintf.
178                * --jhi */
179                 sprintf(r, "\\x{%"UVxf"}", k);
180                 r += strlen(r);
181             }
182         }
183         *r++ = '"';
184     } else {
185         /* Single quotes.  */
186         sv_grow(sv, cur + 3 + 2*backslashes + 2*single_quotes
187                 + qq_escapables + normal);
188         rstart = r = SvPVX(sv) + cur;
189         *r++ = '\'';
190         for (s = src; s < send; s ++) {
191             char k = *s;
192             if (k == '\'' || k == '\\')
193                 *r++ = '\\';
194             *r++ = k;
195         }
196         *r++ = '\'';
197     }
198     *r = '\0';
199     j = r - rstart;
200     SvCUR_set(sv, cur + j);
201
202     return j;
203 }
204
205 /* append a repeated string to an SV */
206 static SV *
207 sv_x(pTHX_ SV *sv, register char *str, STRLEN len, I32 n)
208 {
209     if (sv == Nullsv)
210         sv = newSVpvn("", 0);
211     else
212         assert(SvTYPE(sv) >= SVt_PV);
213
214     if (n > 0) {
215         SvGROW(sv, len*n + SvCUR(sv) + 1);
216         if (len == 1) {
217             char *start = SvPVX(sv) + SvCUR(sv);
218             SvCUR_set(sv, SvCUR(sv) + n);
219             start[n] = '\0';
220             while (n > 0)
221                 start[--n] = str[0];
222         }
223         else
224             while (n > 0) {
225                 sv_catpvn(sv, str, len);
226                 --n;
227             }
228     }
229     return sv;
230 }
231
232 /*
233  * This ought to be split into smaller functions. (it is one long function since
234  * it exactly parallels the perl version, which was one long thing for
235  * efficiency raisins.)  Ugggh!
236  */
237 static I32
238 DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
239         AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad,
240         SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity,
241         I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys)
242 {
243     char tmpbuf[128];
244     U32 i;
245     char *c, *r, *realpack, id[128];
246     SV **svp;
247     SV *sv, *ipad, *ival;
248     SV *blesspad = Nullsv;
249     AV *seenentry = Nullav;
250     char *iname;
251     STRLEN inamelen, idlen = 0;
252     U32 realtype;
253
254     if (!val)
255         return 0;
256
257     realtype = SvTYPE(val);
258
259     if (SvGMAGICAL(val))
260         mg_get(val);
261     if (SvROK(val)) {
262
263         /* If a freeze method is provided and the object has it, call
264            it.  Warn on errors. */
265         if (SvOBJECT(SvRV(val)) && freezer &&
266             SvPOK(freezer) && SvCUR(freezer) &&
267             gv_fetchmeth(SvSTASH(SvRV(val)), SvPVX(freezer), 
268                          SvCUR(freezer), -1) != NULL)
269         {
270             dSP; ENTER; SAVETMPS; PUSHMARK(sp);
271             XPUSHs(val); PUTBACK;
272             i = perl_call_method(SvPVX(freezer), G_EVAL|G_VOID);
273             SPAGAIN;
274             if (SvTRUE(ERRSV))
275                 warn("WARNING(Freezer method call failed): %"SVf"", ERRSV);
276             PUTBACK; FREETMPS; LEAVE;
277         }
278         
279         ival = SvRV(val);
280         realtype = SvTYPE(ival);
281         (void) sprintf(id, "0x%"UVxf, PTR2UV(ival));
282         idlen = strlen(id);
283         if (SvOBJECT(ival))
284             realpack = HvNAME(SvSTASH(ival));
285         else
286             realpack = Nullch;
287
288         /* if it has a name, we need to either look it up, or keep a tab
289          * on it so we know when we hit it later
290          */
291         if (namelen) {
292             if ((svp = hv_fetch(seenhv, id, idlen, FALSE))
293                 && (sv = *svp) && SvROK(sv) && (seenentry = (AV*)SvRV(sv)))
294             {
295                 SV *othername;
296                 if ((svp = av_fetch(seenentry, 0, FALSE))
297                     && (othername = *svp))
298                 {
299                     if (purity && *levelp > 0) {
300                         SV *postentry;
301                         
302                         if (realtype == SVt_PVHV)
303                             sv_catpvn(retval, "{}", 2);
304                         else if (realtype == SVt_PVAV)
305                             sv_catpvn(retval, "[]", 2);
306                         else
307                             sv_catpvn(retval, "do{my $o}", 9);
308                         postentry = newSVpvn(name, namelen);
309                         sv_catpvn(postentry, " = ", 3);
310                         sv_catsv(postentry, othername);
311                         av_push(postav, postentry);
312                     }
313                     else {
314                         if (name[0] == '@' || name[0] == '%') {
315                             if ((SvPVX(othername))[0] == '\\' &&
316                                 (SvPVX(othername))[1] == name[0]) {
317                                 sv_catpvn(retval, SvPVX(othername)+1,
318                                           SvCUR(othername)-1);
319                             }
320                             else {
321                                 sv_catpvn(retval, name, 1);
322                                 sv_catpvn(retval, "{", 1);
323                                 sv_catsv(retval, othername);
324                                 sv_catpvn(retval, "}", 1);
325                             }
326                         }
327                         else
328                             sv_catsv(retval, othername);
329                     }
330                     return 1;
331                 }
332                 else {
333                     warn("ref name not found for %s", id);
334                     return 0;
335                 }
336             }
337             else {   /* store our name and continue */
338                 SV *namesv;
339                 if (name[0] == '@' || name[0] == '%') {
340                     namesv = newSVpvn("\\", 1);
341                     sv_catpvn(namesv, name, namelen);
342                 }
343                 else if (realtype == SVt_PVCV && name[0] == '*') {
344                     namesv = newSVpvn("\\", 2);
345                     sv_catpvn(namesv, name, namelen);
346                     (SvPVX(namesv))[1] = '&';
347                 }
348                 else
349                     namesv = newSVpvn(name, namelen);
350                 seenentry = newAV();
351                 av_push(seenentry, namesv);
352                 (void)SvREFCNT_inc(val);
353                 av_push(seenentry, val);
354                 (void)hv_store(seenhv, id, strlen(id),
355                                newRV_inc((SV*)seenentry), 0);
356                 SvREFCNT_dec(seenentry);
357             }
358         }
359
360         if (realpack && *realpack == 'R' && strEQ(realpack, "Regexp")) {
361             STRLEN rlen;
362             char *rval = SvPV(val, rlen);
363             char *slash = strchr(rval, '/');
364             sv_catpvn(retval, "qr/", 3);
365             while (slash) {
366                 sv_catpvn(retval, rval, slash-rval);
367                 sv_catpvn(retval, "\\/", 2);
368                 rlen -= slash-rval+1;
369                 rval = slash+1;
370                 slash = strchr(rval, '/');
371             }
372             sv_catpvn(retval, rval, rlen);
373             sv_catpvn(retval, "/", 1);
374             return 1;
375         }
376
377         /* If purity is not set and maxdepth is set, then check depth:
378          * if we have reached maximum depth, return the string
379          * representation of the thing we are currently examining
380          * at this depth (i.e., 'Foo=ARRAY(0xdeadbeef)').
381          */
382         if (!purity && maxdepth > 0 && *levelp >= maxdepth) {
383             STRLEN vallen;
384             char *valstr = SvPV(val,vallen);
385             sv_catpvn(retval, "'", 1);
386             sv_catpvn(retval, valstr, vallen);
387             sv_catpvn(retval, "'", 1);
388             return 1;
389         }
390
391         if (realpack) {                         /* we have a blessed ref */
392             STRLEN blesslen;
393             char *blessstr = SvPV(bless, blesslen);
394             sv_catpvn(retval, blessstr, blesslen);
395             sv_catpvn(retval, "( ", 2);
396             if (indent >= 2) {
397                 blesspad = apad;
398                 apad = newSVsv(apad);
399                 sv_x(aTHX_ apad, " ", 1, blesslen+2);
400             }
401         }
402
403         (*levelp)++;
404         ipad = sv_x(aTHX_ Nullsv, SvPVX(xpad), SvCUR(xpad), *levelp);
405
406         if (realtype <= SVt_PVBM) {                          /* scalar ref */
407             SV *namesv = newSVpvn("${", 2);
408             sv_catpvn(namesv, name, namelen);
409             sv_catpvn(namesv, "}", 1);
410             if (realpack) {                                  /* blessed */
411                 sv_catpvn(retval, "do{\\(my $o = ", 13);
412                 DD_dump(aTHX_ ival, SvPVX(namesv), SvCUR(namesv), retval, seenhv,
413                         postav, levelp, indent, pad, xpad, apad, sep, pair,
414                         freezer, toaster, purity, deepcopy, quotekeys, bless,
415                         maxdepth, sortkeys);
416                 sv_catpvn(retval, ")}", 2);
417             }                                                /* plain */
418             else {
419                 sv_catpvn(retval, "\\", 1);
420                 DD_dump(aTHX_ ival, SvPVX(namesv), SvCUR(namesv), retval, seenhv,
421                         postav, levelp, indent, pad, xpad, apad, sep, pair,
422                         freezer, toaster, purity, deepcopy, quotekeys, bless,
423                         maxdepth, sortkeys);
424             }
425             SvREFCNT_dec(namesv);
426         }
427         else if (realtype == SVt_PVGV) {                     /* glob ref */
428             SV *namesv = newSVpvn("*{", 2);
429             sv_catpvn(namesv, name, namelen);
430             sv_catpvn(namesv, "}", 1);
431             sv_catpvn(retval, "\\", 1);
432             DD_dump(aTHX_ ival, SvPVX(namesv), SvCUR(namesv), retval, seenhv,
433                     postav, levelp,     indent, pad, xpad, apad, sep, pair,
434                     freezer, toaster, purity, deepcopy, quotekeys, bless,
435                     maxdepth, sortkeys);
436             SvREFCNT_dec(namesv);
437         }
438         else if (realtype == SVt_PVAV) {
439             SV *totpad;
440             I32 ix = 0;
441             I32 ixmax = av_len((AV *)ival);
442         
443             SV *ixsv = newSViv(0);
444             /* allowing for a 24 char wide array index */
445             New(0, iname, namelen+28, char);
446             (void)strcpy(iname, name);
447             inamelen = namelen;
448             if (name[0] == '@') {
449                 sv_catpvn(retval, "(", 1);
450                 iname[0] = '$';
451             }
452             else {
453                 sv_catpvn(retval, "[", 1);
454                 /* omit "->" in $foo{bar}->[0], but not in ${$foo}->[0] */
455                 /*if (namelen > 0
456                     && name[namelen-1] != ']' && name[namelen-1] != '}'
457                     && (namelen < 4 || (name[1] != '{' && name[2] != '{')))*/
458                 if ((namelen > 0
459                      && name[namelen-1] != ']' && name[namelen-1] != '}')
460                     || (namelen > 4
461                         && (name[1] == '{'
462                             || (name[0] == '\\' && name[2] == '{'))))
463                 {
464                     iname[inamelen++] = '-'; iname[inamelen++] = '>';
465                     iname[inamelen] = '\0';
466                 }
467             }
468             if (iname[0] == '*' && iname[inamelen-1] == '}' && inamelen >= 8 &&
469                 (instr(iname+inamelen-8, "{SCALAR}") ||
470                  instr(iname+inamelen-7, "{ARRAY}") ||
471                  instr(iname+inamelen-6, "{HASH}"))) {
472                 iname[inamelen++] = '-'; iname[inamelen++] = '>';
473             }
474             iname[inamelen++] = '['; iname[inamelen] = '\0';
475             totpad = newSVsv(sep);
476             sv_catsv(totpad, pad);
477             sv_catsv(totpad, apad);
478
479             for (ix = 0; ix <= ixmax; ++ix) {
480                 STRLEN ilen;
481                 SV *elem;
482                 svp = av_fetch((AV*)ival, ix, FALSE);
483                 if (svp)
484                     elem = *svp;
485                 else
486                     elem = &PL_sv_undef;
487                 
488                 ilen = inamelen;
489                 sv_setiv(ixsv, ix);
490                 (void) sprintf(iname+ilen, "%"IVdf, (IV)ix);
491                 ilen = strlen(iname);
492                 iname[ilen++] = ']'; iname[ilen] = '\0';
493                 if (indent >= 3) {
494                     sv_catsv(retval, totpad);
495                     sv_catsv(retval, ipad);
496                     sv_catpvn(retval, "#", 1);
497                     sv_catsv(retval, ixsv);
498                 }
499                 sv_catsv(retval, totpad);
500                 sv_catsv(retval, ipad);
501                 DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav,
502                         levelp, indent, pad, xpad, apad, sep, pair,
503                         freezer, toaster, purity, deepcopy, quotekeys, bless,
504                         maxdepth, sortkeys);
505                 if (ix < ixmax)
506                     sv_catpvn(retval, ",", 1);
507             }
508             if (ixmax >= 0) {
509                 SV *opad = sv_x(aTHX_ Nullsv, SvPVX(xpad), SvCUR(xpad), (*levelp)-1);
510                 sv_catsv(retval, totpad);
511                 sv_catsv(retval, opad);
512                 SvREFCNT_dec(opad);
513             }
514             if (name[0] == '@')
515                 sv_catpvn(retval, ")", 1);
516             else
517                 sv_catpvn(retval, "]", 1);
518             SvREFCNT_dec(ixsv);
519             SvREFCNT_dec(totpad);
520             Safefree(iname);
521         }
522         else if (realtype == SVt_PVHV) {
523             SV *totpad, *newapad;
524             SV *iname, *sname;
525             HE *entry;
526             char *key;
527             I32 klen;
528             SV *hval;
529             AV *keys = Nullav;
530         
531             iname = newSVpvn(name, namelen);
532             if (name[0] == '%') {
533                 sv_catpvn(retval, "(", 1);
534                 (SvPVX(iname))[0] = '$';
535             }
536             else {
537                 sv_catpvn(retval, "{", 1);
538                 /* omit "->" in $foo[0]->{bar}, but not in ${$foo}->{bar} */
539                 if ((namelen > 0
540                      && name[namelen-1] != ']' && name[namelen-1] != '}')
541                     || (namelen > 4
542                         && (name[1] == '{'
543                             || (name[0] == '\\' && name[2] == '{'))))
544                 {
545                     sv_catpvn(iname, "->", 2);
546                 }
547             }
548             if (name[0] == '*' && name[namelen-1] == '}' && namelen >= 8 &&
549                 (instr(name+namelen-8, "{SCALAR}") ||
550                  instr(name+namelen-7, "{ARRAY}") ||
551                  instr(name+namelen-6, "{HASH}"))) {
552                 sv_catpvn(iname, "->", 2);
553             }
554             sv_catpvn(iname, "{", 1);
555             totpad = newSVsv(sep);
556             sv_catsv(totpad, pad);
557             sv_catsv(totpad, apad);
558         
559             /* If requested, get a sorted/filtered array of hash keys */
560             if (sortkeys) {
561                 if (sortkeys == &PL_sv_yes) {
562 #if PERL_VERSION < 8
563                     sortkeys = sv_2mortal(newSVpvn("Data::Dumper::_sortkeys", 23));
564 #else
565                     keys = newAV();
566                     (void)hv_iterinit((HV*)ival);
567                     while ((entry = hv_iternext((HV*)ival))) {
568                         sv = hv_iterkeysv(entry);
569                         SvREFCNT_inc(sv);
570                         av_push(keys, sv);
571                     }
572 # ifdef USE_LOCALE_NUMERIC
573                     sortsv(AvARRAY(keys), 
574                            av_len(keys)+1, 
575                            IN_LOCALE ? Perl_sv_cmp_locale : Perl_sv_cmp);
576 # else
577                     sortsv(AvARRAY(keys), 
578                            av_len(keys)+1, 
579                            Perl_sv_cmp);
580 # endif
581 #endif
582                 }
583                 if (sortkeys != &PL_sv_yes) {
584                     dSP; ENTER; SAVETMPS; PUSHMARK(sp);
585                     XPUSHs(sv_2mortal(newRV_inc(ival))); PUTBACK;
586                     i = perl_call_sv(sortkeys, G_SCALAR | G_EVAL);
587                     SPAGAIN;
588                     if (i) {
589                         sv = POPs;
590                         if (SvROK(sv) && (SvTYPE(SvRV(sv)) == SVt_PVAV))
591                             keys = (AV*)SvREFCNT_inc(SvRV(sv));
592                     }
593                     if (! keys)
594                         warn("Sortkeys subroutine did not return ARRAYREF\n");
595                     PUTBACK; FREETMPS; LEAVE;
596                 }
597                 if (keys)
598                     sv_2mortal((SV*)keys);
599             }
600             else
601                 (void)hv_iterinit((HV*)ival);
602
603             /* foreach (keys %hash) */
604             for (i = 0; 1; i++) {
605                 char *nkey;
606                 char *nkey_buffer = NULL;
607                 I32 nticks = 0;
608                 SV* keysv;
609                 STRLEN keylen;
610                 I32 nlen;
611                 bool do_utf8 = FALSE;
612
613                 if ((sortkeys && !(keys && (I32)i <= av_len(keys))) ||
614                     !(entry = hv_iternext((HV *)ival)))
615                     break;
616
617                 if (i)
618                     sv_catpvn(retval, ",", 1);
619
620                 if (sortkeys) {
621                     char *key;
622                     svp = av_fetch(keys, i, FALSE);
623                     keysv = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
624                     key = SvPV(keysv, keylen);
625                     svp = hv_fetch((HV*)ival, key,
626                                    SvUTF8(keysv) ? -(I32)keylen : keylen, 0);
627                     hval = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
628                 }
629                 else {
630                     keysv = hv_iterkeysv(entry);
631                     hval = hv_iterval((HV*)ival, entry);
632                 }
633
634                 do_utf8 = DO_UTF8(keysv);
635                 key = SvPV(keysv, keylen);
636                 klen = keylen;
637
638                 sv_catsv(retval, totpad);
639                 sv_catsv(retval, ipad);
640                 /* old logic was first to check utf8 flag, and if utf8 always
641                    call esc_q_utf8.  This caused test to break under -Mutf8,
642                    because there even strings like 'c' have utf8 flag on.
643                    Hence with quotekeys == 0 the XS code would still '' quote
644                    them based on flags, whereas the perl code would not,
645                    based on regexps.
646                    The perl code is correct.
647                    needs_quote() decides that anything that isn't a valid
648                    perl identifier needs to be quoted, hence only correctly
649                    formed strings with no characters outside [A-Za-z0-9_:]
650                    won't need quoting.  None of those characters are used in
651                    the byte encoding of utf8, so anything with utf8
652                    encoded characters in will need quoting. Hence strings
653                    with utf8 encoded characters in will end up inside do_utf8
654                    just like before, but now strings with utf8 flag set but
655                    only ascii characters will end up in the unquoted section.
656
657                    There should also be less tests for the (probably currently)
658                    more common doesn't need quoting case.
659                    The code is also smaller (22044 vs 22260) because I've been
660                    able to pull the common logic out to both sides.  */
661                 if (quotekeys || needs_quote(key)) {
662                     if (do_utf8) {
663                         STRLEN ocur = SvCUR(retval);
664                         nlen = esc_q_utf8(aTHX_ retval, key, klen);
665                         nkey = SvPVX(retval) + ocur;
666                     }
667                     else {
668                         nticks = num_q(key, klen);
669                         New(0, nkey_buffer, klen+nticks+3, char);
670                         nkey = nkey_buffer;
671                         nkey[0] = '\'';
672                         if (nticks)
673                             klen += esc_q(nkey+1, key, klen);
674                         else
675                             (void)Copy(key, nkey+1, klen, char);
676                         nkey[++klen] = '\'';
677                         nkey[++klen] = '\0';
678                         nlen = klen;
679                         sv_catpvn(retval, nkey, klen);
680                     }
681                 }
682                 else {
683                     nkey = key;
684                     nlen = klen;
685                     sv_catpvn(retval, nkey, klen);
686                 }
687                 sname = newSVsv(iname);
688                 sv_catpvn(sname, nkey, nlen);
689                 sv_catpvn(sname, "}", 1);
690
691                 sv_catsv(retval, pair);
692                 if (indent >= 2) {
693                     char *extra;
694                     I32 elen = 0;
695                     newapad = newSVsv(apad);
696                     New(0, extra, klen+4+1, char);
697                     while (elen < (klen+4))
698                         extra[elen++] = ' ';
699                     extra[elen] = '\0';
700                     sv_catpvn(newapad, extra, elen);
701                     Safefree(extra);
702                 }
703                 else
704                     newapad = apad;
705
706                 DD_dump(aTHX_ hval, SvPVX(sname), SvCUR(sname), retval, seenhv,
707                         postav, levelp, indent, pad, xpad, newapad, sep, pair,
708                         freezer, toaster, purity, deepcopy, quotekeys, bless,
709                         maxdepth, sortkeys);
710                 SvREFCNT_dec(sname);
711                 Safefree(nkey_buffer);
712                 if (indent >= 2)
713                     SvREFCNT_dec(newapad);
714             }
715             if (i) {
716                 SV *opad = sv_x(aTHX_ Nullsv, SvPVX(xpad), SvCUR(xpad), *levelp-1);
717                 sv_catsv(retval, totpad);
718                 sv_catsv(retval, opad);
719                 SvREFCNT_dec(opad);
720             }
721             if (name[0] == '%')
722                 sv_catpvn(retval, ")", 1);
723             else
724                 sv_catpvn(retval, "}", 1);
725             SvREFCNT_dec(iname);
726             SvREFCNT_dec(totpad);
727         }
728         else if (realtype == SVt_PVCV) {
729             sv_catpvn(retval, "sub { \"DUMMY\" }", 15);
730             if (purity)
731                 warn("Encountered CODE ref, using dummy placeholder");
732         }
733         else {
734             warn("cannot handle ref type %ld", realtype);
735         }
736
737         if (realpack) {  /* free blessed allocs */
738             if (indent >= 2) {
739                 SvREFCNT_dec(apad);
740                 apad = blesspad;
741             }
742             sv_catpvn(retval, ", '", 3);
743             sv_catpvn(retval, realpack, strlen(realpack));
744             sv_catpvn(retval, "' )", 3);
745             if (toaster && SvPOK(toaster) && SvCUR(toaster)) {
746                 sv_catpvn(retval, "->", 2);
747                 sv_catsv(retval, toaster);
748                 sv_catpvn(retval, "()", 2);
749             }
750         }
751         SvREFCNT_dec(ipad);
752         (*levelp)--;
753     }
754     else {
755         STRLEN i;
756         
757         if (namelen) {
758             (void) sprintf(id, "0x%"UVxf, PTR2UV(val));
759             if ((svp = hv_fetch(seenhv, id, (idlen = strlen(id)), FALSE)) &&
760                 (sv = *svp) && SvROK(sv) &&
761                 (seenentry = (AV*)SvRV(sv)))
762             {
763                 SV *othername;
764                 if ((svp = av_fetch(seenentry, 0, FALSE)) && (othername = *svp)
765                     && (svp = av_fetch(seenentry, 2, FALSE)) && *svp && SvIV(*svp) > 0)
766                 {
767                     sv_catpvn(retval, "${", 2);
768                     sv_catsv(retval, othername);
769                     sv_catpvn(retval, "}", 1);
770                     return 1;
771                 }
772             }
773             else if (val != &PL_sv_undef) {
774                 SV *namesv;
775                 namesv = newSVpvn("\\", 1);
776                 sv_catpvn(namesv, name, namelen);
777                 seenentry = newAV();
778                 av_push(seenentry, namesv);
779                 av_push(seenentry, newRV_inc(val));
780                 (void)hv_store(seenhv, id, strlen(id), newRV_inc((SV*)seenentry), 0);
781                 SvREFCNT_dec(seenentry);
782             }
783         }
784
785         if (DD_is_integer(val)) {
786             STRLEN len;
787             if (SvIsUV(val))
788               (void) sprintf(tmpbuf, "%"UVuf, SvUV(val));
789             else
790               (void) sprintf(tmpbuf, "%"IVdf, SvIV(val));
791             len = strlen(tmpbuf);
792             if (SvPOK(val)) {
793               /* Need to check to see if this is a string such as " 0".
794                  I'm assuming from sprintf isn't going to clash with utf8.
795                  Is this valid on EBCDIC?  */
796               STRLEN pvlen;
797               const char *pv = SvPV(val, pvlen);
798               if (pvlen != len || memNE(pv, tmpbuf, len))
799                 goto integer_came_from_string;
800             }
801             if (len > 10) {
802               /* Looks like we're on a 64 bit system.  Make it a string so that
803                  if a 32 bit system reads the number it will cope better.  */
804               sv_catpvf(retval, "'%s'", tmpbuf);
805             } else
806               sv_catpvn(retval, tmpbuf, len);
807         }
808         else if (realtype == SVt_PVGV) {/* GLOBs can end up with scribbly names */
809             c = SvPV(val, i);
810             ++c; --i;                   /* just get the name */
811             if (i >= 6 && strncmp(c, "main::", 6) == 0) {
812                 c += 4;
813                 i -= 4;
814             }
815             if (needs_quote(c)) {
816                 sv_grow(retval, SvCUR(retval)+6+2*i);
817                 r = SvPVX(retval)+SvCUR(retval);
818                 r[0] = '*'; r[1] = '{'; r[2] = '\'';
819                 i += esc_q(r+3, c, i);
820                 i += 3;
821                 r[i++] = '\''; r[i++] = '}';
822                 r[i] = '\0';
823             }
824             else {
825                 sv_grow(retval, SvCUR(retval)+i+2);
826                 r = SvPVX(retval)+SvCUR(retval);
827                 r[0] = '*'; strcpy(r+1, c);
828                 i++;
829             }
830             SvCUR_set(retval, SvCUR(retval)+i);
831
832             if (purity) {
833                 static char *entries[] = { "{SCALAR}", "{ARRAY}", "{HASH}" };
834                 static STRLEN sizes[] = { 8, 7, 6 };
835                 SV *e;
836                 SV *nname = newSVpvn("", 0);
837                 SV *newapad = newSVpvn("", 0);
838                 GV *gv = (GV*)val;
839                 I32 j;
840                 
841                 for (j=0; j<3; j++) {
842                     e = ((j == 0) ? GvSV(gv) : (j == 1) ? (SV*)GvAV(gv) : (SV*)GvHV(gv));
843                     if (!e)
844                         continue;
845                     if (j == 0 && !SvOK(e))
846                         continue;
847
848                     {
849                         I32 nlevel = 0;
850                         SV *postentry = newSVpvn(r,i);
851                         
852                         sv_setsv(nname, postentry);
853                         sv_catpvn(nname, entries[j], sizes[j]);
854                         sv_catpvn(postentry, " = ", 3);
855                         av_push(postav, postentry);
856                         e = newRV_inc(e);
857                         
858                         SvCUR_set(newapad, 0);
859                         if (indent >= 2)
860                             (void)sv_x(aTHX_ newapad, " ", 1, SvCUR(postentry));
861                         
862                         DD_dump(aTHX_ e, SvPVX(nname), SvCUR(nname), postentry,
863                                 seenhv, postav, &nlevel, indent, pad, xpad,
864                                 newapad, sep, pair, freezer, toaster, purity,
865                                 deepcopy, quotekeys, bless, maxdepth, 
866                                 sortkeys);
867                         SvREFCNT_dec(e);
868                     }
869                 }
870                 
871                 SvREFCNT_dec(newapad);
872                 SvREFCNT_dec(nname);
873             }
874         }
875         else if (val == &PL_sv_undef || !SvOK(val)) {
876             sv_catpvn(retval, "undef", 5);
877         }
878         else {
879         integer_came_from_string:
880             c = SvPV(val, i);
881             if (DO_UTF8(val))
882                 i += esc_q_utf8(aTHX_ retval, c, i);
883             else {
884                 sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */
885                 r = SvPVX(retval) + SvCUR(retval);
886                 r[0] = '\'';
887                 i += esc_q(r+1, c, i);
888                 ++i;
889                 r[i++] = '\'';
890                 r[i] = '\0';
891                 SvCUR_set(retval, SvCUR(retval)+i);
892             }
893         }
894     }
895
896     if (idlen) {
897         if (deepcopy)
898             (void)hv_delete(seenhv, id, idlen, G_DISCARD);
899         else if (namelen && seenentry) {
900             SV *mark = *av_fetch(seenentry, 2, TRUE);
901             sv_setiv(mark,1);
902         }
903     }
904     return 1;
905 }
906
907
908 MODULE = Data::Dumper           PACKAGE = Data::Dumper         PREFIX = Data_Dumper_
909
910 #
911 # This is the exact equivalent of Dump.  Well, almost. The things that are
912 # different as of now (due to Laziness):
913 #   * doesnt do double-quotes yet.
914 #
915
916 void
917 Data_Dumper_Dumpxs(href, ...)
918         SV      *href;
919         PROTOTYPE: $;$$
920         PPCODE:
921         {
922             HV *hv;
923             SV *retval, *valstr;
924             HV *seenhv = Nullhv;
925             AV *postav, *todumpav, *namesav;
926             I32 level = 0;
927             I32 indent, terse, i, imax, postlen;
928             SV **svp;
929             SV *val, *name, *pad, *xpad, *apad, *sep, *pair, *varname;
930             SV *freezer, *toaster, *bless, *sortkeys;
931             I32 purity, deepcopy, quotekeys, maxdepth = 0;
932             char tmpbuf[1024];
933             I32 gimme = GIMME;
934
935             if (!SvROK(href)) {         /* call new to get an object first */
936                 if (items < 2)
937                     croak("Usage: Data::Dumper::Dumpxs(PACKAGE, VAL_ARY_REF, [NAME_ARY_REF])");
938                 
939                 ENTER;
940                 SAVETMPS;
941                 
942                 PUSHMARK(sp);
943                 XPUSHs(href);
944                 XPUSHs(sv_2mortal(newSVsv(ST(1))));
945                 if (items >= 3)
946                     XPUSHs(sv_2mortal(newSVsv(ST(2))));
947                 PUTBACK;
948                 i = perl_call_method("new", G_SCALAR);
949                 SPAGAIN;
950                 if (i)
951                     href = newSVsv(POPs);
952
953                 PUTBACK;
954                 FREETMPS;
955                 LEAVE;
956                 if (i)
957                     (void)sv_2mortal(href);
958             }
959
960             todumpav = namesav = Nullav;
961             seenhv = Nullhv;
962             val = pad = xpad = apad = sep = pair = varname
963                 = freezer = toaster = bless = &PL_sv_undef;
964             name = sv_newmortal();
965             indent = 2;
966             terse = purity = deepcopy = 0;
967             quotekeys = 1;
968         
969             retval = newSVpvn("", 0);
970             if (SvROK(href)
971                 && (hv = (HV*)SvRV((SV*)href))
972                 && SvTYPE(hv) == SVt_PVHV)              {
973
974                 if ((svp = hv_fetch(hv, "seen", 4, FALSE)) && SvROK(*svp))
975                     seenhv = (HV*)SvRV(*svp);
976                 if ((svp = hv_fetch(hv, "todump", 6, FALSE)) && SvROK(*svp))
977                     todumpav = (AV*)SvRV(*svp);
978                 if ((svp = hv_fetch(hv, "names", 5, FALSE)) && SvROK(*svp))
979                     namesav = (AV*)SvRV(*svp);
980                 if ((svp = hv_fetch(hv, "indent", 6, FALSE)))
981                     indent = SvIV(*svp);
982                 if ((svp = hv_fetch(hv, "purity", 6, FALSE)))
983                     purity = SvIV(*svp);
984                 if ((svp = hv_fetch(hv, "terse", 5, FALSE)))
985                     terse = SvTRUE(*svp);
986 #if 0 /* useqq currently unused */
987                 if ((svp = hv_fetch(hv, "useqq", 5, FALSE)))
988                     useqq = SvTRUE(*svp);
989 #endif
990                 if ((svp = hv_fetch(hv, "pad", 3, FALSE)))
991                     pad = *svp;
992                 if ((svp = hv_fetch(hv, "xpad", 4, FALSE)))
993                     xpad = *svp;
994                 if ((svp = hv_fetch(hv, "apad", 4, FALSE)))
995                     apad = *svp;
996                 if ((svp = hv_fetch(hv, "sep", 3, FALSE)))
997                     sep = *svp;
998                 if ((svp = hv_fetch(hv, "pair", 4, FALSE)))
999                     pair = *svp;
1000                 if ((svp = hv_fetch(hv, "varname", 7, FALSE)))
1001                     varname = *svp;
1002                 if ((svp = hv_fetch(hv, "freezer", 7, FALSE)))
1003                     freezer = *svp;
1004                 if ((svp = hv_fetch(hv, "toaster", 7, FALSE)))
1005                     toaster = *svp;
1006                 if ((svp = hv_fetch(hv, "deepcopy", 8, FALSE)))
1007                     deepcopy = SvTRUE(*svp);
1008                 if ((svp = hv_fetch(hv, "quotekeys", 9, FALSE)))
1009                     quotekeys = SvTRUE(*svp);
1010                 if ((svp = hv_fetch(hv, "bless", 5, FALSE)))
1011                     bless = *svp;
1012                 if ((svp = hv_fetch(hv, "maxdepth", 8, FALSE)))
1013                     maxdepth = SvIV(*svp);
1014                 if ((svp = hv_fetch(hv, "sortkeys", 8, FALSE))) {
1015                     sortkeys = *svp;
1016                     if (! SvTRUE(sortkeys))
1017                         sortkeys = NULL;
1018                     else if (! (SvROK(sortkeys) &&
1019                                 SvTYPE(SvRV(sortkeys)) == SVt_PVCV) )
1020                     {
1021                         /* flag to use qsortsv() for sorting hash keys */       
1022                         sortkeys = &PL_sv_yes; 
1023                     }
1024                 }
1025                 postav = newAV();
1026
1027                 if (todumpav)
1028                     imax = av_len(todumpav);
1029                 else
1030                     imax = -1;
1031                 valstr = newSVpvn("",0);
1032                 for (i = 0; i <= imax; ++i) {
1033                     SV *newapad;
1034                 
1035                     av_clear(postav);
1036                     if ((svp = av_fetch(todumpav, i, FALSE)))
1037                         val = *svp;
1038                     else
1039                         val = &PL_sv_undef;
1040                     if ((svp = av_fetch(namesav, i, TRUE)))
1041                         sv_setsv(name, *svp);
1042                     else
1043                         (void)SvOK_off(name);
1044                 
1045                     if (SvOK(name)) {
1046                         if ((SvPVX(name))[0] == '*') {
1047                             if (SvROK(val)) {
1048                                 switch (SvTYPE(SvRV(val))) {
1049                                 case SVt_PVAV:
1050                                     (SvPVX(name))[0] = '@';
1051                                     break;
1052                                 case SVt_PVHV:
1053                                     (SvPVX(name))[0] = '%';
1054                                     break;
1055                                 case SVt_PVCV:
1056                                     (SvPVX(name))[0] = '*';
1057                                     break;
1058                                 default:
1059                                     (SvPVX(name))[0] = '$';
1060                                     break;
1061                                 }
1062                             }
1063                             else
1064                                 (SvPVX(name))[0] = '$';
1065                         }
1066                         else if ((SvPVX(name))[0] != '$')
1067                             sv_insert(name, 0, 0, "$", 1);
1068                     }
1069                     else {
1070                         STRLEN nchars = 0;
1071                         sv_setpvn(name, "$", 1);
1072                         sv_catsv(name, varname);
1073                         (void) sprintf(tmpbuf, "%"IVdf, (IV)(i+1));
1074                         nchars = strlen(tmpbuf);
1075                         sv_catpvn(name, tmpbuf, nchars);
1076                     }
1077                 
1078                     if (indent >= 2) {
1079                         SV *tmpsv = sv_x(aTHX_ Nullsv, " ", 1, SvCUR(name)+3);
1080                         newapad = newSVsv(apad);
1081                         sv_catsv(newapad, tmpsv);
1082                         SvREFCNT_dec(tmpsv);
1083                     }
1084                     else
1085                         newapad = apad;
1086                 
1087                     DD_dump(aTHX_ val, SvPVX(name), SvCUR(name), valstr, seenhv,
1088                             postav, &level, indent, pad, xpad, newapad, sep, pair,
1089                             freezer, toaster, purity, deepcopy, quotekeys,
1090                             bless, maxdepth, sortkeys);
1091                 
1092                     if (indent >= 2)
1093                         SvREFCNT_dec(newapad);
1094
1095                     postlen = av_len(postav);
1096                     if (postlen >= 0 || !terse) {
1097                         sv_insert(valstr, 0, 0, " = ", 3);
1098                         sv_insert(valstr, 0, 0, SvPVX(name), SvCUR(name));
1099                         sv_catpvn(valstr, ";", 1);
1100                     }
1101                     sv_catsv(retval, pad);
1102                     sv_catsv(retval, valstr);
1103                     sv_catsv(retval, sep);
1104                     if (postlen >= 0) {
1105                         I32 i;
1106                         sv_catsv(retval, pad);
1107                         for (i = 0; i <= postlen; ++i) {
1108                             SV *elem;
1109                             svp = av_fetch(postav, i, FALSE);
1110                             if (svp && (elem = *svp)) {
1111                                 sv_catsv(retval, elem);
1112                                 if (i < postlen) {
1113                                     sv_catpvn(retval, ";", 1);
1114                                     sv_catsv(retval, sep);
1115                                     sv_catsv(retval, pad);
1116                                 }
1117                             }
1118                         }
1119                         sv_catpvn(retval, ";", 1);
1120                             sv_catsv(retval, sep);
1121                     }
1122                     sv_setpvn(valstr, "", 0);
1123                     if (gimme == G_ARRAY) {
1124                         XPUSHs(sv_2mortal(retval));
1125                         if (i < imax)   /* not the last time thro ? */
1126                             retval = newSVpvn("",0);
1127                     }
1128                 }
1129                 SvREFCNT_dec(postav);
1130                 SvREFCNT_dec(valstr);
1131             }
1132             else
1133                 croak("Call to new() method failed to return HASH ref");
1134             if (gimme == G_SCALAR)
1135                 XPUSHs(sv_2mortal(retval));
1136         }