This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re: [perl #36310] sv_vcatpvfn() fails on some (very limited) situations
[perl5.git] / ext / Data / Dumper / Dumper.xs
... / ...
CommitLineData
1#define PERL_NO_GET_CONTEXT
2#include "EXTERN.h"
3#include "perl.h"
4#include "XSUB.h"
5
6static I32 num_q (char *s, STRLEN slen);
7static I32 esc_q (char *dest, char *src, STRLEN slen);
8static I32 esc_q_utf8 (pTHX_ SV *sv, char *src, STRLEN slen);
9static SV *sv_x (pTHX_ SV *sv, const char *str, STRLEN len, I32 n);
10static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
11 HV *seenhv, AV *postav, I32 *levelp, I32 indent,
12 SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair,
13 SV *freezer, SV *toaster,
14 I32 purity, I32 deepcopy, I32 quotekeys, SV *bless,
15 I32 maxdepth, SV *sortkeys);
16
17#ifndef HvNAME_get
18#define HvNAME_get HvNAME
19#endif
20
21#if PERL_VERSION <= 6 /* Perl 5.6 and earlier */
22
23# ifdef EBCDIC
24# define UNI_TO_NATIVE(ch) (((ch) > 255) ? (ch) : ASCII_TO_NATIVE(ch))
25# else
26# define UNI_TO_NATIVE(ch) (ch)
27# endif
28
29UV
30Perl_utf8_to_uvchr(pTHX_ U8 *s, STRLEN *retlen)
31{
32 UV uv = utf8_to_uv(s, UTF8_MAXLEN, retlen,
33 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
34 return UNI_TO_NATIVE(uv);
35}
36
37# if !defined(PERL_IMPLICIT_CONTEXT)
38# define utf8_to_uvchr Perl_utf8_to_uvchr
39# else
40# define utf8_to_uvchr(a,b) Perl_utf8_to_uvchr(aTHX_ a,b)
41# endif
42
43#endif /* PERL_VERSION <= 6 */
44
45/* Changes in 5.7 series mean that now IOK is only set if scalar is
46 precisely integer but in 5.6 and earlier we need to do a more
47 complex test */
48#if PERL_VERSION <= 6
49#define DD_is_integer(sv) (SvIOK(sv) && (SvIsUV(val) ? SvUV(sv) == SvNV(sv) : SvIV(sv) == SvNV(sv)))
50#else
51#define DD_is_integer(sv) SvIOK(sv)
52#endif
53
54/* does a string need to be protected? */
55static I32
56needs_quote(register char *s)
57{
58TOP:
59 if (s[0] == ':') {
60 if (*++s) {
61 if (*s++ != ':')
62 return 1;
63 }
64 else
65 return 1;
66 }
67 if (isIDFIRST(*s)) {
68 while (*++s)
69 if (!isALNUM(*s)) {
70 if (*s == ':')
71 goto TOP;
72 else
73 return 1;
74 }
75 }
76 else
77 return 1;
78 return 0;
79}
80
81/* count the number of "'"s and "\"s in string */
82static I32
83num_q(register char *s, register STRLEN slen)
84{
85 register I32 ret = 0;
86
87 while (slen > 0) {
88 if (*s == '\'' || *s == '\\')
89 ++ret;
90 ++s;
91 --slen;
92 }
93 return ret;
94}
95
96
97/* returns number of chars added to escape "'"s and "\"s in s */
98/* slen number of characters in s will be escaped */
99/* destination must be long enough for additional chars */
100static I32
101esc_q(register char *d, register char *s, register STRLEN slen)
102{
103 register I32 ret = 0;
104
105 while (slen > 0) {
106 switch (*s) {
107 case '\'':
108 case '\\':
109 *d = '\\';
110 ++d; ++ret;
111 default:
112 *d = *s;
113 ++d; ++s; --slen;
114 break;
115 }
116 }
117 return ret;
118}
119
120static I32
121esc_q_utf8(pTHX_ SV* sv, register char *src, register STRLEN slen)
122{
123 char *s, *send, *r, *rstart;
124 STRLEN j, cur = SvCUR(sv);
125 /* Could count 128-255 and 256+ in two variables, if we want to
126 be like &qquote and make a distinction. */
127 STRLEN grow = 0; /* bytes needed to represent chars 128+ */
128 /* STRLEN topbit_grow = 0; bytes needed to represent chars 128-255 */
129 STRLEN backslashes = 0;
130 STRLEN single_quotes = 0;
131 STRLEN qq_escapables = 0; /* " $ @ will need a \ in "" strings. */
132 STRLEN normal = 0;
133
134 /* this will need EBCDICification */
135 for (s = src, send = src + slen; s < send; s += UTF8SKIP(s)) {
136 UV k = utf8_to_uvchr((U8*)s, NULL);
137
138 if (k > 127) {
139 /* 4: \x{} then count the number of hex digits. */
140 grow += 4 + (k <= 0xFF ? 2 : k <= 0xFFF ? 3 : k <= 0xFFFF ? 4 :
141#if UVSIZE == 4
142 8 /* We may allocate a bit more than the minimum here. */
143#else
144 k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
145#endif
146 );
147 } else if (k == '\\') {
148 backslashes++;
149 } else if (k == '\'') {
150 single_quotes++;
151 } else if (k == '"' || k == '$' || k == '@') {
152 qq_escapables++;
153 } else {
154 normal++;
155 }
156 }
157 if (grow) {
158 /* We have something needing hex. 3 is ""\0 */
159 sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
160 + 2*qq_escapables + normal);
161 rstart = r = SvPVX(sv) + cur;
162
163 *r++ = '"';
164
165 for (s = src; s < send; s += UTF8SKIP(s)) {
166 UV k = utf8_to_uvchr((U8*)s, NULL);
167
168 if (k == '"' || k == '\\' || k == '$' || k == '@') {
169 *r++ = '\\';
170 *r++ = (char)k;
171 }
172 else if (k < 0x80)
173 *r++ = (char)k;
174 else {
175 /* The return value of sprintf() is unportable.
176 * In modern systems it returns (int) the number of characters,
177 * but in older systems it might return (char*) the original
178 * buffer, or it might even be (void). The easiest portable
179 * thing to do is probably use sprintf() in void context and
180 * then strlen(buffer) for the length. The more proper way
181 * would of course be to figure out the prototype of sprintf.
182 * --jhi */
183 sprintf(r, "\\x{%"UVxf"}", k);
184 r += strlen(r);
185 }
186 }
187 *r++ = '"';
188 } else {
189 /* Single quotes. */
190 sv_grow(sv, cur + 3 + 2*backslashes + 2*single_quotes
191 + qq_escapables + normal);
192 rstart = r = SvPVX(sv) + cur;
193 *r++ = '\'';
194 for (s = src; s < send; s ++) {
195 char k = *s;
196 if (k == '\'' || k == '\\')
197 *r++ = '\\';
198 *r++ = k;
199 }
200 *r++ = '\'';
201 }
202 *r = '\0';
203 j = r - rstart;
204 SvCUR_set(sv, cur + j);
205
206 return j;
207}
208
209/* append a repeated string to an SV */
210static SV *
211sv_x(pTHX_ SV *sv, const char *str, STRLEN len, I32 n)
212{
213 if (sv == Nullsv)
214 sv = newSVpvn("", 0);
215 else
216 assert(SvTYPE(sv) >= SVt_PV);
217
218 if (n > 0) {
219 SvGROW(sv, len*n + SvCUR(sv) + 1);
220 if (len == 1) {
221 char *start = SvPVX(sv) + SvCUR(sv);
222 SvCUR_set(sv, SvCUR(sv) + n);
223 start[n] = '\0';
224 while (n > 0)
225 start[--n] = str[0];
226 }
227 else
228 while (n > 0) {
229 sv_catpvn(sv, str, len);
230 --n;
231 }
232 }
233 return sv;
234}
235
236/*
237 * This ought to be split into smaller functions. (it is one long function since
238 * it exactly parallels the perl version, which was one long thing for
239 * efficiency raisins.) Ugggh!
240 */
241static I32
242DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
243 AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad,
244 SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity,
245 I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys)
246{
247 char tmpbuf[128];
248 U32 i;
249 char *c, *r, *realpack, id[128];
250 SV **svp;
251 SV *sv, *ipad, *ival;
252 SV *blesspad = Nullsv;
253 AV *seenentry = Nullav;
254 char *iname;
255 STRLEN inamelen, idlen = 0;
256 U32 realtype;
257
258 if (!val)
259 return 0;
260
261 realtype = SvTYPE(val);
262
263 if (SvGMAGICAL(val))
264 mg_get(val);
265 if (SvROK(val)) {
266
267 /* If a freeze method is provided and the object has it, call
268 it. Warn on errors. */
269 if (SvOBJECT(SvRV(val)) && freezer &&
270 SvPOK(freezer) && SvCUR(freezer) &&
271 gv_fetchmeth(SvSTASH(SvRV(val)), SvPVX_const(freezer),
272 SvCUR(freezer), -1) != NULL)
273 {
274 dSP; ENTER; SAVETMPS; PUSHMARK(sp);
275 XPUSHs(val); PUTBACK;
276 i = perl_call_method(SvPVX_const(freezer), G_EVAL|G_VOID);
277 SPAGAIN;
278 if (SvTRUE(ERRSV))
279 warn("WARNING(Freezer method call failed): %"SVf"", ERRSV);
280 PUTBACK; FREETMPS; LEAVE;
281 }
282
283 ival = SvRV(val);
284 realtype = SvTYPE(ival);
285 (void) sprintf(id, "0x%"UVxf, PTR2UV(ival));
286 idlen = strlen(id);
287 if (SvOBJECT(ival))
288 realpack = HvNAME_get(SvSTASH(ival));
289 else
290 realpack = Nullch;
291
292 /* if it has a name, we need to either look it up, or keep a tab
293 * on it so we know when we hit it later
294 */
295 if (namelen) {
296 if ((svp = hv_fetch(seenhv, id, idlen, FALSE))
297 && (sv = *svp) && SvROK(sv) && (seenentry = (AV*)SvRV(sv)))
298 {
299 SV *othername;
300 if ((svp = av_fetch(seenentry, 0, FALSE))
301 && (othername = *svp))
302 {
303 if (purity && *levelp > 0) {
304 SV *postentry;
305
306 if (realtype == SVt_PVHV)
307 sv_catpvn(retval, "{}", 2);
308 else if (realtype == SVt_PVAV)
309 sv_catpvn(retval, "[]", 2);
310 else
311 sv_catpvn(retval, "do{my $o}", 9);
312 postentry = newSVpvn(name, namelen);
313 sv_catpvn(postentry, " = ", 3);
314 sv_catsv(postentry, othername);
315 av_push(postav, postentry);
316 }
317 else {
318 if (name[0] == '@' || name[0] == '%') {
319 if ((SvPVX_const(othername))[0] == '\\' &&
320 (SvPVX_const(othername))[1] == name[0]) {
321 sv_catpvn(retval, SvPVX_const(othername)+1,
322 SvCUR(othername)-1);
323 }
324 else {
325 sv_catpvn(retval, name, 1);
326 sv_catpvn(retval, "{", 1);
327 sv_catsv(retval, othername);
328 sv_catpvn(retval, "}", 1);
329 }
330 }
331 else
332 sv_catsv(retval, othername);
333 }
334 return 1;
335 }
336 else {
337 warn("ref name not found for %s", id);
338 return 0;
339 }
340 }
341 else { /* store our name and continue */
342 SV *namesv;
343 if (name[0] == '@' || name[0] == '%') {
344 namesv = newSVpvn("\\", 1);
345 sv_catpvn(namesv, name, namelen);
346 }
347 else if (realtype == SVt_PVCV && name[0] == '*') {
348 namesv = newSVpvn("\\", 2);
349 sv_catpvn(namesv, name, namelen);
350 (SvPVX(namesv))[1] = '&';
351 }
352 else
353 namesv = newSVpvn(name, namelen);
354 seenentry = newAV();
355 av_push(seenentry, namesv);
356 (void)SvREFCNT_inc(val);
357 av_push(seenentry, val);
358 (void)hv_store(seenhv, id, strlen(id),
359 newRV_inc((SV*)seenentry), 0);
360 SvREFCNT_dec(seenentry);
361 }
362 }
363
364 if (realpack && *realpack == 'R' && strEQ(realpack, "Regexp")) {
365 STRLEN rlen;
366 char *rval = SvPV(val, rlen);
367 char *slash = strchr(rval, '/');
368 sv_catpvn(retval, "qr/", 3);
369 while (slash) {
370 sv_catpvn(retval, rval, slash-rval);
371 sv_catpvn(retval, "\\/", 2);
372 rlen -= slash-rval+1;
373 rval = slash+1;
374 slash = strchr(rval, '/');
375 }
376 sv_catpvn(retval, rval, rlen);
377 sv_catpvn(retval, "/", 1);
378 return 1;
379 }
380
381 /* If purity is not set and maxdepth is set, then check depth:
382 * if we have reached maximum depth, return the string
383 * representation of the thing we are currently examining
384 * at this depth (i.e., 'Foo=ARRAY(0xdeadbeef)').
385 */
386 if (!purity && maxdepth > 0 && *levelp >= maxdepth) {
387 STRLEN vallen;
388 char *valstr = SvPV(val,vallen);
389 sv_catpvn(retval, "'", 1);
390 sv_catpvn(retval, valstr, vallen);
391 sv_catpvn(retval, "'", 1);
392 return 1;
393 }
394
395 if (realpack) { /* we have a blessed ref */
396 STRLEN blesslen;
397 char *blessstr = SvPV(bless, blesslen);
398 sv_catpvn(retval, blessstr, blesslen);
399 sv_catpvn(retval, "( ", 2);
400 if (indent >= 2) {
401 blesspad = apad;
402 apad = newSVsv(apad);
403 sv_x(aTHX_ apad, " ", 1, blesslen+2);
404 }
405 }
406
407 (*levelp)++;
408 ipad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp);
409
410 if (realtype <= SVt_PVBM) { /* scalar ref */
411 SV *namesv = newSVpvn("${", 2);
412 sv_catpvn(namesv, name, namelen);
413 sv_catpvn(namesv, "}", 1);
414 if (realpack) { /* blessed */
415 sv_catpvn(retval, "do{\\(my $o = ", 13);
416 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
417 postav, levelp, indent, pad, xpad, apad, sep, pair,
418 freezer, toaster, purity, deepcopy, quotekeys, bless,
419 maxdepth, sortkeys);
420 sv_catpvn(retval, ")}", 2);
421 } /* plain */
422 else {
423 sv_catpvn(retval, "\\", 1);
424 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
425 postav, levelp, indent, pad, xpad, apad, sep, pair,
426 freezer, toaster, purity, deepcopy, quotekeys, bless,
427 maxdepth, sortkeys);
428 }
429 SvREFCNT_dec(namesv);
430 }
431 else if (realtype == SVt_PVGV) { /* glob ref */
432 SV *namesv = newSVpvn("*{", 2);
433 sv_catpvn(namesv, name, namelen);
434 sv_catpvn(namesv, "}", 1);
435 sv_catpvn(retval, "\\", 1);
436 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
437 postav, levelp, indent, pad, xpad, apad, sep, pair,
438 freezer, toaster, purity, deepcopy, quotekeys, bless,
439 maxdepth, sortkeys);
440 SvREFCNT_dec(namesv);
441 }
442 else if (realtype == SVt_PVAV) {
443 SV *totpad;
444 I32 ix = 0;
445 I32 ixmax = av_len((AV *)ival);
446
447 SV *ixsv = newSViv(0);
448 /* allowing for a 24 char wide array index */
449 New(0, iname, namelen+28, char);
450 (void)strcpy(iname, name);
451 inamelen = namelen;
452 if (name[0] == '@') {
453 sv_catpvn(retval, "(", 1);
454 iname[0] = '$';
455 }
456 else {
457 sv_catpvn(retval, "[", 1);
458 /* omit "->" in $foo{bar}->[0], but not in ${$foo}->[0] */
459 /*if (namelen > 0
460 && name[namelen-1] != ']' && name[namelen-1] != '}'
461 && (namelen < 4 || (name[1] != '{' && name[2] != '{')))*/
462 if ((namelen > 0
463 && name[namelen-1] != ']' && name[namelen-1] != '}')
464 || (namelen > 4
465 && (name[1] == '{'
466 || (name[0] == '\\' && name[2] == '{'))))
467 {
468 iname[inamelen++] = '-'; iname[inamelen++] = '>';
469 iname[inamelen] = '\0';
470 }
471 }
472 if (iname[0] == '*' && iname[inamelen-1] == '}' && inamelen >= 8 &&
473 (instr(iname+inamelen-8, "{SCALAR}") ||
474 instr(iname+inamelen-7, "{ARRAY}") ||
475 instr(iname+inamelen-6, "{HASH}"))) {
476 iname[inamelen++] = '-'; iname[inamelen++] = '>';
477 }
478 iname[inamelen++] = '['; iname[inamelen] = '\0';
479 totpad = newSVsv(sep);
480 sv_catsv(totpad, pad);
481 sv_catsv(totpad, apad);
482
483 for (ix = 0; ix <= ixmax; ++ix) {
484 STRLEN ilen;
485 SV *elem;
486 svp = av_fetch((AV*)ival, ix, FALSE);
487 if (svp)
488 elem = *svp;
489 else
490 elem = &PL_sv_undef;
491
492 ilen = inamelen;
493 sv_setiv(ixsv, ix);
494 (void) sprintf(iname+ilen, "%"IVdf, (IV)ix);
495 ilen = strlen(iname);
496 iname[ilen++] = ']'; iname[ilen] = '\0';
497 if (indent >= 3) {
498 sv_catsv(retval, totpad);
499 sv_catsv(retval, ipad);
500 sv_catpvn(retval, "#", 1);
501 sv_catsv(retval, ixsv);
502 }
503 sv_catsv(retval, totpad);
504 sv_catsv(retval, ipad);
505 DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav,
506 levelp, indent, pad, xpad, apad, sep, pair,
507 freezer, toaster, purity, deepcopy, quotekeys, bless,
508 maxdepth, sortkeys);
509 if (ix < ixmax)
510 sv_catpvn(retval, ",", 1);
511 }
512 if (ixmax >= 0) {
513 SV *opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), (*levelp)-1);
514 sv_catsv(retval, totpad);
515 sv_catsv(retval, opad);
516 SvREFCNT_dec(opad);
517 }
518 if (name[0] == '@')
519 sv_catpvn(retval, ")", 1);
520 else
521 sv_catpvn(retval, "]", 1);
522 SvREFCNT_dec(ixsv);
523 SvREFCNT_dec(totpad);
524 Safefree(iname);
525 }
526 else if (realtype == SVt_PVHV) {
527 SV *totpad, *newapad;
528 SV *iname, *sname;
529 HE *entry;
530 char *key;
531 I32 klen;
532 SV *hval;
533 AV *keys = Nullav;
534
535 iname = newSVpvn(name, namelen);
536 if (name[0] == '%') {
537 sv_catpvn(retval, "(", 1);
538 (SvPVX(iname))[0] = '$';
539 }
540 else {
541 sv_catpvn(retval, "{", 1);
542 /* omit "->" in $foo[0]->{bar}, but not in ${$foo}->{bar} */
543 if ((namelen > 0
544 && name[namelen-1] != ']' && name[namelen-1] != '}')
545 || (namelen > 4
546 && (name[1] == '{'
547 || (name[0] == '\\' && name[2] == '{'))))
548 {
549 sv_catpvn(iname, "->", 2);
550 }
551 }
552 if (name[0] == '*' && name[namelen-1] == '}' && namelen >= 8 &&
553 (instr(name+namelen-8, "{SCALAR}") ||
554 instr(name+namelen-7, "{ARRAY}") ||
555 instr(name+namelen-6, "{HASH}"))) {
556 sv_catpvn(iname, "->", 2);
557 }
558 sv_catpvn(iname, "{", 1);
559 totpad = newSVsv(sep);
560 sv_catsv(totpad, pad);
561 sv_catsv(totpad, apad);
562
563 /* If requested, get a sorted/filtered array of hash keys */
564 if (sortkeys) {
565 if (sortkeys == &PL_sv_yes) {
566#if PERL_VERSION < 8
567 sortkeys = sv_2mortal(newSVpvn("Data::Dumper::_sortkeys", 23));
568#else
569 keys = newAV();
570 (void)hv_iterinit((HV*)ival);
571 while ((entry = hv_iternext((HV*)ival))) {
572 sv = hv_iterkeysv(entry);
573 SvREFCNT_inc(sv);
574 av_push(keys, sv);
575 }
576# ifdef USE_LOCALE_NUMERIC
577 sortsv(AvARRAY(keys),
578 av_len(keys)+1,
579 IN_LOCALE ? Perl_sv_cmp_locale : Perl_sv_cmp);
580# else
581 sortsv(AvARRAY(keys),
582 av_len(keys)+1,
583 Perl_sv_cmp);
584# endif
585#endif
586 }
587 if (sortkeys != &PL_sv_yes) {
588 dSP; ENTER; SAVETMPS; PUSHMARK(sp);
589 XPUSHs(sv_2mortal(newRV_inc(ival))); PUTBACK;
590 i = perl_call_sv(sortkeys, G_SCALAR | G_EVAL);
591 SPAGAIN;
592 if (i) {
593 sv = POPs;
594 if (SvROK(sv) && (SvTYPE(SvRV(sv)) == SVt_PVAV))
595 keys = (AV*)SvREFCNT_inc(SvRV(sv));
596 }
597 if (! keys)
598 warn("Sortkeys subroutine did not return ARRAYREF\n");
599 PUTBACK; FREETMPS; LEAVE;
600 }
601 if (keys)
602 sv_2mortal((SV*)keys);
603 }
604 else
605 (void)hv_iterinit((HV*)ival);
606
607 /* foreach (keys %hash) */
608 for (i = 0; 1; i++) {
609 char *nkey;
610 char *nkey_buffer = NULL;
611 I32 nticks = 0;
612 SV* keysv;
613 STRLEN keylen;
614 I32 nlen;
615 bool do_utf8 = FALSE;
616
617 if ((sortkeys && !(keys && (I32)i <= av_len(keys))) ||
618 !(entry = hv_iternext((HV *)ival)))
619 break;
620
621 if (i)
622 sv_catpvn(retval, ",", 1);
623
624 if (sortkeys) {
625 char *key;
626 svp = av_fetch(keys, i, FALSE);
627 keysv = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
628 key = SvPV(keysv, keylen);
629 svp = hv_fetch((HV*)ival, key,
630 SvUTF8(keysv) ? -(I32)keylen : keylen, 0);
631 hval = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
632 }
633 else {
634 keysv = hv_iterkeysv(entry);
635 hval = hv_iterval((HV*)ival, entry);
636 }
637
638 do_utf8 = DO_UTF8(keysv);
639 key = SvPV(keysv, keylen);
640 klen = keylen;
641
642 sv_catsv(retval, totpad);
643 sv_catsv(retval, ipad);
644 /* old logic was first to check utf8 flag, and if utf8 always
645 call esc_q_utf8. This caused test to break under -Mutf8,
646 because there even strings like 'c' have utf8 flag on.
647 Hence with quotekeys == 0 the XS code would still '' quote
648 them based on flags, whereas the perl code would not,
649 based on regexps.
650 The perl code is correct.
651 needs_quote() decides that anything that isn't a valid
652 perl identifier needs to be quoted, hence only correctly
653 formed strings with no characters outside [A-Za-z0-9_:]
654 won't need quoting. None of those characters are used in
655 the byte encoding of utf8, so anything with utf8
656 encoded characters in will need quoting. Hence strings
657 with utf8 encoded characters in will end up inside do_utf8
658 just like before, but now strings with utf8 flag set but
659 only ascii characters will end up in the unquoted section.
660
661 There should also be less tests for the (probably currently)
662 more common doesn't need quoting case.
663 The code is also smaller (22044 vs 22260) because I've been
664 able to pull the common logic out to both sides. */
665 if (quotekeys || needs_quote(key)) {
666 if (do_utf8) {
667 STRLEN ocur = SvCUR(retval);
668 nlen = esc_q_utf8(aTHX_ retval, key, klen);
669 nkey = SvPVX(retval) + ocur;
670 }
671 else {
672 nticks = num_q(key, klen);
673 New(0, nkey_buffer, klen+nticks+3, char);
674 nkey = nkey_buffer;
675 nkey[0] = '\'';
676 if (nticks)
677 klen += esc_q(nkey+1, key, klen);
678 else
679 (void)Copy(key, nkey+1, klen, char);
680 nkey[++klen] = '\'';
681 nkey[++klen] = '\0';
682 nlen = klen;
683 sv_catpvn(retval, nkey, klen);
684 }
685 }
686 else {
687 nkey = key;
688 nlen = klen;
689 sv_catpvn(retval, nkey, klen);
690 }
691 sname = newSVsv(iname);
692 sv_catpvn(sname, nkey, nlen);
693 sv_catpvn(sname, "}", 1);
694
695 sv_catsv(retval, pair);
696 if (indent >= 2) {
697 char *extra;
698 I32 elen = 0;
699 newapad = newSVsv(apad);
700 New(0, extra, klen+4+1, char);
701 while (elen < (klen+4))
702 extra[elen++] = ' ';
703 extra[elen] = '\0';
704 sv_catpvn(newapad, extra, elen);
705 Safefree(extra);
706 }
707 else
708 newapad = apad;
709
710 DD_dump(aTHX_ hval, SvPVX_const(sname), SvCUR(sname), retval, seenhv,
711 postav, levelp, indent, pad, xpad, newapad, sep, pair,
712 freezer, toaster, purity, deepcopy, quotekeys, bless,
713 maxdepth, sortkeys);
714 SvREFCNT_dec(sname);
715 Safefree(nkey_buffer);
716 if (indent >= 2)
717 SvREFCNT_dec(newapad);
718 }
719 if (i) {
720 SV *opad = sv_x(aTHX_ Nullsv, SvPVX_const(xpad), SvCUR(xpad), *levelp-1);
721 sv_catsv(retval, totpad);
722 sv_catsv(retval, opad);
723 SvREFCNT_dec(opad);
724 }
725 if (name[0] == '%')
726 sv_catpvn(retval, ")", 1);
727 else
728 sv_catpvn(retval, "}", 1);
729 SvREFCNT_dec(iname);
730 SvREFCNT_dec(totpad);
731 }
732 else if (realtype == SVt_PVCV) {
733 sv_catpvn(retval, "sub { \"DUMMY\" }", 15);
734 if (purity)
735 warn("Encountered CODE ref, using dummy placeholder");
736 }
737 else {
738 warn("cannot handle ref type %ld", realtype);
739 }
740
741 if (realpack) { /* free blessed allocs */
742 if (indent >= 2) {
743 SvREFCNT_dec(apad);
744 apad = blesspad;
745 }
746 sv_catpvn(retval, ", '", 3);
747 sv_catpvn(retval, realpack, strlen(realpack));
748 sv_catpvn(retval, "' )", 3);
749 if (toaster && SvPOK(toaster) && SvCUR(toaster)) {
750 sv_catpvn(retval, "->", 2);
751 sv_catsv(retval, toaster);
752 sv_catpvn(retval, "()", 2);
753 }
754 }
755 SvREFCNT_dec(ipad);
756 (*levelp)--;
757 }
758 else {
759 STRLEN i;
760
761 if (namelen) {
762 (void) sprintf(id, "0x%"UVxf, PTR2UV(val));
763 if ((svp = hv_fetch(seenhv, id, (idlen = strlen(id)), FALSE)) &&
764 (sv = *svp) && SvROK(sv) &&
765 (seenentry = (AV*)SvRV(sv)))
766 {
767 SV *othername;
768 if ((svp = av_fetch(seenentry, 0, FALSE)) && (othername = *svp)
769 && (svp = av_fetch(seenentry, 2, FALSE)) && *svp && SvIV(*svp) > 0)
770 {
771 sv_catpvn(retval, "${", 2);
772 sv_catsv(retval, othername);
773 sv_catpvn(retval, "}", 1);
774 return 1;
775 }
776 }
777 else if (val != &PL_sv_undef) {
778 SV *namesv;
779 namesv = newSVpvn("\\", 1);
780 sv_catpvn(namesv, name, namelen);
781 seenentry = newAV();
782 av_push(seenentry, namesv);
783 av_push(seenentry, newRV_inc(val));
784 (void)hv_store(seenhv, id, strlen(id), newRV_inc((SV*)seenentry), 0);
785 SvREFCNT_dec(seenentry);
786 }
787 }
788
789 if (DD_is_integer(val)) {
790 STRLEN len;
791 if (SvIsUV(val))
792 (void) sprintf(tmpbuf, "%"UVuf, SvUV(val));
793 else
794 (void) sprintf(tmpbuf, "%"IVdf, SvIV(val));
795 len = strlen(tmpbuf);
796 if (SvPOK(val)) {
797 /* Need to check to see if this is a string such as " 0".
798 I'm assuming from sprintf isn't going to clash with utf8.
799 Is this valid on EBCDIC? */
800 STRLEN pvlen;
801 const char *pv = SvPV(val, pvlen);
802 if (pvlen != len || memNE(pv, tmpbuf, len))
803 goto integer_came_from_string;
804 }
805 if (len > 10) {
806 /* Looks like we're on a 64 bit system. Make it a string so that
807 if a 32 bit system reads the number it will cope better. */
808 sv_catpvf(retval, "'%s'", tmpbuf);
809 } else
810 sv_catpvn(retval, tmpbuf, len);
811 }
812 else if (realtype == SVt_PVGV) {/* GLOBs can end up with scribbly names */
813 c = SvPV(val, i);
814 ++c; --i; /* just get the name */
815 if (i >= 6 && strncmp(c, "main::", 6) == 0) {
816 c += 4;
817 i -= 4;
818 }
819 if (needs_quote(c)) {
820 sv_grow(retval, SvCUR(retval)+6+2*i);
821 r = SvPVX(retval)+SvCUR(retval);
822 r[0] = '*'; r[1] = '{'; r[2] = '\'';
823 i += esc_q(r+3, c, i);
824 i += 3;
825 r[i++] = '\''; r[i++] = '}';
826 r[i] = '\0';
827 }
828 else {
829 sv_grow(retval, SvCUR(retval)+i+2);
830 r = SvPVX(retval)+SvCUR(retval);
831 r[0] = '*'; strcpy(r+1, c);
832 i++;
833 }
834 SvCUR_set(retval, SvCUR(retval)+i);
835
836 if (purity) {
837 static const char* const entries[] = { "{SCALAR}", "{ARRAY}", "{HASH}" };
838 static const STRLEN sizes[] = { 8, 7, 6 };
839 SV *e;
840 SV *nname = newSVpvn("", 0);
841 SV *newapad = newSVpvn("", 0);
842 GV *gv = (GV*)val;
843 I32 j;
844
845 for (j=0; j<3; j++) {
846 e = ((j == 0) ? GvSV(gv) : (j == 1) ? (SV*)GvAV(gv) : (SV*)GvHV(gv));
847 if (!e)
848 continue;
849 if (j == 0 && !SvOK(e))
850 continue;
851
852 {
853 I32 nlevel = 0;
854 SV *postentry = newSVpvn(r,i);
855
856 sv_setsv(nname, postentry);
857 sv_catpvn(nname, entries[j], sizes[j]);
858 sv_catpvn(postentry, " = ", 3);
859 av_push(postav, postentry);
860 e = newRV_inc(e);
861
862 SvCUR_set(newapad, 0);
863 if (indent >= 2)
864 (void)sv_x(aTHX_ newapad, " ", 1, SvCUR(postentry));
865
866 DD_dump(aTHX_ e, SvPVX_const(nname), SvCUR(nname), postentry,
867 seenhv, postav, &nlevel, indent, pad, xpad,
868 newapad, sep, pair, freezer, toaster, purity,
869 deepcopy, quotekeys, bless, maxdepth,
870 sortkeys);
871 SvREFCNT_dec(e);
872 }
873 }
874
875 SvREFCNT_dec(newapad);
876 SvREFCNT_dec(nname);
877 }
878 }
879 else if (val == &PL_sv_undef || !SvOK(val)) {
880 sv_catpvn(retval, "undef", 5);
881 }
882 else {
883 integer_came_from_string:
884 c = SvPV(val, i);
885 if (DO_UTF8(val))
886 i += esc_q_utf8(aTHX_ retval, c, i);
887 else {
888 sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */
889 r = SvPVX(retval) + SvCUR(retval);
890 r[0] = '\'';
891 i += esc_q(r+1, c, i);
892 ++i;
893 r[i++] = '\'';
894 r[i] = '\0';
895 SvCUR_set(retval, SvCUR(retval)+i);
896 }
897 }
898 }
899
900 if (idlen) {
901 if (deepcopy)
902 (void)hv_delete(seenhv, id, idlen, G_DISCARD);
903 else if (namelen && seenentry) {
904 SV *mark = *av_fetch(seenentry, 2, TRUE);
905 sv_setiv(mark,1);
906 }
907 }
908 return 1;
909}
910
911
912MODULE = Data::Dumper PACKAGE = Data::Dumper PREFIX = Data_Dumper_
913
914#
915# This is the exact equivalent of Dump. Well, almost. The things that are
916# different as of now (due to Laziness):
917# * doesnt do double-quotes yet.
918#
919
920void
921Data_Dumper_Dumpxs(href, ...)
922 SV *href;
923 PROTOTYPE: $;$$
924 PPCODE:
925 {
926 HV *hv;
927 SV *retval, *valstr;
928 HV *seenhv = Nullhv;
929 AV *postav, *todumpav, *namesav;
930 I32 level = 0;
931 I32 indent, terse, i, imax, postlen;
932 SV **svp;
933 SV *val, *name, *pad, *xpad, *apad, *sep, *pair, *varname;
934 SV *freezer, *toaster, *bless, *sortkeys;
935 I32 purity, deepcopy, quotekeys, maxdepth = 0;
936 char tmpbuf[1024];
937 I32 gimme = GIMME;
938
939 if (!SvROK(href)) { /* call new to get an object first */
940 if (items < 2)
941 croak("Usage: Data::Dumper::Dumpxs(PACKAGE, VAL_ARY_REF, [NAME_ARY_REF])");
942
943 ENTER;
944 SAVETMPS;
945
946 PUSHMARK(sp);
947 XPUSHs(href);
948 XPUSHs(sv_2mortal(newSVsv(ST(1))));
949 if (items >= 3)
950 XPUSHs(sv_2mortal(newSVsv(ST(2))));
951 PUTBACK;
952 i = perl_call_method("new", G_SCALAR);
953 SPAGAIN;
954 if (i)
955 href = newSVsv(POPs);
956
957 PUTBACK;
958 FREETMPS;
959 LEAVE;
960 if (i)
961 (void)sv_2mortal(href);
962 }
963
964 todumpav = namesav = Nullav;
965 seenhv = Nullhv;
966 val = pad = xpad = apad = sep = pair = varname
967 = freezer = toaster = bless = &PL_sv_undef;
968 name = sv_newmortal();
969 indent = 2;
970 terse = purity = deepcopy = 0;
971 quotekeys = 1;
972
973 retval = newSVpvn("", 0);
974 if (SvROK(href)
975 && (hv = (HV*)SvRV((SV*)href))
976 && SvTYPE(hv) == SVt_PVHV) {
977
978 if ((svp = hv_fetch(hv, "seen", 4, FALSE)) && SvROK(*svp))
979 seenhv = (HV*)SvRV(*svp);
980 if ((svp = hv_fetch(hv, "todump", 6, FALSE)) && SvROK(*svp))
981 todumpav = (AV*)SvRV(*svp);
982 if ((svp = hv_fetch(hv, "names", 5, FALSE)) && SvROK(*svp))
983 namesav = (AV*)SvRV(*svp);
984 if ((svp = hv_fetch(hv, "indent", 6, FALSE)))
985 indent = SvIV(*svp);
986 if ((svp = hv_fetch(hv, "purity", 6, FALSE)))
987 purity = SvIV(*svp);
988 if ((svp = hv_fetch(hv, "terse", 5, FALSE)))
989 terse = SvTRUE(*svp);
990#if 0 /* useqq currently unused */
991 if ((svp = hv_fetch(hv, "useqq", 5, FALSE)))
992 useqq = SvTRUE(*svp);
993#endif
994 if ((svp = hv_fetch(hv, "pad", 3, FALSE)))
995 pad = *svp;
996 if ((svp = hv_fetch(hv, "xpad", 4, FALSE)))
997 xpad = *svp;
998 if ((svp = hv_fetch(hv, "apad", 4, FALSE)))
999 apad = *svp;
1000 if ((svp = hv_fetch(hv, "sep", 3, FALSE)))
1001 sep = *svp;
1002 if ((svp = hv_fetch(hv, "pair", 4, FALSE)))
1003 pair = *svp;
1004 if ((svp = hv_fetch(hv, "varname", 7, FALSE)))
1005 varname = *svp;
1006 if ((svp = hv_fetch(hv, "freezer", 7, FALSE)))
1007 freezer = *svp;
1008 if ((svp = hv_fetch(hv, "toaster", 7, FALSE)))
1009 toaster = *svp;
1010 if ((svp = hv_fetch(hv, "deepcopy", 8, FALSE)))
1011 deepcopy = SvTRUE(*svp);
1012 if ((svp = hv_fetch(hv, "quotekeys", 9, FALSE)))
1013 quotekeys = SvTRUE(*svp);
1014 if ((svp = hv_fetch(hv, "bless", 5, FALSE)))
1015 bless = *svp;
1016 if ((svp = hv_fetch(hv, "maxdepth", 8, FALSE)))
1017 maxdepth = SvIV(*svp);
1018 if ((svp = hv_fetch(hv, "sortkeys", 8, FALSE))) {
1019 sortkeys = *svp;
1020 if (! SvTRUE(sortkeys))
1021 sortkeys = NULL;
1022 else if (! (SvROK(sortkeys) &&
1023 SvTYPE(SvRV(sortkeys)) == SVt_PVCV) )
1024 {
1025 /* flag to use qsortsv() for sorting hash keys */
1026 sortkeys = &PL_sv_yes;
1027 }
1028 }
1029 postav = newAV();
1030
1031 if (todumpav)
1032 imax = av_len(todumpav);
1033 else
1034 imax = -1;
1035 valstr = newSVpvn("",0);
1036 for (i = 0; i <= imax; ++i) {
1037 SV *newapad;
1038
1039 av_clear(postav);
1040 if ((svp = av_fetch(todumpav, i, FALSE)))
1041 val = *svp;
1042 else
1043 val = &PL_sv_undef;
1044 if ((svp = av_fetch(namesav, i, TRUE)))
1045 sv_setsv(name, *svp);
1046 else
1047 (void)SvOK_off(name);
1048
1049 if (SvOK(name)) {
1050 if ((SvPVX_const(name))[0] == '*') {
1051 if (SvROK(val)) {
1052 switch (SvTYPE(SvRV(val))) {
1053 case SVt_PVAV:
1054 (SvPVX(name))[0] = '@';
1055 break;
1056 case SVt_PVHV:
1057 (SvPVX(name))[0] = '%';
1058 break;
1059 case SVt_PVCV:
1060 (SvPVX(name))[0] = '*';
1061 break;
1062 default:
1063 (SvPVX(name))[0] = '$';
1064 break;
1065 }
1066 }
1067 else
1068 (SvPVX(name))[0] = '$';
1069 }
1070 else if ((SvPVX_const(name))[0] != '$')
1071 sv_insert(name, 0, 0, "$", 1);
1072 }
1073 else {
1074 STRLEN nchars = 0;
1075 sv_setpvn(name, "$", 1);
1076 sv_catsv(name, varname);
1077 (void) sprintf(tmpbuf, "%"IVdf, (IV)(i+1));
1078 nchars = strlen(tmpbuf);
1079 sv_catpvn(name, tmpbuf, nchars);
1080 }
1081
1082 if (indent >= 2) {
1083 SV *tmpsv = sv_x(aTHX_ Nullsv, " ", 1, SvCUR(name)+3);
1084 newapad = newSVsv(apad);
1085 sv_catsv(newapad, tmpsv);
1086 SvREFCNT_dec(tmpsv);
1087 }
1088 else
1089 newapad = apad;
1090
1091 DD_dump(aTHX_ val, SvPVX_const(name), SvCUR(name), valstr, seenhv,
1092 postav, &level, indent, pad, xpad, newapad, sep, pair,
1093 freezer, toaster, purity, deepcopy, quotekeys,
1094 bless, maxdepth, sortkeys);
1095
1096 if (indent >= 2)
1097 SvREFCNT_dec(newapad);
1098
1099 postlen = av_len(postav);
1100 if (postlen >= 0 || !terse) {
1101 sv_insert(valstr, 0, 0, " = ", 3);
1102 sv_insert(valstr, 0, 0, SvPVX_const(name), SvCUR(name));
1103 sv_catpvn(valstr, ";", 1);
1104 }
1105 sv_catsv(retval, pad);
1106 sv_catsv(retval, valstr);
1107 sv_catsv(retval, sep);
1108 if (postlen >= 0) {
1109 I32 i;
1110 sv_catsv(retval, pad);
1111 for (i = 0; i <= postlen; ++i) {
1112 SV *elem;
1113 svp = av_fetch(postav, i, FALSE);
1114 if (svp && (elem = *svp)) {
1115 sv_catsv(retval, elem);
1116 if (i < postlen) {
1117 sv_catpvn(retval, ";", 1);
1118 sv_catsv(retval, sep);
1119 sv_catsv(retval, pad);
1120 }
1121 }
1122 }
1123 sv_catpvn(retval, ";", 1);
1124 sv_catsv(retval, sep);
1125 }
1126 sv_setpvn(valstr, "", 0);
1127 if (gimme == G_ARRAY) {
1128 XPUSHs(sv_2mortal(retval));
1129 if (i < imax) /* not the last time thro ? */
1130 retval = newSVpvn("",0);
1131 }
1132 }
1133 SvREFCNT_dec(postav);
1134 SvREFCNT_dec(valstr);
1135 }
1136 else
1137 croak("Call to new() method failed to return HASH ref");
1138 if (gimme == G_SCALAR)
1139 XPUSHs(sv_2mortal(retval));
1140 }