This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Data::Dumper: refactor XS implementation
[perl5.git] / dist / Data-Dumper / Dumper.xs
CommitLineData
c5be433b 1#define PERL_NO_GET_CONTEXT
823edd99
GS
2#include "EXTERN.h"
3#include "perl.h"
4#include "XSUB.h"
171b7b3b 5#ifdef USE_PPPORT_H
02c14053 6# define NEED_my_snprintf
ab4d8678 7# define NEED_sv_2pv_flags
171b7b3b
JH
8# include "ppport.h"
9#endif
823edd99 10
ecf5217a 11#if PERL_VERSION < 8
e52c0e5a
NC
12# define DD_USE_OLD_ID_FORMAT
13#endif
14
31ac59b6
KW
15/* These definitions are ASCII only. But the pure-perl .pm avoids
16 * calling this .xs file for releases where they aren't defined */
17
18#ifndef isASCII
19# define isASCII(c) (((UV) (c)) < 128)
20#endif
21
22#ifndef ESC_NATIVE /* \e */
23# define ESC_NATIVE 27
24#endif
25
26#ifndef isPRINT
27# define isPRINT(c) (((UV) (c)) >= ' ' && ((UV) (c)) < 127)
28#endif
29
30#ifndef isALPHA
31# define isALPHA(c) ( (((UV) (c)) >= 'a' && ((UV) (c)) <= 'z') \
32 || (((UV) (c)) <= 'Z' && ((UV) (c)) >= 'A'))
33#endif
34
35#ifndef isIDFIRST
36# define isIDFIRST(c) (isALPHA(c) || (c) == '_')
37#endif
38
d5e7da3f 39#ifndef isWORDCHAR
31ac59b6
KW
40# define isWORDCHAR(c) (isIDFIRST(c) \
41 || (((UV) (c)) >= '0' && ((UV) (c)) <= '9'))
d5e7da3f
KW
42#endif
43
920781c4
AC
44/* This struct contains almost all the user's desired configuration, and it
45 * is treated as constant by the recursive function. This arrangement has
46 * the advantage of needing less memory than passing all of them on the
47 * stack all the time (as was the case in an earlier implementation). But
48 * this means that, for example, "sortkeys" is a separate parameter. */
49typedef struct {
50 I32 indent;
51 SV *pad;
52 SV *xpad;
53 SV *sep;
54 SV *pair;
55 SV *freezer;
56 SV *toaster;
57 I32 purity;
58 I32 deepcopy;
59 I32 quotekeys;
60 SV *bless;
61 I32 maxdepth;
62 int use_sparse_seen_hash;
63 I32 useqq;
64 IV maxrecurse;
65} Style;
66
9061c4b9
AL
67static I32 num_q (const char *s, STRLEN slen);
68static I32 esc_q (char *dest, const char *src, STRLEN slen);
9baac1a3 69static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq);
5b50ddc0
TC
70static bool globname_needs_quote(const char *s, STRLEN len);
71static bool key_needs_quote(const char *s, STRLEN len);
72static bool safe_decimal_number(const char *p, STRLEN len);
aa07b2f6
SP
73static SV *sv_x (pTHX_ SV *sv, const char *str, STRLEN len, I32 n);
74static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
920781c4
AC
75 HV *seenhv, AV *postav, I32 *levelp, SV *apad, SV *sortkeys,
76 const Style *style);
823edd99 77
bfcb3514
NC
78#ifndef HvNAME_get
79#define HvNAME_get HvNAME
80#endif
81
54161612
KW
82/* Perls 7 through portions of 15 used utf8_to_uvchr() which didn't have a
83 * length parameter. This wrongly allowed reading beyond the end of buffer
84 * given malformed input */
85
fec5e1eb
IM
86#if PERL_VERSION <= 6 /* Perl 5.6 and earlier */
87
fec5e1eb 88UV
4b88fb76 89Perl_utf8_to_uvchr_buf(pTHX_ U8 *s, U8 *send, STRLEN *retlen)
fec5e1eb 90{
4b88fb76 91 const UV uv = utf8_to_uv(s, send - s, retlen,
fec5e1eb
IM
92 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
93 return UNI_TO_NATIVE(uv);
94}
95
96# if !defined(PERL_IMPLICIT_CONTEXT)
4b88fb76 97# define utf8_to_uvchr_buf Perl_utf8_to_uvchr_buf
fec5e1eb 98# else
54161612 99# define utf8_to_uvchr_buf(a,b,c) Perl_utf8_to_uvchr_buf(aTHX_ a,b,c)
fec5e1eb
IM
100# endif
101
102#endif /* PERL_VERSION <= 6 */
103
54161612
KW
104/* Perl 5.7 through part of 5.15 */
105#if PERL_VERSION > 6 && PERL_VERSION <= 15 && ! defined(utf8_to_uvchr_buf)
106
107UV
108Perl_utf8_to_uvchr_buf(pTHX_ U8 *s, U8 *send, STRLEN *retlen)
109{
110 /* We have to discard <send> for these versions; hence can read off the
111 * end of the buffer if there is a malformation that indicates the
112 * character is longer than the space available */
113
31ac59b6 114 return utf8_to_uvchr(s, retlen);
54161612
KW
115}
116
117# if !defined(PERL_IMPLICIT_CONTEXT)
118# define utf8_to_uvchr_buf Perl_utf8_to_uvchr_buf
119# else
120# define utf8_to_uvchr_buf(a,b,c) Perl_utf8_to_uvchr_buf(aTHX_ a,b,c)
121# endif
122
123#endif /* PERL_VERSION > 6 && <= 15 */
124
fec5e1eb
IM
125/* Changes in 5.7 series mean that now IOK is only set if scalar is
126 precisely integer but in 5.6 and earlier we need to do a more
127 complex test */
128#if PERL_VERSION <= 6
129#define DD_is_integer(sv) (SvIOK(sv) && (SvIsUV(val) ? SvUV(sv) == SvNV(sv) : SvIV(sv) == SvNV(sv)))
130#else
131#define DD_is_integer(sv) SvIOK(sv)
132#endif
133
5b50ddc0
TC
134/* does a glob name need to be protected? */
135static bool
136globname_needs_quote(const char *s, STRLEN len)
823edd99 137{
95869c09 138 const char *send = s+len;
823edd99
GS
139TOP:
140 if (s[0] == ':') {
95869c09 141 if (++s<send) {
823edd99 142 if (*s++ != ':')
5b50ddc0 143 return TRUE;
823edd99
GS
144 }
145 else
5b50ddc0 146 return TRUE;
823edd99
GS
147 }
148 if (isIDFIRST(*s)) {
95869c09 149 while (++s<send)
0eb30aeb 150 if (!isWORDCHAR(*s)) {
823edd99
GS
151 if (*s == ':')
152 goto TOP;
153 else
5b50ddc0 154 return TRUE;
7b0972df 155 }
823edd99 156 }
6cde4e94 157 else
5b50ddc0
TC
158 return TRUE;
159
160 return FALSE;
161}
162
163/* does a hash key need to be quoted (to the left of => ).
164 Previously this used (globname_)needs_quote() which accepted strings
165 like '::foo', but these aren't safe as unquoted keys under strict.
166*/
167static bool
168key_needs_quote(const char *s, STRLEN len) {
169 const char *send = s+len;
170
171 if (safe_decimal_number(s, len)) {
172 return FALSE;
173 }
174 else if (isIDFIRST(*s)) {
175 while (++s<send)
176 if (!isWORDCHAR(*s))
177 return TRUE;
178 }
179 else
180 return TRUE;
181
182 return FALSE;
823edd99
GS
183}
184
059639d5
TC
185/* Check that the SV can be represented as a simple decimal integer.
186 *
187 * The perl code does this by matching against /^(?:0|-?[1-9]\d{0,8})\z/
188*/
189static bool
5b50ddc0 190safe_decimal_number(const char *p, STRLEN len) {
059639d5
TC
191 if (len == 1 && *p == '0')
192 return TRUE;
193
194 if (len && *p == '-') {
195 ++p;
196 --len;
197 }
198
199 if (len == 0 || *p < '1' || *p > '9')
200 return FALSE;
201
202 ++p;
203 --len;
204
205 if (len > 8)
206 return FALSE;
207
208 while (len > 0) {
209 /* the perl code checks /\d/ but we don't want unicode digits here */
210 if (*p < '0' || *p > '9')
211 return FALSE;
212 ++p;
213 --len;
214 }
215 return TRUE;
216}
217
823edd99
GS
218/* count the number of "'"s and "\"s in string */
219static I32
5aaab254 220num_q(const char *s, STRLEN slen)
823edd99 221{
eb578fdb 222 I32 ret = 0;
6c1ab3c2
SR
223
224 while (slen > 0) {
823edd99
GS
225 if (*s == '\'' || *s == '\\')
226 ++ret;
227 ++s;
6c1ab3c2 228 --slen;
823edd99
GS
229 }
230 return ret;
231}
232
233
234/* returns number of chars added to escape "'"s and "\"s in s */
235/* slen number of characters in s will be escaped */
236/* destination must be long enough for additional chars */
237static I32
5aaab254 238esc_q(char *d, const char *s, STRLEN slen)
823edd99 239{
eb578fdb 240 I32 ret = 0;
6cde4e94 241
823edd99
GS
242 while (slen > 0) {
243 switch (*s) {
244 case '\'':
245 case '\\':
246 *d = '\\';
247 ++d; ++ret;
c67159e1 248 /* FALLTHROUGH */
823edd99
GS
249 default:
250 *d = *s;
251 ++d; ++s; --slen;
252 break;
253 }
254 }
255 return ret;
256}
257
9baac1a3 258/* this function is also misused for implementing $Useqq */
dc71dc59 259static I32
9baac1a3 260esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq)
dc71dc59 261{
9061c4b9
AL
262 char *r, *rstart;
263 const char *s = src;
264 const char * const send = src + slen;
f052740f
NC
265 STRLEN j, cur = SvCUR(sv);
266 /* Could count 128-255 and 256+ in two variables, if we want to
267 be like &qquote and make a distinction. */
268 STRLEN grow = 0; /* bytes needed to represent chars 128+ */
269 /* STRLEN topbit_grow = 0; bytes needed to represent chars 128-255 */
270 STRLEN backslashes = 0;
271 STRLEN single_quotes = 0;
272 STRLEN qq_escapables = 0; /* " $ @ will need a \ in "" strings. */
273 STRLEN normal = 0;
7d3a730e 274 int increment;
f052740f 275
31ac59b6
KW
276 for (s = src; s < send; s += increment) { /* Sizing pass */
277 UV k = *(U8*)s;
7d3a730e 278
31ac59b6 279 increment = 1; /* Will override if necessary for utf-8 */
dbf00f69 280
31ac59b6
KW
281 if (isPRINT(k)) {
282 if (k == '\\') {
283 backslashes++;
284 } else if (k == '\'') {
285 single_quotes++;
286 } else if (k == '"' || k == '$' || k == '@') {
287 qq_escapables++;
288 } else {
289 normal++;
290 }
291 }
292 else if (! isASCII(k) && k > ' ') {
293 /* High ordinal non-printable code point. (The test that k is
294 * above SPACE should be optimized out by the compiler on
295 * non-EBCDIC platforms; otherwise we could put an #ifdef around
296 * it, but it's better to have just a single code path when
297 * possible. All but one of the non-ASCII EBCDIC controls are low
298 * ordinal; that one is the only one above SPACE.)
299 *
300 * If UTF-8, output as hex, regardless of useqq. This means there
301 * is an overhead of 4 chars '\x{}'. Then count the number of hex
302 * digits. */
303 if (do_utf8) {
304 k = utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL);
305
306 /* treat invalid utf8 byte by byte. This loop iteration gets the
307 * first byte */
308 increment = (k == 0 && *s != '\0') ? 1 : UTF8SKIP(s);
309
310 grow += 4 + (k <= 0xFF ? 2 : k <= 0xFFF ? 3 : k <= 0xFFFF ? 4 :
f052740f 311#if UVSIZE == 4
31ac59b6 312 8 /* We may allocate a bit more than the minimum here. */
f052740f 313#else
31ac59b6 314 k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
9baac1a3 315#endif
31ac59b6
KW
316 );
317 }
318 else if (useqq) { /* Not utf8, must be <= 0xFF, hence 2 hex
319 * digits. */
320 grow += 4 + 2;
321 }
322 else { /* Non-qq generates 3 octal digits plus backslash */
323 grow += 4;
324 }
325 } /* End of high-ordinal non-printable */
326 else if (! useqq) { /* Low ordinal, non-printable, non-qq just
327 * outputs the raw char */
f052740f
NC
328 normal++;
329 }
31ac59b6
KW
330 else { /* Is qq, low ordinal, non-printable. Output escape
331 * sequences */
332 if ( k == '\a' || k == '\b' || k == '\t' || k == '\n' || k == '\r'
333 || k == '\f' || k == ESC_NATIVE)
334 {
335 grow += 2; /* 1 char plus backslash */
336 }
337 else /* The other low ordinals are output as an octal escape
338 * sequence */
339 if (s + 1 >= send || ( *(U8*)(s+1) >= '0'
340 && *(U8*)(s+1) <= '9'))
341 {
342 /* When the following character is a digit, use 3 octal digits
343 * plus backslash, as using fewer digits would concatenate the
344 * following char into this one */
345 grow += 4;
346 }
347 else if (k <= 7) {
348 grow += 2; /* 1 octal digit, plus backslash */
349 }
350 else if (k <= 077) {
351 grow += 3; /* 2 octal digits plus backslash */
352 }
353 else {
354 grow += 4; /* 3 octal digits plus backslash */
355 }
356 }
357 } /* End of size-calculating loop */
358
9baac1a3 359 if (grow || useqq) {
f052740f 360 /* We have something needing hex. 3 is ""\0 */
34231210
NC
361 sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
362 + 2*qq_escapables + normal);
f052740f
NC
363 rstart = r = SvPVX(sv) + cur;
364
365 *r++ = '"';
366
31ac59b6
KW
367 for (s = src; s < send; s += increment) {
368 UV k;
369
370 if (do_utf8
371 && ! isASCII(*s)
372 /* Exclude non-ASCII low ordinal controls. This should be
373 * optimized out by the compiler on ASCII platforms; if not
374 * could wrap it in a #ifdef EBCDIC, but better to avoid
375 * #if's if possible */
376 && *(U8*)s > ' '
377 ) {
378
379 /* When in UTF-8, we output all non-ascii chars as \x{}
380 * reqardless of useqq, except for the low ordinal controls on
381 * EBCDIC platforms */
382 k = utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL);
383
384 /* treat invalid utf8 byte by byte. This loop iteration gets the
385 * first byte */
386 increment = (k == 0 && *s != '\0') ? 1 : UTF8SKIP(s);
387
388#if PERL_VERSION < 10
389 sprintf(r, "\\x{%"UVxf"}", k);
390 r += strlen(r);
391 /* my_sprintf is not supported by ppport.h */
392#else
393 r = r + my_sprintf(r, "\\x{%"UVxf"}", k);
394#endif
395 continue;
396 }
397
398 /* Here 1) isn't UTF-8; or
399 * 2) the current character is ASCII; or
400 * 3) it is an EBCDIC platform and is a low ordinal
401 * non-ASCII control.
402 * In each case the character occupies just one byte */
403 k = *(U8*)s;
404 increment = 1;
405
406 if (isPRINT(k)) {
407 /* These need a backslash escape */
408 if (k == '"' || k == '\\' || k == '$' || k == '@') {
409 *r++ = '\\';
410 }
9baac1a3 411
7c436af3 412 *r++ = (char)k;
f052740f 413 }
31ac59b6
KW
414 else if (! useqq) { /* non-qq, non-printable, low-ordinal is
415 * output raw */
416 *r++ = (char)k;
417 }
418 else { /* Is qq means use escape sequences */
dbf00f69
TC
419 bool next_is_digit;
420
9baac1a3
SR
421 *r++ = '\\';
422 switch (k) {
31ac59b6
KW
423 case '\a': *r++ = 'a'; break;
424 case '\b': *r++ = 'b'; break;
425 case '\t': *r++ = 't'; break;
426 case '\n': *r++ = 'n'; break;
427 case '\f': *r++ = 'f'; break;
428 case '\r': *r++ = 'r'; break;
429 case ESC_NATIVE: *r++ = 'e'; break;
f1c45943 430 default:
dbf00f69
TC
431
432 /* only ASCII digits matter here, which are invariant,
433 * since we only encode characters \377 and under, or
434 * \x177 and under for a unicode string
435 */
31ac59b6
KW
436 next_is_digit = (s + 1 >= send )
437 ? FALSE
438 : (*(U8*)(s+1) >= '0' && *(U8*)(s+1) <= '9');
dbf00f69 439
f1c45943
TC
440 /* faster than
441 * r = r + my_sprintf(r, "%o", k);
442 */
dbf00f69 443 if (k <= 7 && !next_is_digit) {
f1c45943 444 *r++ = (char)k + '0';
dbf00f69 445 } else if (k <= 63 && !next_is_digit) {
f1c45943
TC
446 *r++ = (char)(k>>3) + '0';
447 *r++ = (char)(k&7) + '0';
448 } else {
449 *r++ = (char)(k>>6) + '0';
450 *r++ = (char)((k&63)>>3) + '0';
451 *r++ = (char)(k&7) + '0';
452 }
9baac1a3 453 }
f1c45943 454 }
f052740f
NC
455 }
456 *r++ = '"';
457 } else {
458 /* Single quotes. */
34231210
NC
459 sv_grow(sv, cur + 3 + 2*backslashes + 2*single_quotes
460 + qq_escapables + normal);
f052740f
NC
461 rstart = r = SvPVX(sv) + cur;
462 *r++ = '\'';
463 for (s = src; s < send; s ++) {
9061c4b9 464 const char k = *s;
f052740f
NC
465 if (k == '\'' || k == '\\')
466 *r++ = '\\';
467 *r++ = k;
468 }
469 *r++ = '\'';
dc71dc59 470 }
f052740f
NC
471 *r = '\0';
472 j = r - rstart;
473 SvCUR_set(sv, cur + j);
dc71dc59
JH
474
475 return j;
476}
477
823edd99
GS
478/* append a repeated string to an SV */
479static SV *
aa07b2f6 480sv_x(pTHX_ SV *sv, const char *str, STRLEN len, I32 n)
823edd99 481{
9061c4b9 482 if (!sv)
c2b90b61 483 sv = newSVpvs("");
65e66c80 484#ifdef DEBUGGING
823edd99
GS
485 else
486 assert(SvTYPE(sv) >= SVt_PV);
65e66c80 487#endif
823edd99
GS
488
489 if (n > 0) {
490 SvGROW(sv, len*n + SvCUR(sv) + 1);
491 if (len == 1) {
9061c4b9 492 char * const start = SvPVX(sv) + SvCUR(sv);
b162af07 493 SvCUR_set(sv, SvCUR(sv) + n);
823edd99
GS
494 start[n] = '\0';
495 while (n > 0)
496 start[--n] = str[0];
497 }
498 else
499 while (n > 0) {
500 sv_catpvn(sv, str, len);
501 --n;
502 }
503 }
504 return sv;
505}
506
507/*
508 * This ought to be split into smaller functions. (it is one long function since
509 * it exactly parallels the perl version, which was one long thing for
510 * efficiency raisins.) Ugggh!
511 */
512static I32
aa07b2f6 513DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
920781c4 514 AV *postav, I32 *levelp, SV *apad, SV *sortkeys, const Style *style)
823edd99
GS
515{
516 char tmpbuf[128];
c70927a6 517 Size_t i;
e52c0e5a
NC
518 char *c, *r, *realpack;
519#ifdef DD_USE_OLD_ID_FORMAT
520 char id[128];
521#else
522 UV id_buffer;
523 char *const id = (char *)&id_buffer;
524#endif
823edd99 525 SV **svp;
7820172a 526 SV *sv, *ipad, *ival;
823edd99 527 SV *blesspad = Nullsv;
7d49f689 528 AV *seenentry = NULL;
823edd99
GS
529 char *iname;
530 STRLEN inamelen, idlen = 0;
823edd99 531 U32 realtype;
4ab99479
YO
532 bool no_bless = 0; /* when a qr// is blessed into Regexp we dont want to bless it.
533 in later perls we should actually check the classname of the
534 engine. this gets tricky as it involves lexical issues that arent so
535 easy to resolve */
536 bool is_regex = 0; /* we are dumping a regex, we need to know this before we bless */
823edd99
GS
537
538 if (!val)
539 return 0;
540
a9ccbcd7 541 /* If the output buffer has less than some arbitrary amount of space
a4e0d239
NC
542 remaining, then enlarge it. For the test case (25M of output),
543 *1.1 was slower, *2.0 was the same, so the first guess of 1.5 is
544 deemed to be good enough. */
545 if (SvTYPE(retval) >= SVt_PV && (SvLEN(retval) - SvCUR(retval)) < 42) {
97c52c3c 546 sv_grow(retval, SvCUR(retval) * 3 / 2);
9fa5cce2 547 }
a4e0d239 548
823edd99 549 realtype = SvTYPE(val);
6cde4e94 550
823edd99
GS
551 if (SvGMAGICAL(val))
552 mg_get(val);
823edd99
GS
553 if (SvROK(val)) {
554
c5f7c514
ST
555 /* If a freeze method is provided and the object has it, call
556 it. Warn on errors. */
920781c4
AC
557 if (SvOBJECT(SvRV(val)) && style->freezer &&
558 SvPOK(style->freezer) && SvCUR(style->freezer) &&
559 gv_fetchmeth(SvSTASH(SvRV(val)), SvPVX_const(style->freezer),
560 SvCUR(style->freezer), -1) != NULL)
823edd99
GS
561 {
562 dSP; ENTER; SAVETMPS; PUSHMARK(sp);
563 XPUSHs(val); PUTBACK;
920781c4 564 i = perl_call_method(SvPVX_const(style->freezer), G_EVAL|G_VOID|G_DISCARD);
823edd99 565 SPAGAIN;
7820172a 566 if (SvTRUE(ERRSV))
35c1215d 567 warn("WARNING(Freezer method call failed): %"SVf"", ERRSV);
823edd99 568 PUTBACK; FREETMPS; LEAVE;
823edd99
GS
569 }
570
571 ival = SvRV(val);
823edd99 572 realtype = SvTYPE(ival);
e52c0e5a 573#ifdef DD_USE_OLD_ID_FORMAT
f5def3a2 574 idlen = my_snprintf(id, sizeof(id), "0x%"UVxf, PTR2UV(ival));
e52c0e5a
NC
575#else
576 id_buffer = PTR2UV(ival);
577 idlen = sizeof(id_buffer);
578#endif
823edd99 579 if (SvOBJECT(ival))
bfcb3514 580 realpack = HvNAME_get(SvSTASH(ival));
823edd99 581 else
9849c14c 582 realpack = NULL;
7820172a
GS
583
584 /* if it has a name, we need to either look it up, or keep a tab
585 * on it so we know when we hit it later
586 */
587 if (namelen) {
588 if ((svp = hv_fetch(seenhv, id, idlen, FALSE))
589 && (sv = *svp) && SvROK(sv) && (seenentry = (AV*)SvRV(sv)))
590 {
591 SV *othername;
592 if ((svp = av_fetch(seenentry, 0, FALSE))
593 && (othername = *svp))
594 {
920781c4 595 if (style->purity && *levelp > 0) {
7820172a
GS
596 SV *postentry;
597
598 if (realtype == SVt_PVHV)
46e2868e 599 sv_catpvs(retval, "{}");
7820172a 600 else if (realtype == SVt_PVAV)
46e2868e 601 sv_catpvs(retval, "[]");
7820172a 602 else
46e2868e 603 sv_catpvs(retval, "do{my $o}");
7820172a 604 postentry = newSVpvn(name, namelen);
46e2868e 605 sv_catpvs(postentry, " = ");
7820172a
GS
606 sv_catsv(postentry, othername);
607 av_push(postav, postentry);
608 }
609 else {
610 if (name[0] == '@' || name[0] == '%') {
aa07b2f6
SP
611 if ((SvPVX_const(othername))[0] == '\\' &&
612 (SvPVX_const(othername))[1] == name[0]) {
613 sv_catpvn(retval, SvPVX_const(othername)+1,
7820172a
GS
614 SvCUR(othername)-1);
615 }
616 else {
617 sv_catpvn(retval, name, 1);
46e2868e 618 sv_catpvs(retval, "{");
7820172a 619 sv_catsv(retval, othername);
46e2868e 620 sv_catpvs(retval, "}");
7820172a 621 }
823edd99 622 }
7820172a 623 else
823edd99 624 sv_catsv(retval, othername);
823edd99 625 }
7820172a
GS
626 return 1;
627 }
628 else {
e52c0e5a 629#ifdef DD_USE_OLD_ID_FORMAT
7820172a 630 warn("ref name not found for %s", id);
e52c0e5a
NC
631#else
632 warn("ref name not found for 0x%"UVxf, PTR2UV(ival));
633#endif
7820172a 634 return 0;
823edd99 635 }
823edd99 636 }
7820172a
GS
637 else { /* store our name and continue */
638 SV *namesv;
639 if (name[0] == '@' || name[0] == '%') {
c2b90b61 640 namesv = newSVpvs("\\");
7820172a
GS
641 sv_catpvn(namesv, name, namelen);
642 }
643 else if (realtype == SVt_PVCV && name[0] == '*') {
c2b90b61 644 namesv = newSVpvs("\\");
7820172a
GS
645 sv_catpvn(namesv, name, namelen);
646 (SvPVX(namesv))[1] = '&';
647 }
648 else
649 namesv = newSVpvn(name, namelen);
650 seenentry = newAV();
651 av_push(seenentry, namesv);
652 (void)SvREFCNT_inc(val);
653 av_push(seenentry, val);
383d9087 654 (void)hv_store(seenhv, id, idlen,
fec5e1eb 655 newRV_inc((SV*)seenentry), 0);
7820172a 656 SvREFCNT_dec(seenentry);
823edd99 657 }
823edd99 658 }
4ab99479
YO
659 /* regexps dont have to be blessed into package "Regexp"
660 * they can be blessed into any package.
661 */
662#if PERL_VERSION < 8
663 if (realpack && *realpack == 'R' && strEQ(realpack, "Regexp"))
664#elif PERL_VERSION < 11
bd2db5df 665 if (realpack && realtype == SVt_PVMG && mg_find(ival, PERL_MAGIC_qr))
4ab99479
YO
666#else
667 if (realpack && realtype == SVt_REGEXP)
668#endif
669 {
670 is_regex = 1;
671 if (strEQ(realpack, "Regexp"))
672 no_bless = 1;
673 else
674 no_bless = 0;
675 }
a2126434
JN
676
677 /* If purity is not set and maxdepth is set, then check depth:
678 * if we have reached maximum depth, return the string
679 * representation of the thing we are currently examining
6cde4e94 680 * at this depth (i.e., 'Foo=ARRAY(0xdeadbeef)').
a2126434 681 */
920781c4 682 if (!style->purity && style->maxdepth > 0 && *levelp >= style->maxdepth) {
a2126434 683 STRLEN vallen;
9061c4b9 684 const char * const valstr = SvPV(val,vallen);
46e2868e 685 sv_catpvs(retval, "'");
a2126434 686 sv_catpvn(retval, valstr, vallen);
46e2868e 687 sv_catpvs(retval, "'");
a2126434
JN
688 return 1;
689 }
690
920781c4
AC
691 if (style->maxrecurse > 0 && *levelp >= style->maxrecurse) {
692 croak("Recursion limit of %" IVdf " exceeded", style->maxrecurse);
19be3be6
TC
693 }
694
4ab99479 695 if (realpack && !no_bless) { /* we have a blessed ref */
a2126434 696 STRLEN blesslen;
920781c4 697 const char * const blessstr = SvPV(style->bless, blesslen);
a2126434 698 sv_catpvn(retval, blessstr, blesslen);
46e2868e 699 sv_catpvs(retval, "( ");
920781c4 700 if (style->indent >= 2) {
a2126434
JN
701 blesspad = apad;
702 apad = newSVsv(apad);
703 sv_x(aTHX_ apad, " ", 1, blesslen+2);
823edd99
GS
704 }
705 }
706
7894fbab 707 (*levelp)++;
920781c4 708 ipad = sv_x(aTHX_ Nullsv, SvPVX_const(style->xpad), SvCUR(style->xpad), *levelp);
7894fbab 709
4ab99479
YO
710 if (is_regex)
711 {
712 STRLEN rlen;
b183d514
TC
713 SV *sv_pattern = NULL;
714 SV *sv_flags = NULL;
715 CV *re_pattern_cv;
716 const char *rval;
717 const char *rend;
718 const char *slash;
719
720 if ((re_pattern_cv = get_cv("re::regexp_pattern", 0))) {
721 dSP;
722 I32 count;
723 ENTER;
724 SAVETMPS;
725 PUSHMARK(SP);
726 XPUSHs(val);
727 PUTBACK;
728 count = call_sv((SV*)re_pattern_cv, G_ARRAY);
729 SPAGAIN;
730 if (count >= 2) {
731 sv_flags = POPs;
732 sv_pattern = POPs;
733 SvREFCNT_inc(sv_flags);
734 SvREFCNT_inc(sv_pattern);
735 }
736 PUTBACK;
737 FREETMPS;
738 LEAVE;
739 if (sv_pattern) {
740 sv_2mortal(sv_pattern);
741 sv_2mortal(sv_flags);
742 }
743 }
744 else {
745 sv_pattern = val;
746 }
316ebaf2 747 assert(sv_pattern);
b183d514
TC
748 rval = SvPV(sv_pattern, rlen);
749 rend = rval+rlen;
750 slash = rval;
46e2868e 751 sv_catpvs(retval, "qr/");
de5ef703
FC
752 for (;slash < rend; slash++) {
753 if (*slash == '\\') { ++slash; continue; }
754 if (*slash == '/') {
4ab99479 755 sv_catpvn(retval, rval, slash-rval);
46e2868e 756 sv_catpvs(retval, "\\/");
4ab99479
YO
757 rlen -= slash-rval+1;
758 rval = slash+1;
de5ef703 759 }
4ab99479
YO
760 }
761 sv_catpvn(retval, rval, rlen);
46e2868e 762 sv_catpvs(retval, "/");
b183d514
TC
763 if (sv_flags)
764 sv_catsv(retval, sv_flags);
4ab99479
YO
765 }
766 else if (
d1dd14d1
JH
767#if PERL_VERSION < 9
768 realtype <= SVt_PVBM
769#else
770 realtype <= SVt_PVMG
771#endif
772 ) { /* scalar ref */
c2b90b61 773 SV * const namesv = newSVpvs("${");
7820172a 774 sv_catpvn(namesv, name, namelen);
46e2868e 775 sv_catpvs(namesv, "}");
6cde4e94 776 if (realpack) { /* blessed */
46e2868e 777 sv_catpvs(retval, "do{\\(my $o = ");
aa07b2f6 778 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
920781c4 779 postav, levelp, apad, sortkeys, style);
46e2868e 780 sv_catpvs(retval, ")}");
7820172a 781 } /* plain */
823edd99 782 else {
46e2868e 783 sv_catpvs(retval, "\\");
aa07b2f6 784 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
920781c4 785 postav, levelp, apad, sortkeys, style);
823edd99 786 }
7820172a
GS
787 SvREFCNT_dec(namesv);
788 }
789 else if (realtype == SVt_PVGV) { /* glob ref */
c2b90b61 790 SV * const namesv = newSVpvs("*{");
7820172a 791 sv_catpvn(namesv, name, namelen);
46e2868e
DIM
792 sv_catpvs(namesv, "}");
793 sv_catpvs(retval, "\\");
aa07b2f6 794 DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
920781c4 795 postav, levelp, apad, sortkeys, style);
7820172a 796 SvREFCNT_dec(namesv);
823edd99
GS
797 }
798 else if (realtype == SVt_PVAV) {
799 SV *totpad;
c70927a6
FC
800 SSize_t ix = 0;
801 const SSize_t ixmax = av_len((AV *)ival);
6cde4e94 802
9061c4b9 803 SV * const ixsv = newSViv(0);
823edd99
GS
804 /* allowing for a 24 char wide array index */
805 New(0, iname, namelen+28, char);
806 (void)strcpy(iname, name);
807 inamelen = namelen;
808 if (name[0] == '@') {
46e2868e 809 sv_catpvs(retval, "(");
823edd99
GS
810 iname[0] = '$';
811 }
812 else {
46e2868e 813 sv_catpvs(retval, "[");
7820172a
GS
814 /* omit "->" in $foo{bar}->[0], but not in ${$foo}->[0] */
815 /*if (namelen > 0
816 && name[namelen-1] != ']' && name[namelen-1] != '}'
817 && (namelen < 4 || (name[1] != '{' && name[2] != '{')))*/
818 if ((namelen > 0
819 && name[namelen-1] != ']' && name[namelen-1] != '}')
820 || (namelen > 4
821 && (name[1] == '{'
822 || (name[0] == '\\' && name[2] == '{'))))
823 {
823edd99
GS
824 iname[inamelen++] = '-'; iname[inamelen++] = '>';
825 iname[inamelen] = '\0';
826 }
827 }
828 if (iname[0] == '*' && iname[inamelen-1] == '}' && inamelen >= 8 &&
829 (instr(iname+inamelen-8, "{SCALAR}") ||
830 instr(iname+inamelen-7, "{ARRAY}") ||
831 instr(iname+inamelen-6, "{HASH}"))) {
832 iname[inamelen++] = '-'; iname[inamelen++] = '>';
833 }
834 iname[inamelen++] = '['; iname[inamelen] = '\0';
920781c4
AC
835 totpad = newSVsv(style->sep);
836 sv_catsv(totpad, style->pad);
823edd99
GS
837 sv_catsv(totpad, apad);
838
839 for (ix = 0; ix <= ixmax; ++ix) {
840 STRLEN ilen;
841 SV *elem;
842 svp = av_fetch((AV*)ival, ix, FALSE);
843 if (svp)
844 elem = *svp;
845 else
3280af22 846 elem = &PL_sv_undef;
823edd99
GS
847
848 ilen = inamelen;
849 sv_setiv(ixsv, ix);
02c14053
AC
850#if PERL_VERSION < 10
851 (void) sprintf(iname+ilen, "%"IVdf, (IV)ix);
852 ilen = strlen(iname);
853#else
f5def3a2 854 ilen = ilen + my_sprintf(iname+ilen, "%"IVdf, (IV)ix);
02c14053 855#endif
823edd99 856 iname[ilen++] = ']'; iname[ilen] = '\0';
920781c4 857 if (style->indent >= 3) {
823edd99
GS
858 sv_catsv(retval, totpad);
859 sv_catsv(retval, ipad);
46e2868e 860 sv_catpvs(retval, "#");
823edd99
GS
861 sv_catsv(retval, ixsv);
862 }
863 sv_catsv(retval, totpad);
864 sv_catsv(retval, ipad);
cea2e8a9 865 DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav,
920781c4 866 levelp, apad, sortkeys, style);
823edd99 867 if (ix < ixmax)
46e2868e 868 sv_catpvs(retval, ",");
823edd99
GS
869 }
870 if (ixmax >= 0) {
920781c4 871 SV * const opad = sv_x(aTHX_ Nullsv, SvPVX_const(style->xpad), SvCUR(style->xpad), (*levelp)-1);
823edd99
GS
872 sv_catsv(retval, totpad);
873 sv_catsv(retval, opad);
874 SvREFCNT_dec(opad);
875 }
876 if (name[0] == '@')
46e2868e 877 sv_catpvs(retval, ")");
823edd99 878 else
46e2868e 879 sv_catpvs(retval, "]");
823edd99
GS
880 SvREFCNT_dec(ixsv);
881 SvREFCNT_dec(totpad);
882 Safefree(iname);
883 }
884 else if (realtype == SVt_PVHV) {
885 SV *totpad, *newapad;
9061c4b9 886 SV *sname;
14975c41 887 HE *entry = NULL;
823edd99
GS
888 char *key;
889 I32 klen;
890 SV *hval;
7d49f689 891 AV *keys = NULL;
6cde4e94 892
9061c4b9 893 SV * const iname = newSVpvn(name, namelen);
823edd99 894 if (name[0] == '%') {
46e2868e 895 sv_catpvs(retval, "(");
823edd99
GS
896 (SvPVX(iname))[0] = '$';
897 }
898 else {
46e2868e 899 sv_catpvs(retval, "{");
7820172a
GS
900 /* omit "->" in $foo[0]->{bar}, but not in ${$foo}->{bar} */
901 if ((namelen > 0
902 && name[namelen-1] != ']' && name[namelen-1] != '}')
903 || (namelen > 4
904 && (name[1] == '{'
905 || (name[0] == '\\' && name[2] == '{'))))
906 {
46e2868e 907 sv_catpvs(iname, "->");
823edd99
GS
908 }
909 }
910 if (name[0] == '*' && name[namelen-1] == '}' && namelen >= 8 &&
911 (instr(name+namelen-8, "{SCALAR}") ||
912 instr(name+namelen-7, "{ARRAY}") ||
913 instr(name+namelen-6, "{HASH}"))) {
46e2868e 914 sv_catpvs(iname, "->");
823edd99 915 }
46e2868e 916 sv_catpvs(iname, "{");
920781c4
AC
917 totpad = newSVsv(style->sep);
918 sv_catsv(totpad, style->pad);
823edd99 919 sv_catsv(totpad, apad);
6cde4e94 920
e9105f86
BI
921 /* If requested, get a sorted/filtered array of hash keys */
922 if (sortkeys) {
923 if (sortkeys == &PL_sv_yes) {
fec5e1eb 924#if PERL_VERSION < 8
c2b90b61 925 sortkeys = sv_2mortal(newSVpvs("Data::Dumper::_sortkeys"));
fec5e1eb 926#else
e9105f86
BI
927 keys = newAV();
928 (void)hv_iterinit((HV*)ival);
20d72259 929 while ((entry = hv_iternext((HV*)ival))) {
e9105f86 930 sv = hv_iterkeysv(entry);
63c7889e 931 (void)SvREFCNT_inc(sv);
e9105f86
BI
932 av_push(keys, sv);
933 }
dc36ed0c 934# ifdef USE_LOCALE_COLLATE
d6ded950
KW
935# ifdef IN_LC /* Use this if available */
936 if (IN_LC(LC_COLLATE))
937# else
938 if (IN_LOCALE)
939# endif
940 {
941 sortsv(AvARRAY(keys),
942 av_len(keys)+1,
943 Perl_sv_cmp_locale);
944 }
945 else
fec5e1eb 946# endif
02a99678 947#endif
d6ded950
KW
948 {
949 sortsv(AvARRAY(keys),
950 av_len(keys)+1,
951 Perl_sv_cmp);
952 }
e9105f86 953 }
fec5e1eb 954 if (sortkeys != &PL_sv_yes) {
e9105f86
BI
955 dSP; ENTER; SAVETMPS; PUSHMARK(sp);
956 XPUSHs(sv_2mortal(newRV_inc(ival))); PUTBACK;
957 i = perl_call_sv(sortkeys, G_SCALAR | G_EVAL);
958 SPAGAIN;
959 if (i) {
960 sv = POPs;
961 if (SvROK(sv) && (SvTYPE(SvRV(sv)) == SVt_PVAV))
962 keys = (AV*)SvREFCNT_inc(SvRV(sv));
963 }
964 if (! keys)
965 warn("Sortkeys subroutine did not return ARRAYREF\n");
966 PUTBACK; FREETMPS; LEAVE;
967 }
968 if (keys)
969 sv_2mortal((SV*)keys);
970 }
971 else
972 (void)hv_iterinit((HV*)ival);
ecfc8647
AMS
973
974 /* foreach (keys %hash) */
975 for (i = 0; 1; i++) {
fdce9ba9
NC
976 char *nkey;
977 char *nkey_buffer = NULL;
823edd99 978 I32 nticks = 0;
dc71dc59
JH
979 SV* keysv;
980 STRLEN keylen;
fdce9ba9 981 I32 nlen;
dc71dc59 982 bool do_utf8 = FALSE;
ecfc8647 983
27688d77 984 if (sortkeys) {
c70927a6 985 if (!(keys && (SSize_t)i <= av_len(keys))) break;
27688d77
YST
986 } else {
987 if (!(entry = hv_iternext((HV *)ival))) break;
988 }
ecfc8647 989
823edd99 990 if (i)
46e2868e 991 sv_catpvs(retval, ",");
e9105f86
BI
992
993 if (sortkeys) {
994 char *key;
995 svp = av_fetch(keys, i, FALSE);
4bbdbd51 996 keysv = svp ? *svp : sv_newmortal();
e9105f86 997 key = SvPV(keysv, keylen);
d075f8ed 998 svp = hv_fetch((HV*)ival, key,
c33e8be1 999 SvUTF8(keysv) ? -(I32)keylen : (I32)keylen, 0);
4bbdbd51 1000 hval = svp ? *svp : sv_newmortal();
e9105f86
BI
1001 }
1002 else {
1003 keysv = hv_iterkeysv(entry);
1004 hval = hv_iterval((HV*)ival, entry);
1005 }
1006
dc71dc59 1007 key = SvPV(keysv, keylen);
8738e0c0 1008 do_utf8 = DO_UTF8(keysv);
dc71dc59
JH
1009 klen = keylen;
1010
fdce9ba9
NC
1011 sv_catsv(retval, totpad);
1012 sv_catsv(retval, ipad);
5b50ddc0
TC
1013 /* The (very)
1014 old logic was first to check utf8 flag, and if utf8 always
fdce9ba9
NC
1015 call esc_q_utf8. This caused test to break under -Mutf8,
1016 because there even strings like 'c' have utf8 flag on.
1017 Hence with quotekeys == 0 the XS code would still '' quote
1018 them based on flags, whereas the perl code would not,
1019 based on regexps.
5b50ddc0
TC
1020
1021 The old logic checked that the string was a valid
1022 perl glob name (foo::bar), which isn't safe under
1023 strict, and differs from the perl code which only
1024 accepts simple identifiers.
1025
1026 With the fix for [perl #120384] I chose to make
1027 their handling of key quoting compatible between XS
1028 and perl.
1029 */
920781c4
AC
1030 if (style->quotekeys || key_needs_quote(key,keylen)) {
1031 if (do_utf8 || style->useqq) {
fdce9ba9 1032 STRLEN ocur = SvCUR(retval);
920781c4 1033 nlen = esc_q_utf8(aTHX_ retval, key, klen, do_utf8, style->useqq);
fdce9ba9
NC
1034 nkey = SvPVX(retval) + ocur;
1035 }
1036 else {
dc71dc59 1037 nticks = num_q(key, klen);
fdce9ba9
NC
1038 New(0, nkey_buffer, klen+nticks+3, char);
1039 nkey = nkey_buffer;
dc71dc59
JH
1040 nkey[0] = '\'';
1041 if (nticks)
1042 klen += esc_q(nkey+1, key, klen);
1043 else
1044 (void)Copy(key, nkey+1, klen, char);
1045 nkey[++klen] = '\'';
1046 nkey[++klen] = '\0';
fdce9ba9
NC
1047 nlen = klen;
1048 sv_catpvn(retval, nkey, klen);
dc71dc59 1049 }
fdce9ba9
NC
1050 }
1051 else {
1052 nkey = key;
1053 nlen = klen;
1054 sv_catpvn(retval, nkey, klen);
dc71dc59 1055 }
fdce9ba9
NC
1056 sname = newSVsv(iname);
1057 sv_catpvn(sname, nkey, nlen);
46e2868e 1058 sv_catpvs(sname, "}");
fdce9ba9 1059
920781c4
AC
1060 sv_catsv(retval, style->pair);
1061 if (style->indent >= 2) {
823edd99
GS
1062 char *extra;
1063 I32 elen = 0;
1064 newapad = newSVsv(apad);
1065 New(0, extra, klen+4+1, char);
1066 while (elen < (klen+4))
1067 extra[elen++] = ' ';
1068 extra[elen] = '\0';
1069 sv_catpvn(newapad, extra, elen);
1070 Safefree(extra);
1071 }
1072 else
1073 newapad = apad;
1074
aa07b2f6 1075 DD_dump(aTHX_ hval, SvPVX_const(sname), SvCUR(sname), retval, seenhv,
920781c4 1076 postav, levelp, newapad, sortkeys, style);
823edd99 1077 SvREFCNT_dec(sname);
fdce9ba9 1078 Safefree(nkey_buffer);
920781c4 1079 if (style->indent >= 2)
823edd99
GS
1080 SvREFCNT_dec(newapad);
1081 }
1082 if (i) {
920781c4
AC
1083 SV *opad = sv_x(aTHX_ Nullsv, SvPVX_const(style->xpad),
1084 SvCUR(style->xpad), *levelp-1);
823edd99
GS
1085 sv_catsv(retval, totpad);
1086 sv_catsv(retval, opad);
1087 SvREFCNT_dec(opad);
1088 }
1089 if (name[0] == '%')
46e2868e 1090 sv_catpvs(retval, ")");
823edd99 1091 else
46e2868e 1092 sv_catpvs(retval, "}");
823edd99
GS
1093 SvREFCNT_dec(iname);
1094 SvREFCNT_dec(totpad);
1095 }
1096 else if (realtype == SVt_PVCV) {
46e2868e 1097 sv_catpvs(retval, "sub { \"DUMMY\" }");
920781c4 1098 if (style->purity)
823edd99
GS
1099 warn("Encountered CODE ref, using dummy placeholder");
1100 }
1101 else {
4f12661d 1102 warn("cannot handle ref type %d", (int)realtype);
823edd99
GS
1103 }
1104
4ab99479 1105 if (realpack && !no_bless) { /* free blessed allocs */
d0c214fd
AF
1106 I32 plen;
1107 I32 pticks;
1108
920781c4 1109 if (style->indent >= 2) {
823edd99
GS
1110 SvREFCNT_dec(apad);
1111 apad = blesspad;
1112 }
46e2868e 1113 sv_catpvs(retval, ", '");
d0c214fd
AF
1114
1115 plen = strlen(realpack);
1116 pticks = num_q(realpack, plen);
34baf60a 1117 if (pticks) { /* needs escaping */
d0c214fd
AF
1118 char *npack;
1119 char *npack_buffer = NULL;
1120
1121 New(0, npack_buffer, plen+pticks+1, char);
1122 npack = npack_buffer;
1123 plen += esc_q(npack, realpack, plen);
1124 npack[plen] = '\0';
1125
1126 sv_catpvn(retval, npack, plen);
1127 Safefree(npack_buffer);
1128 }
1129 else {
1130 sv_catpvn(retval, realpack, strlen(realpack));
1131 }
46e2868e 1132 sv_catpvs(retval, "' )");
920781c4 1133 if (style->toaster && SvPOK(style->toaster) && SvCUR(style->toaster)) {
46e2868e 1134 sv_catpvs(retval, "->");
920781c4 1135 sv_catsv(retval, style->toaster);
46e2868e 1136 sv_catpvs(retval, "()");
823edd99
GS
1137 }
1138 }
1139 SvREFCNT_dec(ipad);
1140 (*levelp)--;
1141 }
1142 else {
1143 STRLEN i;
d036e907 1144 const MAGIC *mg;
823edd99
GS
1145
1146 if (namelen) {
e52c0e5a 1147#ifdef DD_USE_OLD_ID_FORMAT
f5def3a2 1148 idlen = my_snprintf(id, sizeof(id), "0x%"UVxf, PTR2UV(val));
e52c0e5a
NC
1149#else
1150 id_buffer = PTR2UV(val);
1151 idlen = sizeof(id_buffer);
1152#endif
f5def3a2 1153 if ((svp = hv_fetch(seenhv, id, idlen, FALSE)) &&
823edd99 1154 (sv = *svp) && SvROK(sv) &&
7820172a
GS
1155 (seenentry = (AV*)SvRV(sv)))
1156 {
823edd99 1157 SV *othername;
7820172a
GS
1158 if ((svp = av_fetch(seenentry, 0, FALSE)) && (othername = *svp)
1159 && (svp = av_fetch(seenentry, 2, FALSE)) && *svp && SvIV(*svp) > 0)
1160 {
46e2868e 1161 sv_catpvs(retval, "${");
823edd99 1162 sv_catsv(retval, othername);
46e2868e 1163 sv_catpvs(retval, "}");
823edd99
GS
1164 return 1;
1165 }
1166 }
d424882c
SM
1167 /* If we're allowed to keep only a sparse "seen" hash
1168 * (IOW, the user does not expect it to contain everything
1169 * after the dump, then only store in seen hash if the SV
1170 * ref count is larger than 1. If it's 1, then we know that
bdf6ae42
SM
1171 * there is no other reference, duh. This is an optimization.
1172 * Note that we'd have to check for weak-refs, too, but this is
1173 * already the branch for non-refs only. */
920781c4 1174 else if (val != &PL_sv_undef && (!style->use_sparse_seen_hash || SvREFCNT(val) > 1)) {
c2b90b61 1175 SV * const namesv = newSVpvs("\\");
823edd99
GS
1176 sv_catpvn(namesv, name, namelen);
1177 seenentry = newAV();
1178 av_push(seenentry, namesv);
fec5e1eb 1179 av_push(seenentry, newRV_inc(val));
383d9087 1180 (void)hv_store(seenhv, id, idlen, newRV_inc((SV*)seenentry), 0);
823edd99
GS
1181 SvREFCNT_dec(seenentry);
1182 }
1183 }
7820172a 1184
fec5e1eb 1185 if (DD_is_integer(val)) {
823edd99 1186 STRLEN len;
0e8b3009 1187 if (SvIsUV(val))
f5def3a2 1188 len = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"UVuf, SvUV(val));
0e8b3009 1189 else
f5def3a2 1190 len = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"IVdf, SvIV(val));
c4cce848
NC
1191 if (SvPOK(val)) {
1192 /* Need to check to see if this is a string such as " 0".
31ac59b6 1193 I'm assuming from sprintf isn't going to clash with utf8. */
c4cce848 1194 STRLEN pvlen;
9061c4b9 1195 const char * const pv = SvPV(val, pvlen);
c4cce848
NC
1196 if (pvlen != len || memNE(pv, tmpbuf, len))
1197 goto integer_came_from_string;
1198 }
1199 if (len > 10) {
1200 /* Looks like we're on a 64 bit system. Make it a string so that
1201 if a 32 bit system reads the number it will cope better. */
1202 sv_catpvf(retval, "'%s'", tmpbuf);
1203 } else
1204 sv_catpvn(retval, tmpbuf, len);
823edd99
GS
1205 }
1206 else if (realtype == SVt_PVGV) {/* GLOBs can end up with scribbly names */
1207 c = SvPV(val, i);
ecf0432f 1208 if(i) ++c, --i; /* just get the name */
823edd99
GS
1209 if (i >= 6 && strncmp(c, "main::", 6) == 0) {
1210 c += 4;
ad08c923
FC
1211#if PERL_VERSION < 7
1212 if (i == 6 || (i == 7 && c[6] == '\0'))
1213#else
1214 if (i == 6)
1215#endif
1216 i = 0; else i -= 4;
823edd99 1217 }
5b50ddc0 1218 if (globname_needs_quote(c,i)) {
95869c09
FC
1219#ifdef GvNAMEUTF8
1220 if (GvNAMEUTF8(val)) {
1221 sv_grow(retval, SvCUR(retval)+2);
1222 r = SvPVX(retval)+SvCUR(retval);
1223 r[0] = '*'; r[1] = '{';
1224 SvCUR_set(retval, SvCUR(retval)+2);
920781c4 1225 esc_q_utf8(aTHX_ retval, c, i, 1, style->useqq);
95869c09
FC
1226 sv_grow(retval, SvCUR(retval)+2);
1227 r = SvPVX(retval)+SvCUR(retval);
1228 r[0] = '}'; r[1] = '\0';
1229 i = 1;
1230 }
1231 else
1232#endif
1233 {
823edd99
GS
1234 sv_grow(retval, SvCUR(retval)+6+2*i);
1235 r = SvPVX(retval)+SvCUR(retval);
1236 r[0] = '*'; r[1] = '{'; r[2] = '\'';
1237 i += esc_q(r+3, c, i);
1238 i += 3;
1239 r[i++] = '\''; r[i++] = '}';
1240 r[i] = '\0';
95869c09 1241 }
823edd99
GS
1242 }
1243 else {
1244 sv_grow(retval, SvCUR(retval)+i+2);
1245 r = SvPVX(retval)+SvCUR(retval);
1246 r[0] = '*'; strcpy(r+1, c);
1247 i++;
1248 }
7820172a 1249 SvCUR_set(retval, SvCUR(retval)+i);
823edd99 1250
920781c4 1251 if (style->purity) {
27da23d5
JH
1252 static const char* const entries[] = { "{SCALAR}", "{ARRAY}", "{HASH}" };
1253 static const STRLEN sizes[] = { 8, 7, 6 };
823edd99 1254 SV *e;
c2b90b61
DIM
1255 SV * const nname = newSVpvs("");
1256 SV * const newapad = newSVpvs("");
9061c4b9 1257 GV * const gv = (GV*)val;
823edd99
GS
1258 I32 j;
1259
1260 for (j=0; j<3; j++) {
1261 e = ((j == 0) ? GvSV(gv) : (j == 1) ? (SV*)GvAV(gv) : (SV*)GvHV(gv));
7820172a
GS
1262 if (!e)
1263 continue;
1264 if (j == 0 && !SvOK(e))
1265 continue;
1266
1267 {
823edd99 1268 I32 nlevel = 0;
7820172a 1269 SV *postentry = newSVpvn(r,i);
823edd99
GS
1270
1271 sv_setsv(nname, postentry);
1272 sv_catpvn(nname, entries[j], sizes[j]);
46e2868e 1273 sv_catpvs(postentry, " = ");
823edd99 1274 av_push(postav, postentry);
fec5e1eb 1275 e = newRV_inc(e);
823edd99 1276
b162af07 1277 SvCUR_set(newapad, 0);
920781c4 1278 if (style->indent >= 2)
cea2e8a9 1279 (void)sv_x(aTHX_ newapad, " ", 1, SvCUR(postentry));
823edd99 1280
aa07b2f6 1281 DD_dump(aTHX_ e, SvPVX_const(nname), SvCUR(nname), postentry,
920781c4 1282 seenhv, postav, &nlevel, newapad, sortkeys, style);
823edd99
GS
1283 SvREFCNT_dec(e);
1284 }
1285 }
1286
1287 SvREFCNT_dec(newapad);
1288 SvREFCNT_dec(nname);
1289 }
1290 }
7820172a 1291 else if (val == &PL_sv_undef || !SvOK(val)) {
46e2868e 1292 sv_catpvs(retval, "undef");
7820172a 1293 }
d036e907
FC
1294#ifdef SvVOK
1295 else if (SvMAGICAL(val) && (mg = mg_find(val, 'V'))) {
5fde9fa4 1296# if !defined(PL_vtbl_vstring) && PERL_VERSION < 17
d036e907
FC
1297 SV * const vecsv = sv_newmortal();
1298# if PERL_VERSION < 10
1299 scan_vstring(mg->mg_ptr, vecsv);
1300# else
1301 scan_vstring(mg->mg_ptr, mg->mg_ptr + mg->mg_len, vecsv);
1302# endif
1303 if (!sv_eq(vecsv, val)) goto integer_came_from_string;
1304# endif
1305 sv_catpvn(retval, (const char *)mg->mg_ptr, mg->mg_len);
1306 }
1307#endif
059639d5 1308
823edd99 1309 else {
c4cce848 1310 integer_came_from_string:
5b50ddc0
TC
1311 c = SvPV(val, i);
1312 /* the pure perl and XS non-qq outputs have historically been
1313 * different in this case, but for useqq, let's try to match
1314 * the pure perl code.
1315 * see [perl #74798]
1316 */
920781c4 1317 if (style->useqq && safe_decimal_number(c, i)) {
5b50ddc0
TC
1318 sv_catsv(retval, val);
1319 }
920781c4
AC
1320 else if (DO_UTF8(val) || style->useqq)
1321 i += esc_q_utf8(aTHX_ retval, c, i, DO_UTF8(val), style->useqq);
dc71dc59
JH
1322 else {
1323 sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */
1324 r = SvPVX(retval) + SvCUR(retval);
1325 r[0] = '\'';
1326 i += esc_q(r+1, c, i);
1327 ++i;
1328 r[i++] = '\'';
1329 r[i] = '\0';
1330 SvCUR_set(retval, SvCUR(retval)+i);
1331 }
823edd99 1332 }
823edd99
GS
1333 }
1334
7820172a 1335 if (idlen) {
920781c4 1336 if (style->deepcopy)
7820172a
GS
1337 (void)hv_delete(seenhv, id, idlen, G_DISCARD);
1338 else if (namelen && seenentry) {
1339 SV *mark = *av_fetch(seenentry, 2, TRUE);
1340 sv_setiv(mark,1);
1341 }
1342 }
823edd99
GS
1343 return 1;
1344}
1345
1346
1347MODULE = Data::Dumper PACKAGE = Data::Dumper PREFIX = Data_Dumper_
1348
1349#
1350# This is the exact equivalent of Dump. Well, almost. The things that are
1351# different as of now (due to Laziness):
31ac59b6 1352# * doesn't do deparse yet.'
823edd99
GS
1353#
1354
1355void
1356Data_Dumper_Dumpxs(href, ...)
1357 SV *href;
1358 PROTOTYPE: $;$$
1359 PPCODE:
1360 {
1361 HV *hv;
1362 SV *retval, *valstr;
5c284bb0 1363 HV *seenhv = NULL;
823edd99
GS
1364 AV *postav, *todumpav, *namesav;
1365 I32 level = 0;
920781c4 1366 I32 terse = 0;
c70927a6 1367 SSize_t i, imax, postlen;
823edd99 1368 SV **svp;
920781c4
AC
1369 SV *apad = &PL_sv_undef;
1370 SV *sortkeys = &PL_sv_undef;
1371 Style style;
1372
1373 SV *name, *val = &PL_sv_undef, *varname = &PL_sv_undef;
823edd99 1374 char tmpbuf[1024];
59421c01 1375 I32 gimme = GIMME_V;
823edd99
GS
1376
1377 if (!SvROK(href)) { /* call new to get an object first */
0f1923bd
GS
1378 if (items < 2)
1379 croak("Usage: Data::Dumper::Dumpxs(PACKAGE, VAL_ARY_REF, [NAME_ARY_REF])");
823edd99
GS
1380
1381 ENTER;
1382 SAVETMPS;
1383
1384 PUSHMARK(sp);
6a876a42
SM
1385 EXTEND(SP, 3); /* 3 == max of all branches below */
1386 PUSHs(href);
1387 PUSHs(sv_2mortal(newSVsv(ST(1))));
0f1923bd 1388 if (items >= 3)
6a876a42 1389 PUSHs(sv_2mortal(newSVsv(ST(2))));
823edd99
GS
1390 PUTBACK;
1391 i = perl_call_method("new", G_SCALAR);
1392 SPAGAIN;
1393 if (i)
1394 href = newSVsv(POPs);
1395
1396 PUTBACK;
1397 FREETMPS;
1398 LEAVE;
1399 if (i)
1400 (void)sv_2mortal(href);
1401 }
1402
7d49f689 1403 todumpav = namesav = NULL;
920781c4
AC
1404 style.indent = 2;
1405 style.quotekeys = 1;
1406 style.maxrecurse = 1000;
1407 style.purity = style.deepcopy = style.useqq = style.maxdepth
1408 = style.use_sparse_seen_hash = 0;
1409 style.pad = style.xpad = style.sep = style.pair
1410 = style.freezer = style.toaster = style.bless = &PL_sv_undef;
5c284bb0 1411 seenhv = NULL;
823edd99 1412 name = sv_newmortal();
6cde4e94 1413
c2b90b61 1414 retval = newSVpvs("");
823edd99
GS
1415 if (SvROK(href)
1416 && (hv = (HV*)SvRV((SV*)href))
1417 && SvTYPE(hv) == SVt_PVHV) {
1418
1419 if ((svp = hv_fetch(hv, "seen", 4, FALSE)) && SvROK(*svp))
1420 seenhv = (HV*)SvRV(*svp);
d424882c 1421 else
920781c4 1422 style.use_sparse_seen_hash = 1;
d424882c 1423 if ((svp = hv_fetch(hv, "noseen", 6, FALSE)))
920781c4 1424 style.use_sparse_seen_hash = (SvOK(*svp) && SvIV(*svp) != 0);
823edd99
GS
1425 if ((svp = hv_fetch(hv, "todump", 6, FALSE)) && SvROK(*svp))
1426 todumpav = (AV*)SvRV(*svp);
1427 if ((svp = hv_fetch(hv, "names", 5, FALSE)) && SvROK(*svp))
1428 namesav = (AV*)SvRV(*svp);
1429 if ((svp = hv_fetch(hv, "indent", 6, FALSE)))
920781c4 1430 style.indent = SvIV(*svp);
823edd99 1431 if ((svp = hv_fetch(hv, "purity", 6, FALSE)))
920781c4 1432 style.purity = SvIV(*svp);
823edd99
GS
1433 if ((svp = hv_fetch(hv, "terse", 5, FALSE)))
1434 terse = SvTRUE(*svp);
1435 if ((svp = hv_fetch(hv, "useqq", 5, FALSE)))
920781c4 1436 style.useqq = SvTRUE(*svp);
823edd99 1437 if ((svp = hv_fetch(hv, "pad", 3, FALSE)))
920781c4 1438 style.pad = *svp;
823edd99 1439 if ((svp = hv_fetch(hv, "xpad", 4, FALSE)))
920781c4 1440 style.xpad = *svp;
823edd99
GS
1441 if ((svp = hv_fetch(hv, "apad", 4, FALSE)))
1442 apad = *svp;
1443 if ((svp = hv_fetch(hv, "sep", 3, FALSE)))
920781c4 1444 style.sep = *svp;
30b4f386 1445 if ((svp = hv_fetch(hv, "pair", 4, FALSE)))
920781c4 1446 style.pair = *svp;
823edd99
GS
1447 if ((svp = hv_fetch(hv, "varname", 7, FALSE)))
1448 varname = *svp;
1449 if ((svp = hv_fetch(hv, "freezer", 7, FALSE)))
920781c4 1450 style.freezer = *svp;
823edd99 1451 if ((svp = hv_fetch(hv, "toaster", 7, FALSE)))
920781c4 1452 style.toaster = *svp;
823edd99 1453 if ((svp = hv_fetch(hv, "deepcopy", 8, FALSE)))
920781c4 1454 style.deepcopy = SvTRUE(*svp);
823edd99 1455 if ((svp = hv_fetch(hv, "quotekeys", 9, FALSE)))
920781c4 1456 style.quotekeys = SvTRUE(*svp);
823edd99 1457 if ((svp = hv_fetch(hv, "bless", 5, FALSE)))
920781c4 1458 style.bless = *svp;
a2126434 1459 if ((svp = hv_fetch(hv, "maxdepth", 8, FALSE)))
920781c4 1460 style.maxdepth = SvIV(*svp);
19be3be6 1461 if ((svp = hv_fetch(hv, "maxrecurse", 10, FALSE)))
920781c4 1462 style.maxrecurse = SvIV(*svp);
e9105f86
BI
1463 if ((svp = hv_fetch(hv, "sortkeys", 8, FALSE))) {
1464 sortkeys = *svp;
1465 if (! SvTRUE(sortkeys))
1466 sortkeys = NULL;
1467 else if (! (SvROK(sortkeys) &&
1468 SvTYPE(SvRV(sortkeys)) == SVt_PVCV) )
1469 {
1470 /* flag to use qsortsv() for sorting hash keys */
1471 sortkeys = &PL_sv_yes;
1472 }
1473 }
823edd99
GS
1474 postav = newAV();
1475
1476 if (todumpav)
1477 imax = av_len(todumpav);
1478 else
1479 imax = -1;
c2b90b61 1480 valstr = newSVpvs("");
823edd99
GS
1481 for (i = 0; i <= imax; ++i) {
1482 SV *newapad;
6cde4e94 1483
823edd99
GS
1484 av_clear(postav);
1485 if ((svp = av_fetch(todumpav, i, FALSE)))
1486 val = *svp;
1487 else
3280af22 1488 val = &PL_sv_undef;
d20128b8 1489 if ((svp = av_fetch(namesav, i, TRUE))) {
823edd99 1490 sv_setsv(name, *svp);
d20128b8
RGS
1491 if (SvOK(*svp) && !SvPOK(*svp))
1492 (void)SvPV_nolen_const(name);
1493 }
823edd99 1494 else
8063af02 1495 (void)SvOK_off(name);
6cde4e94 1496
d20128b8 1497 if (SvPOK(name)) {
aa07b2f6 1498 if ((SvPVX_const(name))[0] == '*') {
823edd99
GS
1499 if (SvROK(val)) {
1500 switch (SvTYPE(SvRV(val))) {
1501 case SVt_PVAV:
1502 (SvPVX(name))[0] = '@';
1503 break;
1504 case SVt_PVHV:
1505 (SvPVX(name))[0] = '%';
1506 break;
1507 case SVt_PVCV:
1508 (SvPVX(name))[0] = '*';
1509 break;
1510 default:
1511 (SvPVX(name))[0] = '$';
1512 break;
1513 }
1514 }
1515 else
1516 (SvPVX(name))[0] = '$';
1517 }
aa07b2f6 1518 else if ((SvPVX_const(name))[0] != '$')
823edd99
GS
1519 sv_insert(name, 0, 0, "$", 1);
1520 }
1521 else {
9061c4b9 1522 STRLEN nchars;
823edd99
GS
1523 sv_setpvn(name, "$", 1);
1524 sv_catsv(name, varname);
f5def3a2 1525 nchars = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"IVdf, (IV)(i+1));
823edd99
GS
1526 sv_catpvn(name, tmpbuf, nchars);
1527 }
6cde4e94 1528
920781c4 1529 if (style.indent >= 2 && !terse) {
9061c4b9 1530 SV * const tmpsv = sv_x(aTHX_ NULL, " ", 1, SvCUR(name)+3);
823edd99
GS
1531 newapad = newSVsv(apad);
1532 sv_catsv(newapad, tmpsv);
1533 SvREFCNT_dec(tmpsv);
1534 }
1535 else
1536 newapad = apad;
6cde4e94 1537
e3ec2293 1538 PUTBACK;
aa07b2f6 1539 DD_dump(aTHX_ val, SvPVX_const(name), SvCUR(name), valstr, seenhv,
920781c4 1540 postav, &level, newapad, sortkeys, &style);
e3ec2293 1541 SPAGAIN;
6cde4e94 1542
920781c4 1543 if (style.indent >= 2 && !terse)
823edd99
GS
1544 SvREFCNT_dec(newapad);
1545
1546 postlen = av_len(postav);
1547 if (postlen >= 0 || !terse) {
1548 sv_insert(valstr, 0, 0, " = ", 3);
aa07b2f6 1549 sv_insert(valstr, 0, 0, SvPVX_const(name), SvCUR(name));
46e2868e 1550 sv_catpvs(valstr, ";");
823edd99 1551 }
920781c4 1552 sv_catsv(retval, style.pad);
823edd99 1553 sv_catsv(retval, valstr);
920781c4 1554 sv_catsv(retval, style.sep);
823edd99 1555 if (postlen >= 0) {
c70927a6 1556 SSize_t i;
920781c4 1557 sv_catsv(retval, style.pad);
823edd99
GS
1558 for (i = 0; i <= postlen; ++i) {
1559 SV *elem;
1560 svp = av_fetch(postav, i, FALSE);
1561 if (svp && (elem = *svp)) {
1562 sv_catsv(retval, elem);
1563 if (i < postlen) {
46e2868e 1564 sv_catpvs(retval, ";");
920781c4
AC
1565 sv_catsv(retval, style.sep);
1566 sv_catsv(retval, style.pad);
823edd99
GS
1567 }
1568 }
1569 }
46e2868e 1570 sv_catpvs(retval, ";");
920781c4 1571 sv_catsv(retval, style.sep);
823edd99
GS
1572 }
1573 sv_setpvn(valstr, "", 0);
1574 if (gimme == G_ARRAY) {
1575 XPUSHs(sv_2mortal(retval));
1576 if (i < imax) /* not the last time thro ? */
c2b90b61 1577 retval = newSVpvs("");
823edd99
GS
1578 }
1579 }
1580 SvREFCNT_dec(postav);
1581 SvREFCNT_dec(valstr);
1582 }
1583 else
1584 croak("Call to new() method failed to return HASH ref");
59421c01 1585 if (gimme != G_ARRAY)
823edd99
GS
1586 XPUSHs(sv_2mortal(retval));
1587 }
d036e907
FC
1588
1589SV *
1590Data_Dumper__vstring(sv)
1591 SV *sv;
1592 PROTOTYPE: $
1593 CODE:
1594 {
1595#ifdef SvVOK
1596 const MAGIC *mg;
1597 RETVAL =
1598 SvMAGICAL(sv) && (mg = mg_find(sv, 'V'))
1599 ? newSVpvn((const char *)mg->mg_ptr, mg->mg_len)
1600 : &PL_sv_undef;
1601#else
1602 RETVAL = &PL_sv_undef;
1603#endif
1604 }
1605 OUTPUT: RETVAL