3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
32 #define PERL_IN_PP_PACK_C
35 /* Types used by pack/unpack */
37 e_no_len, /* no length */
38 e_number, /* number, [] */
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
57 #define TEMPSYM_INIT(symptr, p, e, f) \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
81 U8 bytes[sizeof(long double)];
88 /* Maximum number of bytes to which a byte can grow due to upgrade */
92 * Offset for integer pack/unpack.
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
112 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
115 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
116 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
117 # define OFF16(p) ((char*)(p))
118 # define OFF32(p) ((char*)(p))
120 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
121 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
124 ++++ bad cray byte order
128 # define OFF16(p) ((char *) (p))
129 # define OFF32(p) ((char *) (p))
132 #define PUSH16(utf8, cur, p, needs_swap) \
133 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
134 #define PUSH32(utf8, cur, p, needs_swap) \
135 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
137 #if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
138 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
139 #elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
140 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
142 # error "Unsupported byteorder"
143 /* Need to add code here to re-instate mixed endian support.
144 NEEDS_SWAP would need to hold a flag indicating which action to
145 take, and S_reverse_copy and the code in S_utf8_to_bytes would need
146 logic adding to deal with any mixed-endian transformations needed.
150 /* Only to be used inside a loop (see the break) */
151 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
153 if (UNLIKELY(utf8)) { \
154 if (!S_utf8_to_bytes(aTHX_ &s, strend, \
155 (char *) (buf), len, datumtype)) break; \
157 if (UNLIKELY(needs_swap)) \
158 S_reverse_copy(s, (char *) (buf), len); \
160 Copy(s, (char *) (buf), len, char); \
165 #define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
166 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
168 #define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
169 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
171 #define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
172 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
174 #define PUSH_VAR(utf8, aptr, var, needs_swap) \
175 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
177 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
178 #define MAX_SUB_TEMPLATE_LEVEL 100
180 /* flags (note that type modifiers can also be used as flags!) */
181 #define FLAG_WAS_UTF8 0x40
182 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
183 #define FLAG_UNPACK_ONLY_ONE 0x10
184 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
185 #define FLAG_SLASH 0x04
186 #define FLAG_COMMA 0x02
187 #define FLAG_PACK 0x01
190 S_mul128(pTHX_ SV *sv, U8 m)
193 char *s = SvPV(sv, len);
196 PERL_ARGS_ASSERT_MUL128;
198 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
199 SV * const tmpNew = newSVpvs("0000000000");
201 sv_catsv(tmpNew, sv);
202 SvREFCNT_dec(sv); /* free old sv */
207 while (!*t) /* trailing '\0'? */
210 const U32 i = ((*t - '0') << 7) + m;
211 *(t--) = '0' + (char)(i % 10);
217 /* Explosives and implosives. */
219 #define ISUUCHAR(ch) (NATIVE_TO_LATIN1(ch) >= NATIVE_TO_LATIN1(' ') \
220 && NATIVE_TO_LATIN1(ch) < NATIVE_TO_LATIN1('a'))
223 #define TYPE_IS_SHRIEKING 0x100
224 #define TYPE_IS_BIG_ENDIAN 0x200
225 #define TYPE_IS_LITTLE_ENDIAN 0x400
226 #define TYPE_IS_PACK 0x800
227 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
228 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
229 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
231 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
232 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
234 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
236 #define PACK_SIZE_CANNOT_CSUM 0x80
237 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
238 #define PACK_SIZE_MASK 0x3F
240 #include "packsizetables.inc"
243 S_reverse_copy(const char *src, char *dest, STRLEN len)
251 utf8_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
259 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
260 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
261 if (retlen == (STRLEN) -1)
263 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
264 (int) TYPE_NO_MODIFIERS(datumtype));
266 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
267 "Character in '%c' format wrapped in unpack",
268 (int) TYPE_NO_MODIFIERS(datumtype));
275 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
276 utf8_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
280 S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
284 const char *from = *s;
286 const U32 flags = ckWARN(WARN_UTF8) ?
287 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
288 const bool needs_swap = NEEDS_SWAP(datumtype);
290 if (UNLIKELY(needs_swap))
293 for (;buf_len > 0; buf_len--) {
294 if (from >= end) return FALSE;
295 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
296 if (retlen == (STRLEN) -1) {
297 from += UTF8SKIP(from);
299 } else from += retlen;
304 if (UNLIKELY(needs_swap))
305 *(U8 *)--buf = (U8)val;
307 *(U8 *)buf++ = (U8)val;
309 /* We have enough characters for the buffer. Did we have problems ? */
312 /* Rewalk the string fragment while warning */
314 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
315 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
316 if (ptr >= end) break;
317 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
319 if (from > end) from = end;
322 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
323 WARN_PACK : WARN_UNPACK),
324 "Character(s) in '%c' format wrapped in %s",
325 (int) TYPE_NO_MODIFIERS(datumtype),
326 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
333 S_my_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
334 PERL_ARGS_ASSERT_MY_BYTES_TO_UTF8;
336 if (UNLIKELY(needs_swap)) {
337 const U8 *p = start + len;
338 while (p-- > start) {
339 append_utf8_from_native_byte(*p, (U8 **) & dest);
342 const U8 * const end = start + len;
343 while (start < end) {
344 append_utf8_from_native_byte(*start, (U8 **) & dest);
351 #define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
353 if (UNLIKELY(utf8)) \
354 (cur) = my_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap); \
356 if (UNLIKELY(needs_swap)) \
357 S_reverse_copy((char *)(buf), cur, len); \
359 Copy(buf, cur, len, char); \
364 #define GROWING(utf8, cat, start, cur, in_len) \
366 STRLEN glen = (in_len); \
367 if (utf8) glen *= UTF8_EXPAND; \
368 if ((cur) + glen >= (start) + SvLEN(cat)) { \
369 (start) = sv_exp_grow(cat, glen); \
370 (cur) = (start) + SvCUR(cat); \
374 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
376 const STRLEN glen = (in_len); \
378 if (utf8) gl *= UTF8_EXPAND; \
379 if ((cur) + gl >= (start) + SvLEN(cat)) { \
381 SvCUR_set((cat), (cur) - (start)); \
382 (start) = sv_exp_grow(cat, gl); \
383 (cur) = (start) + SvCUR(cat); \
385 PUSH_BYTES(utf8, cur, buf, glen, 0); \
388 #define PUSH_BYTE(utf8, s, byte) \
391 const U8 au8 = (byte); \
392 (s) = my_bytes_to_utf8(&au8, 1, (s), 0);\
393 } else *(U8 *)(s)++ = (byte); \
396 /* Only to be used inside a loop (see the break) */
397 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
400 if (str >= end) break; \
401 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
402 if (retlen == (STRLEN) -1) { \
404 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
409 static const char *_action( const tempsym_t* symptr )
411 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
414 /* Returns the sizeof() struct described by pat */
416 S_measure_struct(pTHX_ tempsym_t* symptr)
420 PERL_ARGS_ASSERT_MEASURE_STRUCT;
422 while (next_symbol(symptr)) {
426 switch (symptr->howlen) {
428 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
432 /* e_no_len and e_number */
433 len = symptr->length;
437 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
440 /* endianness doesn't influence the size of a type */
441 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
443 Perl_croak(aTHX_ "Invalid type '%c' in %s",
444 (int)TYPE_NO_MODIFIERS(symptr->code),
446 case '.' | TYPE_IS_SHRIEKING:
447 case '@' | TYPE_IS_SHRIEKING:
451 case 'U': /* XXXX Is it correct? */
454 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
455 (int) TYPE_NO_MODIFIERS(symptr->code),
462 tempsym_t savsym = *symptr;
463 symptr->patptr = savsym.grpbeg;
464 symptr->patend = savsym.grpend;
465 /* XXXX Theoretically, we need to measure many times at
466 different positions, since the subexpression may contain
467 alignment commands, but be not of aligned length.
468 Need to detect this and croak(). */
469 size = measure_struct(symptr);
473 case 'X' | TYPE_IS_SHRIEKING:
474 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
476 if (!len) /* Avoid division by 0 */
478 len = total % len; /* Assumed: the start is aligned. */
483 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
485 case 'x' | TYPE_IS_SHRIEKING:
486 if (!len) /* Avoid division by 0 */
488 star = total % len; /* Assumed: the start is aligned. */
489 if (star) /* Other portable ways? */
513 size = sizeof(char*);
523 /* locate matching closing parenthesis or bracket
524 * returns char pointer to char after match, or NULL
527 S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
529 PERL_ARGS_ASSERT_GROUP_END;
531 while (patptr < patend) {
532 const char c = *patptr++;
539 while (patptr < patend && *patptr != '\n')
543 patptr = group_end(patptr, patend, ')') + 1;
545 patptr = group_end(patptr, patend, ']') + 1;
547 Perl_croak(aTHX_ "No group ending character '%c' found in template",
549 NOT_REACHED; /* NOTREACHED */
553 /* Convert unsigned decimal number to binary.
554 * Expects a pointer to the first digit and address of length variable
555 * Advances char pointer to 1st non-digit char and returns number
558 S_get_num(pTHX_ const char *patptr, I32 *lenptr )
560 I32 len = *patptr++ - '0';
562 PERL_ARGS_ASSERT_GET_NUM;
564 while (isDIGIT(*patptr)) {
565 if (len >= 0x7FFFFFFF/10)
566 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
567 len = (len * 10) + (*patptr++ - '0');
573 /* The marvellous template parsing routine: Using state stored in *symptr,
574 * locates next template code and count
577 S_next_symbol(pTHX_ tempsym_t* symptr )
579 const char* patptr = symptr->patptr;
580 const char* const patend = symptr->patend;
582 PERL_ARGS_ASSERT_NEXT_SYMBOL;
584 symptr->flags &= ~FLAG_SLASH;
586 while (patptr < patend) {
587 if (isSPACE(*patptr))
589 else if (*patptr == '#') {
591 while (patptr < patend && *patptr != '\n')
596 /* We should have found a template code */
597 I32 code = *patptr++ & 0xFF;
598 U32 inherited_modifiers = 0;
600 if (code == ','){ /* grandfather in commas but with a warning */
601 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
602 symptr->flags |= FLAG_COMMA;
603 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
604 "Invalid type ',' in %s", _action( symptr ) );
609 /* for '(', skip to ')' */
611 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
612 Perl_croak(aTHX_ "()-group starts with a count in %s",
614 symptr->grpbeg = patptr;
615 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
616 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
617 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
621 /* look for group modifiers to inherit */
622 if (TYPE_ENDIANNESS(symptr->flags)) {
623 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
624 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
627 /* look for modifiers */
628 while (patptr < patend) {
633 modifier = TYPE_IS_SHRIEKING;
634 allowed = "sSiIlLxXnNvV@.";
637 modifier = TYPE_IS_BIG_ENDIAN;
638 allowed = ENDIANNESS_ALLOWED_TYPES;
641 modifier = TYPE_IS_LITTLE_ENDIAN;
642 allowed = ENDIANNESS_ALLOWED_TYPES;
653 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
654 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
655 allowed, _action( symptr ) );
657 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
658 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
659 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
660 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
661 TYPE_ENDIANNESS_MASK)
662 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
663 *patptr, _action( symptr ) );
665 if ((code & modifier)) {
666 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
667 "Duplicate modifier '%c' after '%c' in %s",
668 *patptr, (int) TYPE_NO_MODIFIERS(code),
676 /* inherit modifiers */
677 code |= inherited_modifiers;
679 /* look for count and/or / */
680 if (patptr < patend) {
681 if (isDIGIT(*patptr)) {
682 patptr = get_num( patptr, &symptr->length );
683 symptr->howlen = e_number;
685 } else if (*patptr == '*') {
687 symptr->howlen = e_star;
689 } else if (*patptr == '[') {
690 const char* lenptr = ++patptr;
691 symptr->howlen = e_number;
692 patptr = group_end( patptr, patend, ']' ) + 1;
693 /* what kind of [] is it? */
694 if (isDIGIT(*lenptr)) {
695 lenptr = get_num( lenptr, &symptr->length );
697 Perl_croak(aTHX_ "Malformed integer in [] in %s",
700 tempsym_t savsym = *symptr;
701 symptr->patend = patptr-1;
702 symptr->patptr = lenptr;
703 savsym.length = measure_struct(symptr);
707 symptr->howlen = e_no_len;
712 while (patptr < patend) {
713 if (isSPACE(*patptr))
715 else if (*patptr == '#') {
717 while (patptr < patend && *patptr != '\n')
722 if (*patptr == '/') {
723 symptr->flags |= FLAG_SLASH;
725 if (patptr < patend &&
726 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
727 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
734 /* at end - no count, no / */
735 symptr->howlen = e_no_len;
740 symptr->patptr = patptr;
744 symptr->patptr = patptr;
749 There is no way to cleanly handle the case where we should process the
750 string per byte in its upgraded form while it's really in downgraded form
751 (e.g. estimates like strend-s as an upper bound for the number of
752 characters left wouldn't work). So if we foresee the need of this
753 (pattern starts with U or contains U0), we want to work on the encoded
754 version of the string. Users are advised to upgrade their pack string
755 themselves if they need to do a lot of unpacks like this on it
758 need_utf8(const char *pat, const char *patend)
762 PERL_ARGS_ASSERT_NEED_UTF8;
764 while (pat < patend) {
767 pat = (const char *) memchr(pat, '\n', patend-pat);
768 if (!pat) return FALSE;
769 } else if (pat[0] == 'U') {
770 if (first || pat[1] == '0') return TRUE;
771 } else first = FALSE;
778 first_symbol(const char *pat, const char *patend) {
779 PERL_ARGS_ASSERT_FIRST_SYMBOL;
781 while (pat < patend) {
782 if (pat[0] != '#') return pat[0];
784 pat = (const char *) memchr(pat, '\n', patend-pat);
793 =head1 Pack and Unpack
795 =for apidoc unpackstring
797 The engine implementing the C<unpack()> Perl function.
799 Using the template C<pat..patend>, this function unpacks the string
800 C<s..strend> into a number of mortal SVs, which it pushes onto the perl
801 argument (C<@_>) stack (so you will need to issue a C<PUTBACK> before and
802 C<SPAGAIN> after the call to this function). It returns the number of
805 The C<strend> and C<patend> pointers should point to the byte following the
806 last character of each string.
808 Although this function returns its values on the perl argument stack, it
809 doesn't take any parameters from that stack (and thus in particular
810 there's no need to do a C<PUSHMARK> before calling it, unlike L</call_pv> for
816 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
820 PERL_ARGS_ASSERT_UNPACKSTRING;
822 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
823 else if (need_utf8(pat, patend)) {
824 /* We probably should try to avoid this in case a scalar context call
825 wouldn't get to the "U0" */
826 STRLEN len = strend - s;
827 s = (char *) bytes_to_utf8((U8 *) s, &len);
830 flags |= FLAG_DO_UTF8;
833 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
834 flags |= FLAG_PARSE_UTF8;
836 TEMPSYM_INIT(&sym, pat, patend, flags);
838 return unpack_rec(&sym, s, s, strend, NULL );
842 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
846 const I32 start_sp_offset = SP - PL_stack_base;
851 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
853 bool explicit_length;
854 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
855 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
857 PERL_ARGS_ASSERT_UNPACK_REC;
859 symptr->strbeg = s - strbeg;
861 while (next_symbol(symptr)) {
864 I32 datumtype = symptr->code;
866 /* do first one only unless in list context
867 / is implemented by unpacking the count, then popping it from the
868 stack, so must check that we're not in the middle of a / */
870 && (SP - PL_stack_base == start_sp_offset + 1)
871 && (datumtype != '/') ) /* XXX can this be omitted */
874 switch (howlen = symptr->howlen) {
876 len = strend - strbeg; /* long enough */
879 /* e_no_len and e_number */
880 len = symptr->length;
884 explicit_length = TRUE;
886 beyond = s >= strend;
888 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
890 /* props nonzero means we can process this letter. */
891 const long size = props & PACK_SIZE_MASK;
892 const long howmany = (strend - s) / size;
896 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
897 if (len && unpack_only_one) len = 1;
903 needs_swap = NEEDS_SWAP(datumtype);
905 switch(TYPE_NO_ENDIANNESS(datumtype)) {
907 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
910 if (howlen == e_no_len)
911 len = 16; /* len is not specified */
919 tempsym_t savsym = *symptr;
920 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
921 symptr->flags |= group_modifiers;
922 symptr->patend = savsym.grpend;
923 symptr->previous = &savsym;
926 if (len && unpack_only_one) len = 1;
928 symptr->patptr = savsym.grpbeg;
929 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
930 else symptr->flags &= ~FLAG_PARSE_UTF8;
931 unpack_rec(symptr, s, strbeg, strend, &s);
932 if (s == strend && savsym.howlen == e_star)
933 break; /* No way to continue */
936 savsym.flags = symptr->flags & ~group_modifiers;
940 case '.' | TYPE_IS_SHRIEKING:
944 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
945 if (howlen == e_star) from = strbeg;
946 else if (len <= 0) from = s;
948 tempsym_t *group = symptr;
950 while (--len && group) group = group->previous;
951 from = group ? strbeg + group->strbeg : strbeg;
954 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
955 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
959 case '@' | TYPE_IS_SHRIEKING:
961 s = strbeg + symptr->strbeg;
962 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
966 Perl_croak(aTHX_ "'@' outside of string in unpack");
971 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
974 Perl_croak(aTHX_ "'@' outside of string in unpack");
978 case 'X' | TYPE_IS_SHRIEKING:
979 if (!len) /* Avoid division by 0 */
982 const char *hop, *last;
986 hop += UTF8SKIP(hop);
993 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
997 len = (s - strbeg) % len;
1003 Perl_croak(aTHX_ "'X' outside of string in unpack");
1004 while (--s, UTF8_IS_CONTINUATION(*s)) {
1006 Perl_croak(aTHX_ "'X' outside of string in unpack");
1011 if (len > s - strbeg)
1012 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1016 case 'x' | TYPE_IS_SHRIEKING: {
1018 if (!len) /* Avoid division by 0 */
1020 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1021 else ai32 = (s - strbeg) % len;
1022 if (ai32 == 0) break;
1030 Perl_croak(aTHX_ "'x' outside of string in unpack");
1035 if (len > strend - s)
1036 Perl_croak(aTHX_ "'x' outside of string in unpack");
1041 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1047 /* Preliminary length estimate is assumed done in 'W' */
1048 if (len > strend - s) len = strend - s;
1054 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1055 if (hop >= strend) {
1057 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1062 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1064 } else if (len > strend - s)
1067 if (datumtype == 'Z') {
1068 /* 'Z' strips stuff after first null */
1069 const char *ptr, *end;
1071 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1072 sv = newSVpvn(s, ptr-s);
1073 if (howlen == e_star) /* exact for 'Z*' */
1074 len = ptr-s + (ptr != strend ? 1 : 0);
1075 } else if (datumtype == 'A') {
1076 /* 'A' strips both nulls and spaces */
1078 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1079 for (ptr = s+len-1; ptr >= s; ptr--) {
1081 && !UTF8_IS_CONTINUATION(*ptr)
1082 && !isSPACE_utf8_safe(ptr, strend))
1087 if (ptr >= s) ptr += UTF8SKIP(ptr);
1090 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1092 for (ptr = s+len-1; ptr >= s; ptr--)
1093 if (*ptr != 0 && !isSPACE(*ptr)) break;
1096 sv = newSVpvn(s, ptr-s);
1097 } else sv = newSVpvn(s, len);
1101 /* Undo any upgrade done due to need_utf8() */
1102 if (!(symptr->flags & FLAG_WAS_UTF8))
1103 sv_utf8_downgrade(sv, 0);
1111 if (howlen == e_star || len > (strend - s) * 8)
1112 len = (strend - s) * 8;
1115 while (len >= 8 && s < strend) {
1116 cuv += PL_bitcount[utf8_to_byte(aTHX_ &s, strend, datumtype)];
1121 cuv += PL_bitcount[*(U8 *)s++];
1124 if (len && s < strend) {
1126 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1127 if (datumtype == 'b')
1129 if (bits & 1) cuv++;
1134 if (bits & 0x80) cuv++;
1141 sv = sv_2mortal(newSV(len ? len : 1));
1144 if (datumtype == 'b') {
1146 const I32 ai32 = len;
1147 for (len = 0; len < ai32; len++) {
1148 if (len & 7) bits >>= 1;
1150 if (s >= strend) break;
1151 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1152 } else bits = *(U8 *) s++;
1153 *str++ = bits & 1 ? '1' : '0';
1157 const I32 ai32 = len;
1158 for (len = 0; len < ai32; len++) {
1159 if (len & 7) bits <<= 1;
1161 if (s >= strend) break;
1162 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1163 } else bits = *(U8 *) s++;
1164 *str++ = bits & 0x80 ? '1' : '0';
1168 SvCUR_set(sv, str - SvPVX_const(sv));
1175 /* Preliminary length estimate, acceptable for utf8 too */
1176 if (howlen == e_star || len > (strend - s) * 2)
1177 len = (strend - s) * 2;
1179 sv = sv_2mortal(newSV(len ? len : 1));
1183 if (datumtype == 'h') {
1186 for (len = 0; len < ai32; len++) {
1187 if (len & 1) bits >>= 4;
1189 if (s >= strend) break;
1190 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1191 } else bits = * (U8 *) s++;
1193 *str++ = PL_hexdigit[bits & 15];
1197 const I32 ai32 = len;
1198 for (len = 0; len < ai32; len++) {
1199 if (len & 1) bits <<= 4;
1201 if (s >= strend) break;
1202 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1203 } else bits = *(U8 *) s++;
1205 *str++ = PL_hexdigit[(bits >> 4) & 15];
1210 SvCUR_set(sv, str - SvPVX_const(sv));
1217 if (explicit_length)
1218 /* Switch to "character" mode */
1219 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1224 while (len-- > 0 && s < strend) {
1229 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1230 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1231 if (retlen == (STRLEN) -1)
1232 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1236 aint = *(U8 *)(s)++;
1237 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1241 else if (checksum > bits_in_uv)
1242 cdouble += (NV)aint;
1250 while (len-- > 0 && s < strend) {
1252 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1253 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1254 if (retlen == (STRLEN) -1)
1255 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1259 else if (checksum > bits_in_uv)
1260 cdouble += (NV) val;
1264 } else if (!checksum)
1266 const U8 ch = *(U8 *) s++;
1269 else if (checksum > bits_in_uv)
1270 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1272 while (len-- > 0) cuv += *(U8 *) s++;
1276 if (explicit_length && howlen != e_star) {
1277 /* Switch to "bytes in UTF-8" mode */
1278 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1280 /* Should be impossible due to the need_utf8() test */
1281 Perl_croak(aTHX_ "U0 mode on a byte string");
1285 if (len > strend - s) len = strend - s;
1287 if (len && unpack_only_one) len = 1;
1291 while (len-- > 0 && s < strend) {
1295 U8 result[UTF8_MAXLEN];
1296 const char *ptr = s;
1298 /* Bug: warns about bad utf8 even if we are short on bytes
1299 and will break out of the loop */
1300 if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1303 len = UTF8SKIP(result);
1304 if (!S_utf8_to_bytes(aTHX_ &ptr, strend,
1305 (char *) &result[1], len-1, 'U')) break;
1306 auv = NATIVE_TO_UNI(utf8n_to_uvchr(result,
1309 UTF8_ALLOW_DEFAULT));
1312 auv = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s,
1315 UTF8_ALLOW_DEFAULT));
1316 if (retlen == (STRLEN) -1)
1317 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1322 else if (checksum > bits_in_uv)
1323 cdouble += (NV) auv;
1328 case 's' | TYPE_IS_SHRIEKING:
1329 #if SHORTSIZE != SIZE16
1332 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
1335 else if (checksum > bits_in_uv)
1336 cdouble += (NV)ashort;
1348 #if U16SIZE > SIZE16
1351 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1352 #if U16SIZE > SIZE16
1358 else if (checksum > bits_in_uv)
1359 cdouble += (NV)ai16;
1364 case 'S' | TYPE_IS_SHRIEKING:
1365 #if SHORTSIZE != SIZE16
1367 unsigned short aushort;
1368 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap,
1372 else if (checksum > bits_in_uv)
1373 cdouble += (NV)aushort;
1386 #if U16SIZE > SIZE16
1389 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
1390 if (datumtype == 'n')
1391 au16 = PerlSock_ntohs(au16);
1392 if (datumtype == 'v')
1396 else if (checksum > bits_in_uv)
1397 cdouble += (NV) au16;
1402 case 'v' | TYPE_IS_SHRIEKING:
1403 case 'n' | TYPE_IS_SHRIEKING:
1406 # if U16SIZE > SIZE16
1409 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1410 /* There should never be any byte-swapping here. */
1411 assert(!TYPE_ENDIANNESS(datumtype));
1412 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1413 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1414 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1415 ai16 = (I16) vtohs((U16) ai16);
1418 else if (checksum > bits_in_uv)
1419 cdouble += (NV) ai16;
1425 case 'i' | TYPE_IS_SHRIEKING:
1428 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
1431 else if (checksum > bits_in_uv)
1432 cdouble += (NV)aint;
1438 case 'I' | TYPE_IS_SHRIEKING:
1441 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
1444 else if (checksum > bits_in_uv)
1445 cdouble += (NV)auint;
1453 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
1456 else if (checksum > bits_in_uv)
1465 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
1468 else if (checksum > bits_in_uv)
1474 case 'l' | TYPE_IS_SHRIEKING:
1475 #if LONGSIZE != SIZE32
1478 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
1481 else if (checksum > bits_in_uv)
1482 cdouble += (NV)along;
1493 #if U32SIZE > SIZE32
1496 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1497 #if U32SIZE > SIZE32
1498 if (ai32 > 2147483647) ai32 -= 4294967296;
1502 else if (checksum > bits_in_uv)
1503 cdouble += (NV)ai32;
1508 case 'L' | TYPE_IS_SHRIEKING:
1509 #if LONGSIZE != SIZE32
1511 unsigned long aulong;
1512 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
1515 else if (checksum > bits_in_uv)
1516 cdouble += (NV)aulong;
1529 #if U32SIZE > SIZE32
1532 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
1533 if (datumtype == 'N')
1534 au32 = PerlSock_ntohl(au32);
1535 if (datumtype == 'V')
1539 else if (checksum > bits_in_uv)
1540 cdouble += (NV)au32;
1545 case 'V' | TYPE_IS_SHRIEKING:
1546 case 'N' | TYPE_IS_SHRIEKING:
1549 #if U32SIZE > SIZE32
1552 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1553 /* There should never be any byte swapping here. */
1554 assert(!TYPE_ENDIANNESS(datumtype));
1555 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1556 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1557 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1558 ai32 = (I32)vtohl((U32)ai32);
1561 else if (checksum > bits_in_uv)
1562 cdouble += (NV)ai32;
1570 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1571 /* newSVpv generates undef if aptr is NULL */
1572 mPUSHs(newSVpv(aptr, 0));
1580 while (len > 0 && s < strend) {
1582 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1583 auv = (auv << 7) | (ch & 0x7f);
1584 /* UTF8_IS_XXXXX not right here because this is a BER, not
1585 * UTF-8 format - using constant 0x80 */
1593 if (++bytes >= sizeof(UV)) { /* promote to string */
1596 sv = Perl_newSVpvf(aTHX_ "%.*" UVuf,
1597 (int)TYPE_DIGITS(UV), auv);
1598 while (s < strend) {
1599 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1600 sv = mul128(sv, (U8)(ch & 0x7f));
1606 t = SvPV_nolen_const(sv);
1615 if ((s >= strend) && bytes)
1616 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1620 if (symptr->howlen == e_star)
1621 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1623 if (s + sizeof(char*) <= strend) {
1625 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1626 /* newSVpvn generates undef if aptr is NULL */
1627 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1630 #if defined(HAS_QUAD) && IVSIZE >= 8
1634 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
1636 mPUSHs(newSViv((IV)aquad));
1637 else if (checksum > bits_in_uv)
1638 cdouble += (NV)aquad;
1646 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
1648 mPUSHs(newSVuv((UV)auquad));
1649 else if (checksum > bits_in_uv)
1650 cdouble += (NV)auquad;
1656 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1660 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
1670 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
1680 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1681 datumtype, needs_swap);
1688 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1692 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1693 sizeof(aldouble.bytes), datumtype, needs_swap);
1694 /* The most common long double format, the x86 80-bit
1695 * extended precision, has either 2 or 6 unused bytes,
1696 * which may contain garbage, which may contain
1697 * unintentional data. While we do zero the bytes of
1698 * the long double data in pack(), here in unpack() we
1699 * don't, because it's really hard to envision that
1700 * reading the long double off aldouble would be
1701 * affected by the unused bytes.
1703 * Note that trying to unpack 'long doubles' of 'long
1704 * doubles' packed in another system is in the general
1705 * case doomed without having more detail. */
1707 mPUSHn(aldouble.ld);
1709 cdouble += aldouble.ld;
1715 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1716 sv = sv_2mortal(newSV(l));
1717 if (l) SvPOK_on(sv);
1720 /* Note that all legal uuencoded strings are ASCII printables, so
1721 * have the same representation under UTF-8 vs not. This means we
1722 * can ignore UTF8ness on legal input. For illegal we stop at the
1723 * first failure, and don't report where/what that is, so again we
1724 * can ignore UTF8ness */
1726 while (s < strend && *s != ' ' && ISUUCHAR(*s)) {
1730 len = PL_uudmap[*(U8*)s++] & 077;
1732 if (s < strend && ISUUCHAR(*s))
1733 a = PL_uudmap[*(U8*)s++] & 077;
1736 if (s < strend && ISUUCHAR(*s))
1737 b = PL_uudmap[*(U8*)s++] & 077;
1740 if (s < strend && ISUUCHAR(*s))
1741 c = PL_uudmap[*(U8*)s++] & 077;
1744 if (s < strend && ISUUCHAR(*s))
1745 d = PL_uudmap[*(U8*)s++] & 077;
1748 hunk[0] = (char)((a << 2) | (b >> 4));
1749 hunk[1] = (char)((b << 4) | (c >> 2));
1750 hunk[2] = (char)((c << 6) | d);
1752 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1757 else /* possible checksum byte */
1758 if (s + 1 < strend && s[1] == '\n')
1764 } /* End of switch */
1767 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1768 (checksum > bits_in_uv &&
1769 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1772 anv = (NV) (1 << (checksum & 15));
1773 while (checksum >= 16) {
1777 while (cdouble < 0.0)
1779 cdouble = Perl_modf(cdouble / anv, &trouble);
1780 #ifdef LONGDOUBLE_DOUBLEDOUBLE
1781 /* Workaround for powerpc doubledouble modfl bug:
1782 * close to 1.0L and -1.0L cdouble is 0, and trouble
1783 * is cdouble / anv. */
1784 if (trouble != Perl_ceil(trouble)) {
1786 if (cdouble > 1.0L) cdouble -= 1.0L;
1787 if (cdouble < -1.0L) cdouble += 1.0L;
1791 sv = newSVnv(cdouble);
1794 if (checksum < bits_in_uv) {
1795 UV mask = ((UV)1 << checksum) - 1;
1804 if (symptr->flags & FLAG_SLASH){
1805 if (SP - PL_stack_base - start_sp_offset <= 0)
1807 if( next_symbol(symptr) ){
1808 if( symptr->howlen == e_number )
1809 Perl_croak(aTHX_ "Count after length/code in unpack" );
1811 /* ...end of char buffer then no decent length available */
1812 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1814 /* take top of stack (hope it's numeric) */
1817 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1820 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1822 datumtype = symptr->code;
1823 explicit_length = FALSE;
1831 return SP - PL_stack_base - start_sp_offset;
1841 const char *pat = SvPV_const(left, llen);
1842 const char *s = SvPV_const(right, rlen);
1843 const char *strend = s + rlen;
1844 const char *patend = pat + llen;
1848 cnt = unpackstring(pat, patend, s, strend,
1849 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
1850 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
1853 if ( !cnt && gimme == G_SCALAR )
1854 PUSHs(&PL_sv_undef);
1859 doencodes(U8 *h, const U8 *s, I32 len)
1861 *h++ = PL_uuemap[len];
1863 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1864 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1865 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1866 *h++ = PL_uuemap[(077 & (s[2] & 077))];
1871 const U8 r = (len > 1 ? s[1] : '\0');
1872 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1873 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1874 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1875 *h++ = PL_uuemap[0];
1882 S_is_an_int(pTHX_ const char *s, STRLEN l)
1884 SV *result = newSVpvn(s, l);
1885 char *const result_c = SvPV_nolen(result); /* convenience */
1886 char *out = result_c;
1890 PERL_ARGS_ASSERT_IS_AN_INT;
1898 SvREFCNT_dec(result);
1921 SvREFCNT_dec(result);
1927 SvCUR_set(result, out - result_c);
1931 /* pnum must be '\0' terminated */
1933 S_div128(pTHX_ SV *pnum, bool *done)
1936 char * const s = SvPV(pnum, len);
1940 PERL_ARGS_ASSERT_DIV128;
1944 const int i = m * 10 + (*t - '0');
1945 const int r = (i >> 7); /* r < 10 */
1953 SvCUR_set(pnum, (STRLEN) (t - s));
1958 =for apidoc packlist
1960 The engine implementing C<pack()> Perl function.
1966 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
1970 PERL_ARGS_ASSERT_PACKLIST;
1972 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
1974 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1975 Also make sure any UTF8 flag is loaded */
1976 SvPV_force_nolen(cat);
1978 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
1980 (void)pack_rec( cat, &sym, beglist, endlist );
1983 /* like sv_utf8_upgrade, but also repoint the group start markers */
1985 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
1988 const char *from_ptr, *from_start, *from_end, **marks, **m;
1989 char *to_start, *to_ptr;
1991 if (SvUTF8(sv)) return;
1993 from_start = SvPVX_const(sv);
1994 from_end = from_start + SvCUR(sv);
1995 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
1996 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
1997 if (from_ptr == from_end) {
1998 /* Simple case: no character needs to be changed */
2003 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2004 Newx(to_start, len, char);
2005 Copy(from_start, to_start, from_ptr-from_start, char);
2006 to_ptr = to_start + (from_ptr-from_start);
2008 Newx(marks, sym_ptr->level+2, const char *);
2009 for (group=sym_ptr; group; group = group->previous)
2010 marks[group->level] = from_start + group->strbeg;
2011 marks[sym_ptr->level+1] = from_end+1;
2012 for (m = marks; *m < from_ptr; m++)
2013 *m = to_start + (*m-from_start);
2015 for (;from_ptr < from_end; from_ptr++) {
2016 while (*m == from_ptr) *m++ = to_ptr;
2017 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2021 while (*m == from_ptr) *m++ = to_ptr;
2022 if (m != marks + sym_ptr->level+1) {
2025 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2026 "level=%d", m, marks, sym_ptr->level);
2028 for (group=sym_ptr; group; group = group->previous)
2029 group->strbeg = marks[group->level] - to_start;
2034 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2035 from_start -= SvIVX(sv);
2038 SvFLAGS(sv) &= ~SVf_OOK;
2041 Safefree(from_start);
2042 SvPV_set(sv, to_start);
2043 SvCUR_set(sv, to_ptr - to_start);
2048 /* Exponential string grower. Makes string extension effectively O(n)
2049 needed says how many extra bytes we need (not counting the final '\0')
2050 Only grows the string if there is an actual lack of space
2053 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2054 const STRLEN cur = SvCUR(sv);
2055 const STRLEN len = SvLEN(sv);
2058 PERL_ARGS_ASSERT_SV_EXP_GROW;
2060 if (len - cur > needed) return SvPVX(sv);
2061 extend = needed > len ? needed : len;
2062 return SvGROW(sv, len+extend+1);
2066 S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
2069 if (UNLIKELY(SvAMAGIC(sv)))
2071 if (UNLIKELY(isinfnansv(sv))) {
2072 const I32 c = TYPE_NO_MODIFIERS(datumtype);
2073 const NV nv = SvNV_nomg(sv);
2075 Perl_croak(aTHX_ "Cannot compress %" NVgf " in pack", nv);
2077 Perl_croak(aTHX_ "Cannot pack %" NVgf " with '%c'", nv, (int) c);
2082 #define SvIV_no_inf(sv,d) \
2083 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
2084 #define SvUV_no_inf(sv,d) \
2085 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
2089 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2091 tempsym_t lookahead;
2092 I32 items = endlist - beglist;
2093 bool found = next_symbol(symptr);
2094 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2095 bool warn_utf8 = ckWARN(WARN_UTF8);
2098 PERL_ARGS_ASSERT_PACK_REC;
2100 if (symptr->level == 0 && found && symptr->code == 'U') {
2101 marked_upgrade(aTHX_ cat, symptr);
2102 symptr->flags |= FLAG_DO_UTF8;
2105 symptr->strbeg = SvCUR(cat);
2111 SV *lengthcode = NULL;
2112 I32 datumtype = symptr->code;
2113 howlen_t howlen = symptr->howlen;
2114 char *start = SvPVX(cat);
2115 char *cur = start + SvCUR(cat);
2118 #define NEXTFROM (lengthcode ? lengthcode : items > 0 ? (--items, *beglist++) : &PL_sv_no)
2119 #define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
2123 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2127 /* e_no_len and e_number */
2128 len = symptr->length;
2133 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2135 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2136 /* We can process this letter. */
2137 STRLEN size = props & PACK_SIZE_MASK;
2138 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2142 /* Look ahead for next symbol. Do we have code/code? */
2143 lookahead = *symptr;
2144 found = next_symbol(&lookahead);
2145 if (symptr->flags & FLAG_SLASH) {
2147 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2148 if (strchr("aAZ", lookahead.code)) {
2149 if (lookahead.howlen == e_number) count = lookahead.length;
2152 count = sv_len_utf8(*beglist);
2155 if (lookahead.code == 'Z') count++;
2158 if (lookahead.howlen == e_number && lookahead.length < items)
2159 count = lookahead.length;
2162 lookahead.howlen = e_number;
2163 lookahead.length = count;
2164 lengthcode = sv_2mortal(newSViv(count));
2167 needs_swap = NEEDS_SWAP(datumtype);
2169 /* Code inside the switch must take care to properly update
2170 cat (CUR length and '\0' termination) if it updated *cur and
2171 doesn't simply leave using break */
2172 switch (TYPE_NO_ENDIANNESS(datumtype)) {
2174 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2175 (int) TYPE_NO_MODIFIERS(datumtype));
2177 Perl_croak(aTHX_ "'%%' may not be used in pack");
2179 case '.' | TYPE_IS_SHRIEKING:
2181 if (howlen == e_star) from = start;
2182 else if (len == 0) from = cur;
2184 tempsym_t *group = symptr;
2186 while (--len && group) group = group->previous;
2187 from = group ? start + group->strbeg : start;
2190 len = SvIV_no_inf(fromstr, datumtype);
2192 case '@' | TYPE_IS_SHRIEKING:
2194 from = start + symptr->strbeg;
2196 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2198 while (len && from < cur) {
2199 from += UTF8SKIP(from);
2203 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2205 /* Here we know from == cur */
2207 GROWING(0, cat, start, cur, len);
2208 Zero(cur, len, char);
2210 } else if (from < cur) {
2213 } else goto no_change;
2221 if (len > 0) goto grow;
2222 if (len == 0) goto no_change;
2229 tempsym_t savsym = *symptr;
2230 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2231 symptr->flags |= group_modifiers;
2232 symptr->patend = savsym.grpend;
2234 symptr->previous = &lookahead;
2237 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2238 else symptr->flags &= ~FLAG_PARSE_UTF8;
2239 was_utf8 = SvUTF8(cat);
2240 symptr->patptr = savsym.grpbeg;
2241 beglist = pack_rec(cat, symptr, beglist, endlist);
2242 if (SvUTF8(cat) != was_utf8)
2243 /* This had better be an upgrade while in utf8==0 mode */
2246 if (savsym.howlen == e_star && beglist == endlist)
2247 break; /* No way to continue */
2249 items = endlist - beglist;
2250 lookahead.flags = symptr->flags & ~group_modifiers;
2253 case 'X' | TYPE_IS_SHRIEKING:
2254 if (!len) /* Avoid division by 0 */
2261 hop += UTF8SKIP(hop);
2268 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2272 len = (cur-start) % len;
2276 if (len < 1) goto no_change;
2280 Perl_croak(aTHX_ "'%c' outside of string in pack",
2281 (int) TYPE_NO_MODIFIERS(datumtype));
2282 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2284 Perl_croak(aTHX_ "'%c' outside of string in pack",
2285 (int) TYPE_NO_MODIFIERS(datumtype));
2291 if (cur - start < len)
2292 Perl_croak(aTHX_ "'%c' outside of string in pack",
2293 (int) TYPE_NO_MODIFIERS(datumtype));
2296 if (cur < start+symptr->strbeg) {
2297 /* Make sure group starts don't point into the void */
2299 const STRLEN length = cur-start;
2300 for (group = symptr;
2301 group && length < group->strbeg;
2302 group = group->previous) group->strbeg = length;
2303 lookahead.strbeg = length;
2306 case 'x' | TYPE_IS_SHRIEKING: {
2308 if (!len) /* Avoid division by 0 */
2310 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2311 else ai32 = (cur - start) % len;
2312 if (ai32 == 0) goto no_change;
2324 aptr = SvPV_const(fromstr, fromlen);
2325 if (DO_UTF8(fromstr)) {
2326 const char *end, *s;
2328 if (!utf8 && !SvUTF8(cat)) {
2329 marked_upgrade(aTHX_ cat, symptr);
2330 lookahead.flags |= FLAG_DO_UTF8;
2331 lookahead.strbeg = symptr->strbeg;
2334 cur = start + SvCUR(cat);
2336 if (howlen == e_star) {
2337 if (utf8) goto string_copy;
2341 end = aptr + fromlen;
2342 fromlen = datumtype == 'Z' ? len-1 : len;
2343 while ((I32) fromlen > 0 && s < end) {
2348 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2351 if (datumtype == 'Z') len++;
2357 fromlen = len - fromlen;
2358 if (datumtype == 'Z') fromlen--;
2359 if (howlen == e_star) {
2361 if (datumtype == 'Z') len++;
2363 GROWING(0, cat, start, cur, len);
2364 if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2365 datumtype | TYPE_IS_PACK))
2366 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2367 "for '%c', aptr=%p end=%p cur=%p, fromlen=%" UVuf,
2368 (int)datumtype, aptr, end, cur, (UV)fromlen);
2372 if (howlen == e_star) {
2374 if (datumtype == 'Z') len++;
2376 if (len <= (I32) fromlen) {
2378 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2380 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2382 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2383 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2385 while (fromlen > 0) {
2386 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2392 if (howlen == e_star) {
2394 if (datumtype == 'Z') len++;
2396 if (len <= (I32) fromlen) {
2398 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2400 GROWING(0, cat, start, cur, len);
2401 Copy(aptr, cur, fromlen, char);
2405 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2412 const char *str, *end;
2419 str = SvPV_const(fromstr, fromlen);
2420 end = str + fromlen;
2421 if (DO_UTF8(fromstr)) {
2423 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2425 utf8_source = FALSE;
2426 utf8_flags = 0; /* Unused, but keep compilers happy */
2428 if (howlen == e_star) len = fromlen;
2429 field_len = (len+7)/8;
2430 GROWING(utf8, cat, start, cur, field_len);
2431 if (len > (I32)fromlen) len = fromlen;
2434 if (datumtype == 'B')
2438 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2440 } else bits |= *str++ & 1;
2441 if (l & 7) bits <<= 1;
2443 PUSH_BYTE(utf8, cur, bits);
2448 /* datumtype == 'b' */
2452 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2453 if (val & 1) bits |= 0x80;
2454 } else if (*str++ & 1)
2456 if (l & 7) bits >>= 1;
2458 PUSH_BYTE(utf8, cur, bits);
2464 if (datumtype == 'B')
2465 bits <<= 7 - (l & 7);
2467 bits >>= 7 - (l & 7);
2468 PUSH_BYTE(utf8, cur, bits);
2471 /* Determine how many chars are left in the requested field */
2473 if (howlen == e_star) field_len = 0;
2474 else field_len -= l;
2475 Zero(cur, field_len, char);
2481 const char *str, *end;
2488 str = SvPV_const(fromstr, fromlen);
2489 end = str + fromlen;
2490 if (DO_UTF8(fromstr)) {
2492 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2494 utf8_source = FALSE;
2495 utf8_flags = 0; /* Unused, but keep compilers happy */
2497 if (howlen == e_star) len = fromlen;
2498 field_len = (len+1)/2;
2499 GROWING(utf8, cat, start, cur, field_len);
2500 if (!utf8_source && len > (I32)fromlen) len = fromlen;
2503 if (datumtype == 'H')
2507 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2508 if (val < 256 && isALPHA(val))
2509 bits |= (val + 9) & 0xf;
2512 } else if (isALPHA(*str))
2513 bits |= (*str++ + 9) & 0xf;
2515 bits |= *str++ & 0xf;
2516 if (l & 1) bits <<= 4;
2518 PUSH_BYTE(utf8, cur, bits);
2526 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2527 if (val < 256 && isALPHA(val))
2528 bits |= ((val + 9) & 0xf) << 4;
2530 bits |= (val & 0xf) << 4;
2531 } else if (isALPHA(*str))
2532 bits |= ((*str++ + 9) & 0xf) << 4;
2534 bits |= (*str++ & 0xf) << 4;
2535 if (l & 1) bits >>= 4;
2537 PUSH_BYTE(utf8, cur, bits);
2543 PUSH_BYTE(utf8, cur, bits);
2546 /* Determine how many chars are left in the requested field */
2548 if (howlen == e_star) field_len = 0;
2549 else field_len -= l;
2550 Zero(cur, field_len, char);
2558 aiv = SvIV_no_inf(fromstr, datumtype);
2559 if ((-128 > aiv || aiv > 127))
2560 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2561 "Character in 'c' format wrapped in pack");
2562 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2567 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2573 aiv = SvIV_no_inf(fromstr, datumtype);
2574 if ((0 > aiv || aiv > 0xff))
2575 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2576 "Character in 'C' format wrapped in pack");
2577 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2582 U8 in_bytes = (U8)IN_BYTES;
2584 end = start+SvLEN(cat)-1;
2585 if (utf8) end -= UTF8_MAXLEN-1;
2589 auv = SvUV_no_inf(fromstr, datumtype);
2590 if (in_bytes) auv = auv % 0x100;
2595 SvCUR_set(cat, cur - start);
2597 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2598 end = start+SvLEN(cat)-UTF8_MAXLEN;
2600 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, 0);
2605 SvCUR_set(cat, cur - start);
2606 marked_upgrade(aTHX_ cat, symptr);
2607 lookahead.flags |= FLAG_DO_UTF8;
2608 lookahead.strbeg = symptr->strbeg;
2611 cur = start + SvCUR(cat);
2612 end = start+SvLEN(cat)-UTF8_MAXLEN;
2615 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2616 "Character in 'W' format wrapped in pack");
2621 SvCUR_set(cat, cur - start);
2622 GROWING(0, cat, start, cur, len+1);
2623 end = start+SvLEN(cat)-1;
2625 *(U8 *) cur++ = (U8)auv;
2634 if (!(symptr->flags & FLAG_DO_UTF8)) {
2635 marked_upgrade(aTHX_ cat, symptr);
2636 lookahead.flags |= FLAG_DO_UTF8;
2637 lookahead.strbeg = symptr->strbeg;
2643 end = start+SvLEN(cat);
2644 if (!utf8) end -= UTF8_MAXLEN;
2648 auv = SvUV_no_inf(fromstr, datumtype);
2650 U8 buffer[UTF8_MAXLEN], *endb;
2651 endb = uvchr_to_utf8_flags(buffer, UNI_TO_NATIVE(auv), 0);
2652 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2654 SvCUR_set(cat, cur - start);
2655 GROWING(0, cat, start, cur,
2656 len+(endb-buffer)*UTF8_EXPAND);
2657 end = start+SvLEN(cat);
2659 cur = my_bytes_to_utf8(buffer, endb-buffer, cur, 0);
2663 SvCUR_set(cat, cur - start);
2664 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2665 end = start+SvLEN(cat)-UTF8_MAXLEN;
2667 cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
2674 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2680 anv = SvNV(fromstr);
2681 # if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
2682 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2683 * on Alpha; fake it if we don't have them.
2687 else if (anv < -FLT_MAX)
2689 else afloat = (float)anv;
2691 # if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2693 afloat = (float)NV_NAN;
2697 /* a simple cast to float is undefined if outside
2698 * the range of values that can be represented */
2699 afloat = (float)(anv > FLT_MAX ? NV_INF :
2700 anv < -FLT_MAX ? -NV_INF : anv);
2703 PUSH_VAR(utf8, cur, afloat, needs_swap);
2711 anv = SvNV(fromstr);
2712 # if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
2713 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2714 * on Alpha; fake it if we don't have them.
2718 else if (anv < -DBL_MAX)
2720 else adouble = (double)anv;
2722 adouble = (double)anv;
2724 PUSH_VAR(utf8, cur, adouble, needs_swap);
2729 Zero(&anv, 1, NV); /* can be long double with unused bits */
2733 /* to work round a gcc/x86 bug; don't use SvNV */
2734 anv.nv = sv_2nv(fromstr);
2735 # if defined(LONGDOUBLE_X86_80_BIT) && defined(USE_LONG_DOUBLE) \
2736 && LONG_DOUBLESIZE > 10
2737 /* GCC sometimes overwrites the padding in the
2739 Zero(anv.bytes+10, sizeof(anv.bytes) - 10, U8);
2742 anv.nv = SvNV(fromstr);
2744 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
2748 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2751 /* long doubles can have unused bits, which may be nonzero */
2752 Zero(&aldouble, 1, long double);
2756 /* to work round a gcc/x86 bug; don't use SvNV */
2757 aldouble.ld = (long double)sv_2nv(fromstr);
2758 # if defined(LONGDOUBLE_X86_80_BIT) && LONG_DOUBLESIZE > 10
2759 /* GCC sometimes overwrites the padding in the
2761 Zero(aldouble.bytes+10, sizeof(aldouble.bytes) - 10, U8);
2764 aldouble.ld = (long double)SvNV(fromstr);
2766 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2772 case 'n' | TYPE_IS_SHRIEKING:
2777 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2778 ai16 = PerlSock_htons(ai16);
2779 PUSH16(utf8, cur, &ai16, FALSE);
2782 case 'v' | TYPE_IS_SHRIEKING:
2787 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2789 PUSH16(utf8, cur, &ai16, FALSE);
2792 case 'S' | TYPE_IS_SHRIEKING:
2793 #if SHORTSIZE != SIZE16
2795 unsigned short aushort;
2797 aushort = SvUV_no_inf(fromstr, datumtype);
2798 PUSH_VAR(utf8, cur, aushort, needs_swap);
2808 au16 = (U16)SvUV_no_inf(fromstr, datumtype);
2809 PUSH16(utf8, cur, &au16, needs_swap);
2812 case 's' | TYPE_IS_SHRIEKING:
2813 #if SHORTSIZE != SIZE16
2817 ashort = SvIV_no_inf(fromstr, datumtype);
2818 PUSH_VAR(utf8, cur, ashort, needs_swap);
2828 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2829 PUSH16(utf8, cur, &ai16, needs_swap);
2833 case 'I' | TYPE_IS_SHRIEKING:
2837 auint = SvUV_no_inf(fromstr, datumtype);
2838 PUSH_VAR(utf8, cur, auint, needs_swap);
2845 aiv = SvIV_no_inf(fromstr, datumtype);
2846 PUSH_VAR(utf8, cur, aiv, needs_swap);
2853 auv = SvUV_no_inf(fromstr, datumtype);
2854 PUSH_VAR(utf8, cur, auv, needs_swap);
2861 S_sv_check_infnan(aTHX_ fromstr, datumtype);
2862 anv = SvNV_nomg(fromstr);
2866 SvCUR_set(cat, cur - start);
2867 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
2870 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2871 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2872 any negative IVs will have already been got by the croak()
2873 above. IOK is untrue for fractions, so we test them
2874 against UV_MAX_P1. */
2875 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2876 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
2877 char *in = buf + sizeof(buf);
2878 UV auv = SvUV_nomg(fromstr);
2881 *--in = (char)((auv & 0x7f) | 0x80);
2884 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2885 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2886 in, (buf + sizeof(buf)) - in);
2887 } else if (SvPOKp(fromstr))
2889 else if (SvNOKp(fromstr)) {
2890 /* 10**NV_MAX_10_EXP is the largest power of 10
2891 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
2892 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2893 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2894 And with that many bytes only Inf can overflow.
2895 Some C compilers are strict about integral constant
2896 expressions so we conservatively divide by a slightly
2897 smaller integer instead of multiplying by the exact
2898 floating-point value.
2900 #ifdef NV_MAX_10_EXP
2901 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
2902 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
2904 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
2905 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
2907 char *in = buf + sizeof(buf);
2909 anv = Perl_floor(anv);
2911 const NV next = Perl_floor(anv / 128);
2912 if (in <= buf) /* this cannot happen ;-) */
2913 Perl_croak(aTHX_ "Cannot compress integer in pack");
2914 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
2917 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2918 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2919 in, (buf + sizeof(buf)) - in);
2928 /* Copy string and check for compliance */
2929 from = SvPV_nomg_const(fromstr, len);
2930 if ((norm = is_an_int(from, len)) == NULL)
2931 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
2933 Newx(result, len, char);
2936 while (!done) *--in = div128(norm, &done) | 0x80;
2937 result[len - 1] &= 0x7F; /* clear continue bit */
2938 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2939 in, (result + len) - in);
2941 SvREFCNT_dec(norm); /* free norm */
2946 case 'i' | TYPE_IS_SHRIEKING:
2950 aint = SvIV_no_inf(fromstr, datumtype);
2951 PUSH_VAR(utf8, cur, aint, needs_swap);
2954 case 'N' | TYPE_IS_SHRIEKING:
2959 au32 = SvUV_no_inf(fromstr, datumtype);
2960 au32 = PerlSock_htonl(au32);
2961 PUSH32(utf8, cur, &au32, FALSE);
2964 case 'V' | TYPE_IS_SHRIEKING:
2969 au32 = SvUV_no_inf(fromstr, datumtype);
2971 PUSH32(utf8, cur, &au32, FALSE);
2974 case 'L' | TYPE_IS_SHRIEKING:
2975 #if LONGSIZE != SIZE32
2977 unsigned long aulong;
2979 aulong = SvUV_no_inf(fromstr, datumtype);
2980 PUSH_VAR(utf8, cur, aulong, needs_swap);
2990 au32 = SvUV_no_inf(fromstr, datumtype);
2991 PUSH32(utf8, cur, &au32, needs_swap);
2994 case 'l' | TYPE_IS_SHRIEKING:
2995 #if LONGSIZE != SIZE32
2999 along = SvIV_no_inf(fromstr, datumtype);
3000 PUSH_VAR(utf8, cur, along, needs_swap);
3010 ai32 = SvIV_no_inf(fromstr, datumtype);
3011 PUSH32(utf8, cur, &ai32, needs_swap);
3014 #if defined(HAS_QUAD) && IVSIZE >= 8
3019 auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype);
3020 PUSH_VAR(utf8, cur, auquad, needs_swap);
3027 aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype);
3028 PUSH_VAR(utf8, cur, aquad, needs_swap);
3033 len = 1; /* assume SV is correct length */
3034 GROWING(utf8, cat, start, cur, sizeof(char *));
3041 SvGETMAGIC(fromstr);
3042 if (!SvOK(fromstr)) aptr = NULL;
3044 /* XXX better yet, could spirit away the string to
3045 * a safe spot and hang on to it until the result
3046 * of pack() (and all copies of the result) are
3049 if (((SvTEMP(fromstr) && SvREFCNT(fromstr) == 1)
3050 || (SvPADTMP(fromstr) &&
3051 !SvREADONLY(fromstr)))) {
3052 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3053 "Attempt to pack pointer to temporary value");
3055 if (SvPOK(fromstr) || SvNIOK(fromstr))
3056 aptr = SvPV_nomg_const_nolen(fromstr);
3058 aptr = SvPV_force_flags_nolen(fromstr, 0);
3060 PUSH_VAR(utf8, cur, aptr, needs_swap);
3064 const char *aptr, *aend;
3068 if (len <= 2) len = 45;
3069 else len = len / 3 * 3;
3071 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3072 "Field too wide in 'u' format in pack");
3075 aptr = SvPV_const(fromstr, fromlen);
3076 from_utf8 = DO_UTF8(fromstr);
3078 aend = aptr + fromlen;
3079 fromlen = sv_len_utf8_nomg(fromstr);
3080 } else aend = NULL; /* Unused, but keep compilers happy */
3081 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3082 while (fromlen > 0) {
3085 U8 hunk[1+63/3*4+1];
3087 if ((I32)fromlen > len)
3093 if (!S_utf8_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3094 'u' | TYPE_IS_PACK)) {
3096 SvCUR_set(cat, cur - start);
3097 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3098 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3099 aptr, aend, buffer, (long) todo);
3101 end = doencodes(hunk, (const U8 *)buffer, todo);
3103 end = doencodes(hunk, (const U8 *)aptr, todo);
3106 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
3113 SvCUR_set(cat, cur - start);
3115 *symptr = lookahead;
3124 dSP; dMARK; dORIGMARK; dTARGET;
3127 SV *pat_sv = *++MARK;
3128 const char *pat = SvPV_const(pat_sv, fromlen);
3129 const char *patend = pat + fromlen;
3135 packlist(cat, pat, patend, MARK, SP + 1);
3144 * ex: set ts=8 sts=4 sw=4 et: