3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
32 #define PERL_IN_PP_PACK_C
35 /* Types used by pack/unpack */
37 e_no_len, /* no length */
38 e_number, /* number, [] */
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 U32 flags; /* /=4, comma=2, pack=1 */
49 /* and group modifiers */
50 SSize_t length; /* length/repeat count */
51 howlen_t howlen; /* how length is given */
52 int level; /* () nesting level */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
57 #define TEMPSYM_INIT(symptr, p, e, f) \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
81 U8 bytes[sizeof(long double)];
88 /* Maximum number of bytes to which a byte can grow due to upgrade */
92 * Offset for integer pack/unpack.
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
112 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
115 #if U16SIZE <= SIZE16 && U32SIZE <= SIZE32
116 # define OFF16(p) ((char *) (p))
117 # define OFF32(p) ((char *) (p))
118 #elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
119 # define OFF16(p) ((char*)(p))
120 # define OFF32(p) ((char*)(p))
121 #elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
122 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
123 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
125 # error "bad cray byte order"
128 #define PUSH16(utf8, cur, p, needs_swap) \
129 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
130 #define PUSH32(utf8, cur, p, needs_swap) \
131 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
133 #if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
134 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
135 #elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
136 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
138 # error "Unsupported byteorder"
139 /* Need to add code here to re-instate mixed endian support.
140 NEEDS_SWAP would need to hold a flag indicating which action to
141 take, and S_reverse_copy and the code in S_utf8_to_bytes would need
142 logic adding to deal with any mixed-endian transformations needed.
146 /* Only to be used inside a loop (see the break) */
147 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
149 if (UNLIKELY(utf8)) { \
150 if (!S_utf8_to_bytes(aTHX_ &s, strend, \
151 (char *) (buf), len, datumtype)) break; \
153 if (UNLIKELY(needs_swap)) \
154 S_reverse_copy(s, (char *) (buf), len); \
156 Copy(s, (char *) (buf), len, char); \
161 #define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
162 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
164 #define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
165 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
167 #define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
168 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
170 #define PUSH_VAR(utf8, aptr, var, needs_swap) \
171 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
173 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
174 #define MAX_SUB_TEMPLATE_LEVEL 100
176 /* flags (note that type modifiers can also be used as flags!) */
177 #define FLAG_WAS_UTF8 0x40
178 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
179 #define FLAG_UNPACK_ONLY_ONE 0x10
180 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
181 #define FLAG_SLASH 0x04
182 #define FLAG_COMMA 0x02
183 #define FLAG_PACK 0x01
186 S_mul128(pTHX_ SV *sv, U8 m)
189 char *s = SvPV(sv, len);
192 PERL_ARGS_ASSERT_MUL128;
194 if (! memBEGINs(s, len, "0000")) { /* need to grow sv */
195 SV * const tmpNew = newSVpvs("0000000000");
197 sv_catsv(tmpNew, sv);
198 SvREFCNT_dec(sv); /* free old sv */
203 while (!*t) /* trailing '\0'? */
206 const U32 i = ((*t - '0') << 7) + m;
207 *(t--) = '0' + (char)(i % 10);
213 /* Explosives and implosives. */
215 #define ISUUCHAR(ch) (NATIVE_TO_LATIN1(ch) >= NATIVE_TO_LATIN1(' ') \
216 && NATIVE_TO_LATIN1(ch) < NATIVE_TO_LATIN1('a'))
219 #define TYPE_IS_SHRIEKING 0x100
220 #define TYPE_IS_BIG_ENDIAN 0x200
221 #define TYPE_IS_LITTLE_ENDIAN 0x400
222 #define TYPE_IS_PACK 0x800
223 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
224 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
225 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
227 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
228 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
230 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
232 #define PACK_SIZE_CANNOT_CSUM 0x80
233 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
234 #define PACK_SIZE_MASK 0x3F
236 #include "packsizetables.inc"
239 S_reverse_copy(const char *src, char *dest, STRLEN len)
247 utf8_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
255 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
256 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
257 if (retlen == (STRLEN) -1)
259 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
260 (int) TYPE_NO_MODIFIERS(datumtype));
262 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
263 "Character in '%c' format wrapped in unpack",
264 (int) TYPE_NO_MODIFIERS(datumtype));
271 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
272 utf8_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
276 S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, SSize_t buf_len, I32 datumtype)
280 const char *from = *s;
282 const U32 flags = ckWARN(WARN_UTF8) ?
283 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
284 const bool needs_swap = NEEDS_SWAP(datumtype);
286 if (UNLIKELY(needs_swap))
289 for (;buf_len > 0; buf_len--) {
290 if (from >= end) return FALSE;
291 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
292 if (retlen == (STRLEN) -1) {
293 from += UTF8SKIP(from);
295 } else from += retlen;
300 if (UNLIKELY(needs_swap))
301 *(U8 *)--buf = (U8)val;
303 *(U8 *)buf++ = (U8)val;
305 /* We have enough characters for the buffer. Did we have problems ? */
308 /* Rewalk the string fragment while warning */
310 const U32 flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
311 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
312 if (ptr >= end) break;
313 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
315 if (from > end) from = end;
318 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
319 WARN_PACK : WARN_UNPACK),
320 "Character(s) in '%c' format wrapped in %s",
321 (int) TYPE_NO_MODIFIERS(datumtype),
322 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
329 S_my_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
330 PERL_ARGS_ASSERT_MY_BYTES_TO_UTF8;
332 if (UNLIKELY(needs_swap)) {
333 const U8 *p = start + len;
334 while (p-- > start) {
335 append_utf8_from_native_byte(*p, (U8 **) & dest);
338 const U8 * const end = start + len;
339 while (start < end) {
340 append_utf8_from_native_byte(*start, (U8 **) & dest);
347 #define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
349 if (UNLIKELY(utf8)) \
350 (cur) = my_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap); \
352 if (UNLIKELY(needs_swap)) \
353 S_reverse_copy((char *)(buf), cur, len); \
355 Copy(buf, cur, len, char); \
360 #define GROWING(utf8, cat, start, cur, in_len) \
362 STRLEN glen = (in_len); \
363 if (utf8) glen *= UTF8_EXPAND; \
364 if ((cur) + glen >= (start) + SvLEN(cat)) { \
365 (start) = sv_exp_grow(cat, glen); \
366 (cur) = (start) + SvCUR(cat); \
370 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
372 const STRLEN glen = (in_len); \
374 if (utf8) gl *= UTF8_EXPAND; \
375 if ((cur) + gl >= (start) + SvLEN(cat)) { \
377 SvCUR_set((cat), (cur) - (start)); \
378 (start) = sv_exp_grow(cat, gl); \
379 (cur) = (start) + SvCUR(cat); \
381 PUSH_BYTES(utf8, cur, buf, glen, 0); \
384 #define PUSH_BYTE(utf8, s, byte) \
387 const U8 au8 = (byte); \
388 (s) = my_bytes_to_utf8(&au8, 1, (s), 0);\
389 } else *(U8 *)(s)++ = (byte); \
392 /* Only to be used inside a loop (see the break) */
393 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
396 if (str >= end) break; \
397 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
398 if (retlen == (STRLEN) -1) { \
400 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
405 static const char *_action( const tempsym_t* symptr )
407 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
410 /* Returns the sizeof() struct described by pat */
412 S_measure_struct(pTHX_ tempsym_t* symptr)
416 PERL_ARGS_ASSERT_MEASURE_STRUCT;
418 while (next_symbol(symptr)) {
421 switch (symptr->howlen) {
423 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
427 /* e_no_len and e_number */
428 len = symptr->length;
432 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
435 /* endianness doesn't influence the size of a type */
436 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
438 Perl_croak(aTHX_ "Invalid type '%c' in %s",
439 (int)TYPE_NO_MODIFIERS(symptr->code),
441 case '.' | TYPE_IS_SHRIEKING:
442 case '@' | TYPE_IS_SHRIEKING:
446 case 'U': /* XXXX Is it correct? */
449 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
450 (int) TYPE_NO_MODIFIERS(symptr->code),
457 tempsym_t savsym = *symptr;
458 symptr->patptr = savsym.grpbeg;
459 symptr->patend = savsym.grpend;
460 /* XXXX Theoretically, we need to measure many times at
461 different positions, since the subexpression may contain
462 alignment commands, but be not of aligned length.
463 Need to detect this and croak(). */
464 size = measure_struct(symptr);
468 case 'X' | TYPE_IS_SHRIEKING:
469 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
471 if (!len) /* Avoid division by 0 */
473 len = total % len; /* Assumed: the start is aligned. */
478 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
480 case 'x' | TYPE_IS_SHRIEKING:
481 if (!len) /* Avoid division by 0 */
483 star = total % len; /* Assumed: the start is aligned. */
484 if (star) /* Other portable ways? */
508 size = sizeof(char*);
518 /* locate matching closing parenthesis or bracket
519 * returns char pointer to char after match, or NULL
522 S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
524 PERL_ARGS_ASSERT_GROUP_END;
526 while (patptr < patend) {
527 const char c = *patptr++;
534 while (patptr < patend && *patptr != '\n')
538 patptr = group_end(patptr, patend, ')') + 1;
540 patptr = group_end(patptr, patend, ']') + 1;
542 Perl_croak(aTHX_ "No group ending character '%c' found in template",
544 NOT_REACHED; /* NOTREACHED */
548 /* Convert unsigned decimal number to binary.
549 * Expects a pointer to the first digit and address of length variable
550 * Advances char pointer to 1st non-digit char and returns number
553 S_get_num(pTHX_ const char *patptr, SSize_t *lenptr )
555 SSize_t len = *patptr++ - '0';
557 PERL_ARGS_ASSERT_GET_NUM;
559 while (isDIGIT(*patptr)) {
560 SSize_t nlen = (len * 10) + (*patptr++ - '0');
561 if (nlen < 0 || nlen/10 != len)
562 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
569 /* The marvellous template parsing routine: Using state stored in *symptr,
570 * locates next template code and count
573 S_next_symbol(pTHX_ tempsym_t* symptr )
575 const char* patptr = symptr->patptr;
576 const char* const patend = symptr->patend;
578 PERL_ARGS_ASSERT_NEXT_SYMBOL;
580 symptr->flags &= ~FLAG_SLASH;
582 while (patptr < patend) {
583 if (isSPACE(*patptr))
585 else if (*patptr == '#') {
587 while (patptr < patend && *patptr != '\n')
592 /* We should have found a template code */
593 I32 code = *patptr++ & 0xFF;
594 U32 inherited_modifiers = 0;
596 if (code == ','){ /* grandfather in commas but with a warning */
597 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
598 symptr->flags |= FLAG_COMMA;
599 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
600 "Invalid type ',' in %s", _action( symptr ) );
605 /* for '(', skip to ')' */
607 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
608 Perl_croak(aTHX_ "()-group starts with a count in %s",
610 symptr->grpbeg = patptr;
611 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
612 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
613 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
617 /* look for group modifiers to inherit */
618 if (TYPE_ENDIANNESS(symptr->flags)) {
619 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
620 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
623 /* look for modifiers */
624 while (patptr < patend) {
629 modifier = TYPE_IS_SHRIEKING;
630 allowed = "sSiIlLxXnNvV@.";
633 modifier = TYPE_IS_BIG_ENDIAN;
634 allowed = ENDIANNESS_ALLOWED_TYPES;
637 modifier = TYPE_IS_LITTLE_ENDIAN;
638 allowed = ENDIANNESS_ALLOWED_TYPES;
649 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
650 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
651 allowed, _action( symptr ) );
653 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
654 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
655 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
656 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
657 TYPE_ENDIANNESS_MASK)
658 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
659 *patptr, _action( symptr ) );
661 if ((code & modifier)) {
662 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
663 "Duplicate modifier '%c' after '%c' in %s",
664 *patptr, (int) TYPE_NO_MODIFIERS(code),
672 /* inherit modifiers */
673 code |= inherited_modifiers;
675 /* look for count and/or / */
676 if (patptr < patend) {
677 if (isDIGIT(*patptr)) {
678 patptr = get_num( patptr, &symptr->length );
679 symptr->howlen = e_number;
681 } else if (*patptr == '*') {
683 symptr->howlen = e_star;
685 } else if (*patptr == '[') {
686 const char* lenptr = ++patptr;
687 symptr->howlen = e_number;
688 patptr = group_end( patptr, patend, ']' ) + 1;
689 /* what kind of [] is it? */
690 if (isDIGIT(*lenptr)) {
691 lenptr = get_num( lenptr, &symptr->length );
693 Perl_croak(aTHX_ "Malformed integer in [] in %s",
696 tempsym_t savsym = *symptr;
697 symptr->patend = patptr-1;
698 symptr->patptr = lenptr;
699 savsym.length = measure_struct(symptr);
703 symptr->howlen = e_no_len;
708 while (patptr < patend) {
709 if (isSPACE(*patptr))
711 else if (*patptr == '#') {
713 while (patptr < patend && *patptr != '\n')
718 if (*patptr == '/') {
719 symptr->flags |= FLAG_SLASH;
721 if (patptr < patend &&
722 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
723 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
730 /* at end - no count, no / */
731 symptr->howlen = e_no_len;
736 symptr->patptr = patptr;
740 symptr->patptr = patptr;
745 There is no way to cleanly handle the case where we should process the
746 string per byte in its upgraded form while it's really in downgraded form
747 (e.g. estimates like strend-s as an upper bound for the number of
748 characters left wouldn't work). So if we foresee the need of this
749 (pattern starts with U or contains U0), we want to work on the encoded
750 version of the string. Users are advised to upgrade their pack string
751 themselves if they need to do a lot of unpacks like this on it
754 need_utf8(const char *pat, const char *patend)
758 PERL_ARGS_ASSERT_NEED_UTF8;
760 while (pat < patend) {
763 pat = (const char *) memchr(pat, '\n', patend-pat);
764 if (!pat) return FALSE;
765 } else if (pat[0] == 'U') {
766 if (first || pat[1] == '0') return TRUE;
767 } else first = FALSE;
774 first_symbol(const char *pat, const char *patend) {
775 PERL_ARGS_ASSERT_FIRST_SYMBOL;
777 while (pat < patend) {
778 if (pat[0] != '#') return pat[0];
780 pat = (const char *) memchr(pat, '\n', patend-pat);
789 =head1 Pack and Unpack
791 =for apidoc unpackstring
793 The engine implementing the C<unpack()> Perl function.
795 Using the template C<pat..patend>, this function unpacks the string
796 C<s..strend> into a number of mortal SVs, which it pushes onto the perl
797 argument (C<@_>) stack (so you will need to issue a C<PUTBACK> before and
798 C<SPAGAIN> after the call to this function). It returns the number of
801 The C<strend> and C<patend> pointers should point to the byte following the
802 last character of each string.
804 Although this function returns its values on the perl argument stack, it
805 doesn't take any parameters from that stack (and thus in particular
806 there's no need to do a C<PUSHMARK> before calling it, unlike L</call_pv> for
812 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
816 PERL_ARGS_ASSERT_UNPACKSTRING;
818 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
819 else if (need_utf8(pat, patend)) {
820 /* We probably should try to avoid this in case a scalar context call
821 wouldn't get to the "U0" */
822 STRLEN len = strend - s;
823 s = (char *) bytes_to_utf8((U8 *) s, &len);
826 flags |= FLAG_DO_UTF8;
829 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
830 flags |= FLAG_PARSE_UTF8;
832 TEMPSYM_INIT(&sym, pat, patend, flags);
834 return unpack_rec(&sym, s, s, strend, NULL );
838 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
842 const SSize_t start_sp_offset = SP - PL_stack_base;
844 SSize_t checksum = 0;
847 const SSize_t bits_in_uv = CHAR_BIT * sizeof(cuv);
849 bool explicit_length;
850 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
851 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
853 PERL_ARGS_ASSERT_UNPACK_REC;
855 symptr->strbeg = s - strbeg;
857 while (next_symbol(symptr)) {
860 I32 datumtype = symptr->code;
862 /* do first one only unless in list context
863 / is implemented by unpacking the count, then popping it from the
864 stack, so must check that we're not in the middle of a / */
866 && (SP - PL_stack_base == start_sp_offset + 1)
867 && (datumtype != '/') ) /* XXX can this be omitted */
870 switch (howlen = symptr->howlen) {
872 len = strend - strbeg; /* long enough */
875 /* e_no_len and e_number */
876 len = symptr->length;
880 explicit_length = TRUE;
882 beyond = s >= strend;
884 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
886 /* props nonzero means we can process this letter. */
887 const SSize_t size = props & PACK_SIZE_MASK;
888 const SSize_t howmany = (strend - s) / size;
892 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
893 if (len && unpack_only_one) len = 1;
899 needs_swap = NEEDS_SWAP(datumtype);
901 switch(TYPE_NO_ENDIANNESS(datumtype)) {
903 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
906 if (howlen == e_no_len)
907 len = 16; /* len is not specified */
915 tempsym_t savsym = *symptr;
916 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
917 symptr->flags |= group_modifiers;
918 symptr->patend = savsym.grpend;
919 symptr->previous = &savsym;
922 if (len && unpack_only_one) len = 1;
924 symptr->patptr = savsym.grpbeg;
925 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
926 else symptr->flags &= ~FLAG_PARSE_UTF8;
927 unpack_rec(symptr, s, strbeg, strend, &s);
928 if (s == strend && savsym.howlen == e_star)
929 break; /* No way to continue */
932 savsym.flags = symptr->flags & ~group_modifiers;
936 case '.' | TYPE_IS_SHRIEKING:
940 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
941 if (howlen == e_star) from = strbeg;
942 else if (len <= 0) from = s;
944 tempsym_t *group = symptr;
946 while (--len && group) group = group->previous;
947 from = group ? strbeg + group->strbeg : strbeg;
950 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
951 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
955 case '@' | TYPE_IS_SHRIEKING:
957 s = strbeg + symptr->strbeg;
958 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
962 Perl_croak(aTHX_ "'@' outside of string in unpack");
967 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
970 Perl_croak(aTHX_ "'@' outside of string in unpack");
974 case 'X' | TYPE_IS_SHRIEKING:
975 if (!len) /* Avoid division by 0 */
978 const char *hop, *last;
982 hop += UTF8SKIP(hop);
989 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
993 len = (s - strbeg) % len;
999 Perl_croak(aTHX_ "'X' outside of string in unpack");
1000 while (--s, UTF8_IS_CONTINUATION(*s)) {
1002 Perl_croak(aTHX_ "'X' outside of string in unpack");
1007 if (len > s - strbeg)
1008 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1012 case 'x' | TYPE_IS_SHRIEKING: {
1014 if (!len) /* Avoid division by 0 */
1016 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1017 else ai32 = (s - strbeg) % len;
1018 if (ai32 == 0) break;
1026 Perl_croak(aTHX_ "'x' outside of string in unpack");
1031 if (len > strend - s)
1032 Perl_croak(aTHX_ "'x' outside of string in unpack");
1037 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1043 /* Preliminary length estimate is assumed done in 'W' */
1044 if (len > strend - s) len = strend - s;
1050 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1051 if (hop >= strend) {
1053 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1058 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1060 } else if (len > strend - s)
1063 if (datumtype == 'Z') {
1064 /* 'Z' strips stuff after first null */
1065 const char *ptr, *end;
1067 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1068 sv = newSVpvn(s, ptr-s);
1069 if (howlen == e_star) /* exact for 'Z*' */
1070 len = ptr-s + (ptr != strend ? 1 : 0);
1071 } else if (datumtype == 'A') {
1072 /* 'A' strips both nulls and spaces */
1074 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1075 for (ptr = s+len-1; ptr >= s; ptr--) {
1077 && !UTF8_IS_CONTINUATION(*ptr)
1078 && !isSPACE_utf8_safe(ptr, strend))
1083 if (ptr >= s) ptr += UTF8SKIP(ptr);
1086 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1088 for (ptr = s+len-1; ptr >= s; ptr--)
1089 if (*ptr != 0 && !isSPACE(*ptr)) break;
1092 sv = newSVpvn(s, ptr-s);
1093 } else sv = newSVpvn(s, len);
1097 /* Undo any upgrade done due to need_utf8() */
1098 if (!(symptr->flags & FLAG_WAS_UTF8))
1099 sv_utf8_downgrade(sv, 0);
1107 if (howlen == e_star || len > (strend - s) * 8)
1108 len = (strend - s) * 8;
1111 while (len >= 8 && s < strend) {
1112 cuv += PL_bitcount[utf8_to_byte(aTHX_ &s, strend, datumtype)];
1117 cuv += PL_bitcount[*(U8 *)s++];
1120 if (len && s < strend) {
1122 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1123 if (datumtype == 'b')
1125 if (bits & 1) cuv++;
1130 if (bits & 0x80) cuv++;
1137 sv = sv_2mortal(newSV(len ? len : 1));
1140 if (datumtype == 'b') {
1142 const SSize_t ai32 = len;
1143 for (len = 0; len < ai32; len++) {
1144 if (len & 7) bits >>= 1;
1146 if (s >= strend) break;
1147 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1148 } else bits = *(U8 *) s++;
1149 *str++ = bits & 1 ? '1' : '0';
1153 const SSize_t ai32 = len;
1154 for (len = 0; len < ai32; len++) {
1155 if (len & 7) bits <<= 1;
1157 if (s >= strend) break;
1158 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1159 } else bits = *(U8 *) s++;
1160 *str++ = bits & 0x80 ? '1' : '0';
1164 SvCUR_set(sv, str - SvPVX_const(sv));
1171 /* Preliminary length estimate, acceptable for utf8 too */
1172 if (howlen == e_star || len > (strend - s) * 2)
1173 len = (strend - s) * 2;
1175 sv = sv_2mortal(newSV(len ? len : 1));
1179 if (datumtype == 'h') {
1182 for (len = 0; len < ai32; len++) {
1183 if (len & 1) bits >>= 4;
1185 if (s >= strend) break;
1186 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1187 } else bits = * (U8 *) s++;
1189 *str++ = PL_hexdigit[bits & 15];
1193 const SSize_t ai32 = len;
1194 for (len = 0; len < ai32; len++) {
1195 if (len & 1) bits <<= 4;
1197 if (s >= strend) break;
1198 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1199 } else bits = *(U8 *) s++;
1201 *str++ = PL_hexdigit[(bits >> 4) & 15];
1206 SvCUR_set(sv, str - SvPVX_const(sv));
1213 if (explicit_length)
1214 /* Switch to "character" mode */
1215 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1220 while (len-- > 0 && s < strend) {
1225 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1226 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1227 if (retlen == (STRLEN) -1)
1228 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1232 aint = *(U8 *)(s)++;
1233 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1237 else if (checksum > bits_in_uv)
1238 cdouble += (NV)aint;
1246 while (len-- > 0 && s < strend) {
1248 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1249 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1250 if (retlen == (STRLEN) -1)
1251 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1255 else if (checksum > bits_in_uv)
1256 cdouble += (NV) val;
1260 } else if (!checksum)
1262 const U8 ch = *(U8 *) s++;
1265 else if (checksum > bits_in_uv)
1266 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1268 while (len-- > 0) cuv += *(U8 *) s++;
1272 if (explicit_length && howlen != e_star) {
1273 /* Switch to "bytes in UTF-8" mode */
1274 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1276 /* Should be impossible due to the need_utf8() test */
1277 Perl_croak(aTHX_ "U0 mode on a byte string");
1281 if (len > strend - s) len = strend - s;
1283 if (len && unpack_only_one) len = 1;
1287 while (len-- > 0 && s < strend) {
1291 U8 result[UTF8_MAXLEN+1];
1292 const char *ptr = s;
1294 /* Bug: warns about bad utf8 even if we are short on bytes
1295 and will break out of the loop */
1296 if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1299 len = UTF8SKIP(result);
1300 if (!S_utf8_to_bytes(aTHX_ &ptr, strend,
1301 (char *) &result[1], len-1, 'U')) break;
1302 auv = NATIVE_TO_UNI(utf8n_to_uvchr(result,
1305 UTF8_ALLOW_DEFAULT));
1308 auv = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s,
1311 UTF8_ALLOW_DEFAULT));
1312 if (retlen == (STRLEN) -1)
1313 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1318 else if (checksum > bits_in_uv)
1319 cdouble += (NV) auv;
1324 case 's' | TYPE_IS_SHRIEKING:
1325 #if SHORTSIZE != SIZE16
1328 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
1331 else if (checksum > bits_in_uv)
1332 cdouble += (NV)ashort;
1344 #if U16SIZE > SIZE16
1347 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1348 #if U16SIZE > SIZE16
1354 else if (checksum > bits_in_uv)
1355 cdouble += (NV)ai16;
1360 case 'S' | TYPE_IS_SHRIEKING:
1361 #if SHORTSIZE != SIZE16
1363 unsigned short aushort;
1364 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap,
1368 else if (checksum > bits_in_uv)
1369 cdouble += (NV)aushort;
1382 #if U16SIZE > SIZE16
1385 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
1386 if (datumtype == 'n')
1387 au16 = PerlSock_ntohs(au16);
1388 if (datumtype == 'v')
1392 else if (checksum > bits_in_uv)
1393 cdouble += (NV) au16;
1398 case 'v' | TYPE_IS_SHRIEKING:
1399 case 'n' | TYPE_IS_SHRIEKING:
1402 # if U16SIZE > SIZE16
1405 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1406 /* There should never be any byte-swapping here. */
1407 assert(!TYPE_ENDIANNESS(datumtype));
1408 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1409 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1410 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1411 ai16 = (I16) vtohs((U16) ai16);
1414 else if (checksum > bits_in_uv)
1415 cdouble += (NV) ai16;
1421 case 'i' | TYPE_IS_SHRIEKING:
1424 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
1427 else if (checksum > bits_in_uv)
1428 cdouble += (NV)aint;
1434 case 'I' | TYPE_IS_SHRIEKING:
1437 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
1440 else if (checksum > bits_in_uv)
1441 cdouble += (NV)auint;
1449 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
1452 else if (checksum > bits_in_uv)
1461 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
1464 else if (checksum > bits_in_uv)
1470 case 'l' | TYPE_IS_SHRIEKING:
1471 #if LONGSIZE != SIZE32
1474 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
1477 else if (checksum > bits_in_uv)
1478 cdouble += (NV)along;
1489 #if U32SIZE > SIZE32
1492 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1493 #if U32SIZE > SIZE32
1494 if (ai32 > 2147483647) ai32 -= 4294967296;
1498 else if (checksum > bits_in_uv)
1499 cdouble += (NV)ai32;
1504 case 'L' | TYPE_IS_SHRIEKING:
1505 #if LONGSIZE != SIZE32
1507 unsigned long aulong;
1508 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
1511 else if (checksum > bits_in_uv)
1512 cdouble += (NV)aulong;
1525 #if U32SIZE > SIZE32
1528 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
1529 if (datumtype == 'N')
1530 au32 = PerlSock_ntohl(au32);
1531 if (datumtype == 'V')
1535 else if (checksum > bits_in_uv)
1536 cdouble += (NV)au32;
1541 case 'V' | TYPE_IS_SHRIEKING:
1542 case 'N' | TYPE_IS_SHRIEKING:
1545 #if U32SIZE > SIZE32
1548 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1549 /* There should never be any byte swapping here. */
1550 assert(!TYPE_ENDIANNESS(datumtype));
1551 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1552 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1553 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1554 ai32 = (I32)vtohl((U32)ai32);
1557 else if (checksum > bits_in_uv)
1558 cdouble += (NV)ai32;
1566 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1567 /* newSVpv generates undef if aptr is NULL */
1568 mPUSHs(newSVpv(aptr, 0));
1576 while (len > 0 && s < strend) {
1578 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1579 auv = (auv << 7) | (ch & 0x7f);
1580 /* UTF8_IS_XXXXX not right here because this is a BER, not
1581 * UTF-8 format - using constant 0x80 */
1589 if (++bytes >= sizeof(UV)) { /* promote to string */
1592 sv = Perl_newSVpvf(aTHX_ "%.*" UVuf,
1593 (int)TYPE_DIGITS(UV), auv);
1594 while (s < strend) {
1595 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1596 sv = mul128(sv, (U8)(ch & 0x7f));
1602 t = SvPV_nolen_const(sv);
1611 if ((s >= strend) && bytes)
1612 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1616 if (symptr->howlen == e_star)
1617 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1619 if (s + sizeof(char*) <= strend) {
1621 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1622 /* newSVpvn generates undef if aptr is NULL */
1623 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1626 #if defined(HAS_QUAD) && IVSIZE >= 8
1630 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
1632 mPUSHs(newSViv((IV)aquad));
1633 else if (checksum > bits_in_uv)
1634 cdouble += (NV)aquad;
1642 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
1644 mPUSHs(newSVuv((UV)auquad));
1645 else if (checksum > bits_in_uv)
1646 cdouble += (NV)auquad;
1652 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1656 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
1666 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
1676 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1677 datumtype, needs_swap);
1684 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1688 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1689 sizeof(aldouble.bytes), datumtype, needs_swap);
1690 /* The most common long double format, the x86 80-bit
1691 * extended precision, has either 2 or 6 unused bytes,
1692 * which may contain garbage, which may contain
1693 * unintentional data. While we do zero the bytes of
1694 * the long double data in pack(), here in unpack() we
1695 * don't, because it's really hard to envision that
1696 * reading the long double off aldouble would be
1697 * affected by the unused bytes.
1699 * Note that trying to unpack 'long doubles' of 'long
1700 * doubles' packed in another system is in the general
1701 * case doomed without having more detail. */
1703 mPUSHn(aldouble.ld);
1705 cdouble += aldouble.ld;
1711 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1712 sv = sv_2mortal(newSV(l));
1713 if (l) SvPOK_on(sv);
1716 /* Note that all legal uuencoded strings are ASCII printables, so
1717 * have the same representation under UTF-8 vs not. This means we
1718 * can ignore UTF8ness on legal input. For illegal we stop at the
1719 * first failure, and don't report where/what that is, so again we
1720 * can ignore UTF8ness */
1722 while (s < strend && *s != ' ' && ISUUCHAR(*s)) {
1726 len = PL_uudmap[*(U8*)s++] & 077;
1728 if (s < strend && ISUUCHAR(*s))
1729 a = PL_uudmap[*(U8*)s++] & 077;
1732 if (s < strend && ISUUCHAR(*s))
1733 b = PL_uudmap[*(U8*)s++] & 077;
1736 if (s < strend && ISUUCHAR(*s))
1737 c = PL_uudmap[*(U8*)s++] & 077;
1740 if (s < strend && ISUUCHAR(*s))
1741 d = PL_uudmap[*(U8*)s++] & 077;
1744 hunk[0] = (char)((a << 2) | (b >> 4));
1745 hunk[1] = (char)((b << 4) | (c >> 2));
1746 hunk[2] = (char)((c << 6) | d);
1748 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1753 else /* possible checksum byte */
1754 if (s + 1 < strend && s[1] == '\n')
1760 } /* End of switch */
1763 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1764 (checksum > bits_in_uv &&
1765 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1768 anv = (NV) (1 << (checksum & 15));
1769 while (checksum >= 16) {
1773 while (cdouble < 0.0)
1775 cdouble = Perl_modf(cdouble / anv, &trouble);
1776 #ifdef LONGDOUBLE_DOUBLEDOUBLE
1777 /* Workaround for powerpc doubledouble modfl bug:
1778 * close to 1.0L and -1.0L cdouble is 0, and trouble
1779 * is cdouble / anv. */
1780 if (trouble != Perl_ceil(trouble)) {
1782 if (cdouble > 1.0L) cdouble -= 1.0L;
1783 if (cdouble < -1.0L) cdouble += 1.0L;
1787 sv = newSVnv(cdouble);
1790 if (checksum < bits_in_uv) {
1791 UV mask = ((UV)1 << checksum) - 1;
1800 if (symptr->flags & FLAG_SLASH){
1801 if (SP - PL_stack_base - start_sp_offset <= 0)
1803 if( next_symbol(symptr) ){
1804 if( symptr->howlen == e_number )
1805 Perl_croak(aTHX_ "Count after length/code in unpack" );
1807 /* ...end of char buffer then no decent length available */
1808 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1810 /* take top of stack (hope it's numeric) */
1813 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1816 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1818 datumtype = symptr->code;
1819 explicit_length = FALSE;
1827 return SP - PL_stack_base - start_sp_offset;
1837 const char *pat = SvPV_const(left, llen);
1838 const char *s = SvPV_const(right, rlen);
1839 const char *strend = s + rlen;
1840 const char *patend = pat + llen;
1844 cnt = unpackstring(pat, patend, s, strend,
1845 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
1846 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
1849 if ( !cnt && gimme == G_SCALAR )
1850 PUSHs(&PL_sv_undef);
1855 doencodes(U8 *h, const U8 *s, SSize_t len)
1857 *h++ = PL_uuemap[len];
1859 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1860 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1861 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1862 *h++ = PL_uuemap[(077 & (s[2] & 077))];
1867 const U8 r = (len > 1 ? s[1] : '\0');
1868 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1869 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1870 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1871 *h++ = PL_uuemap[0];
1878 S_is_an_int(pTHX_ const char *s, STRLEN l)
1880 SV *result = newSVpvn(s, l);
1881 char *const result_c = SvPV_nolen(result); /* convenience */
1882 char *out = result_c;
1886 PERL_ARGS_ASSERT_IS_AN_INT;
1894 SvREFCNT_dec(result);
1917 SvREFCNT_dec(result);
1923 SvCUR_set(result, out - result_c);
1927 /* pnum must be '\0' terminated */
1929 S_div128(pTHX_ SV *pnum, bool *done)
1932 char * const s = SvPV(pnum, len);
1936 PERL_ARGS_ASSERT_DIV128;
1940 const int i = m * 10 + (*t - '0');
1941 const int r = (i >> 7); /* r < 10 */
1949 SvCUR_set(pnum, (STRLEN) (t - s));
1954 =for apidoc packlist
1956 The engine implementing C<pack()> Perl function.
1962 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
1966 PERL_ARGS_ASSERT_PACKLIST;
1968 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
1970 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1971 Also make sure any UTF8 flag is loaded */
1972 SvPV_force_nolen(cat);
1974 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
1976 (void)pack_rec( cat, &sym, beglist, endlist );
1979 /* like sv_utf8_upgrade, but also repoint the group start markers */
1981 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
1984 const char *from_ptr, *from_start, *from_end, **marks, **m;
1985 char *to_start, *to_ptr;
1987 if (SvUTF8(sv)) return;
1989 from_start = SvPVX_const(sv);
1990 from_end = from_start + SvCUR(sv);
1991 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
1992 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
1993 if (from_ptr == from_end) {
1994 /* Simple case: no character needs to be changed */
1999 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2000 Newx(to_start, len, char);
2001 Copy(from_start, to_start, from_ptr-from_start, char);
2002 to_ptr = to_start + (from_ptr-from_start);
2004 Newx(marks, sym_ptr->level+2, const char *);
2005 for (group=sym_ptr; group; group = group->previous)
2006 marks[group->level] = from_start + group->strbeg;
2007 marks[sym_ptr->level+1] = from_end+1;
2008 for (m = marks; *m < from_ptr; m++)
2009 *m = to_start + (*m-from_start);
2011 for (;from_ptr < from_end; from_ptr++) {
2012 while (*m == from_ptr) *m++ = to_ptr;
2013 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2017 while (*m == from_ptr) *m++ = to_ptr;
2018 if (m != marks + sym_ptr->level+1) {
2021 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2022 "level=%d", m, marks, sym_ptr->level);
2024 for (group=sym_ptr; group; group = group->previous)
2025 group->strbeg = marks[group->level] - to_start;
2030 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2031 from_start -= SvIVX(sv);
2034 SvFLAGS(sv) &= ~SVf_OOK;
2037 Safefree(from_start);
2038 SvPV_set(sv, to_start);
2039 SvCUR_set(sv, to_ptr - to_start);
2044 /* Exponential string grower. Makes string extension effectively O(n)
2045 needed says how many extra bytes we need (not counting the final '\0')
2046 Only grows the string if there is an actual lack of space
2049 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2050 const STRLEN cur = SvCUR(sv);
2051 const STRLEN len = SvLEN(sv);
2054 PERL_ARGS_ASSERT_SV_EXP_GROW;
2056 if (len - cur > needed) return SvPVX(sv);
2057 extend = needed > len ? needed : len;
2058 return SvGROW(sv, len+extend+1);
2062 S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
2065 if (UNLIKELY(SvAMAGIC(sv)))
2067 if (UNLIKELY(isinfnansv(sv))) {
2068 const I32 c = TYPE_NO_MODIFIERS(datumtype);
2069 const NV nv = SvNV_nomg(sv);
2071 Perl_croak(aTHX_ "Cannot compress %" NVgf " in pack", nv);
2073 Perl_croak(aTHX_ "Cannot pack %" NVgf " with '%c'", nv, (int) c);
2078 #define SvIV_no_inf(sv,d) \
2079 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
2080 #define SvUV_no_inf(sv,d) \
2081 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
2085 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2087 tempsym_t lookahead;
2088 SSize_t items = endlist - beglist;
2089 bool found = next_symbol(symptr);
2090 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2091 bool warn_utf8 = ckWARN(WARN_UTF8);
2094 PERL_ARGS_ASSERT_PACK_REC;
2096 if (symptr->level == 0 && found && symptr->code == 'U') {
2097 marked_upgrade(aTHX_ cat, symptr);
2098 symptr->flags |= FLAG_DO_UTF8;
2101 symptr->strbeg = SvCUR(cat);
2107 SV *lengthcode = NULL;
2108 I32 datumtype = symptr->code;
2109 howlen_t howlen = symptr->howlen;
2110 char *start = SvPVX(cat);
2111 char *cur = start + SvCUR(cat);
2114 #define NEXTFROM (lengthcode ? lengthcode : items > 0 ? (--items, *beglist++) : &PL_sv_no)
2115 #define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
2119 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2123 /* e_no_len and e_number */
2124 len = symptr->length;
2129 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2131 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2132 /* We can process this letter. */
2133 STRLEN size = props & PACK_SIZE_MASK;
2134 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2138 /* Look ahead for next symbol. Do we have code/code? */
2139 lookahead = *symptr;
2140 found = next_symbol(&lookahead);
2141 if (symptr->flags & FLAG_SLASH) {
2143 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2144 if (strchr("aAZ", lookahead.code)) {
2145 if (lookahead.howlen == e_number) count = lookahead.length;
2148 count = sv_len_utf8(*beglist);
2151 if (lookahead.code == 'Z') count++;
2154 if (lookahead.howlen == e_number && lookahead.length < items)
2155 count = lookahead.length;
2158 lookahead.howlen = e_number;
2159 lookahead.length = count;
2160 lengthcode = sv_2mortal(newSViv(count));
2163 needs_swap = NEEDS_SWAP(datumtype);
2165 /* Code inside the switch must take care to properly update
2166 cat (CUR length and '\0' termination) if it updated *cur and
2167 doesn't simply leave using break */
2168 switch (TYPE_NO_ENDIANNESS(datumtype)) {
2170 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2171 (int) TYPE_NO_MODIFIERS(datumtype));
2173 Perl_croak(aTHX_ "'%%' may not be used in pack");
2175 case '.' | TYPE_IS_SHRIEKING:
2177 if (howlen == e_star) from = start;
2178 else if (len == 0) from = cur;
2180 tempsym_t *group = symptr;
2182 while (--len && group) group = group->previous;
2183 from = group ? start + group->strbeg : start;
2186 len = SvIV_no_inf(fromstr, datumtype);
2188 case '@' | TYPE_IS_SHRIEKING:
2190 from = start + symptr->strbeg;
2192 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2194 while (len && from < cur) {
2195 from += UTF8SKIP(from);
2199 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2201 /* Here we know from == cur */
2203 GROWING(0, cat, start, cur, len);
2204 Zero(cur, len, char);
2206 } else if (from < cur) {
2209 } else goto no_change;
2217 if (len > 0) goto grow;
2218 if (len == 0) goto no_change;
2225 tempsym_t savsym = *symptr;
2226 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2227 symptr->flags |= group_modifiers;
2228 symptr->patend = savsym.grpend;
2230 symptr->previous = &lookahead;
2233 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2234 else symptr->flags &= ~FLAG_PARSE_UTF8;
2235 was_utf8 = SvUTF8(cat);
2236 symptr->patptr = savsym.grpbeg;
2237 beglist = pack_rec(cat, symptr, beglist, endlist);
2238 if (SvUTF8(cat) != was_utf8)
2239 /* This had better be an upgrade while in utf8==0 mode */
2242 if (savsym.howlen == e_star && beglist == endlist)
2243 break; /* No way to continue */
2245 items = endlist - beglist;
2246 lookahead.flags = symptr->flags & ~group_modifiers;
2249 case 'X' | TYPE_IS_SHRIEKING:
2250 if (!len) /* Avoid division by 0 */
2257 hop += UTF8SKIP(hop);
2264 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2268 len = (cur-start) % len;
2272 if (len < 1) goto no_change;
2276 Perl_croak(aTHX_ "'%c' outside of string in pack",
2277 (int) TYPE_NO_MODIFIERS(datumtype));
2278 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2280 Perl_croak(aTHX_ "'%c' outside of string in pack",
2281 (int) TYPE_NO_MODIFIERS(datumtype));
2287 if (cur - start < len)
2288 Perl_croak(aTHX_ "'%c' outside of string in pack",
2289 (int) TYPE_NO_MODIFIERS(datumtype));
2292 if (cur < start+symptr->strbeg) {
2293 /* Make sure group starts don't point into the void */
2295 const STRLEN length = cur-start;
2296 for (group = symptr;
2297 group && length < group->strbeg;
2298 group = group->previous) group->strbeg = length;
2299 lookahead.strbeg = length;
2302 case 'x' | TYPE_IS_SHRIEKING: {
2304 if (!len) /* Avoid division by 0 */
2306 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2307 else ai32 = (cur - start) % len;
2308 if (ai32 == 0) goto no_change;
2320 aptr = SvPV_const(fromstr, fromlen);
2321 if (DO_UTF8(fromstr)) {
2322 const char *end, *s;
2324 if (!utf8 && !SvUTF8(cat)) {
2325 marked_upgrade(aTHX_ cat, symptr);
2326 lookahead.flags |= FLAG_DO_UTF8;
2327 lookahead.strbeg = symptr->strbeg;
2330 cur = start + SvCUR(cat);
2332 if (howlen == e_star) {
2333 if (utf8) goto string_copy;
2337 end = aptr + fromlen;
2338 fromlen = datumtype == 'Z' ? len-1 : len;
2339 while ((SSize_t) fromlen > 0 && s < end) {
2344 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2347 if (datumtype == 'Z') len++;
2353 fromlen = len - fromlen;
2354 if (datumtype == 'Z') fromlen--;
2355 if (howlen == e_star) {
2357 if (datumtype == 'Z') len++;
2359 GROWING(0, cat, start, cur, len);
2360 if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2361 datumtype | TYPE_IS_PACK))
2362 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2363 "for '%c', aptr=%p end=%p cur=%p, fromlen=%zu",
2364 (int)datumtype, aptr, end, cur, fromlen);
2368 if (howlen == e_star) {
2370 if (datumtype == 'Z') len++;
2372 if (len <= (SSize_t) fromlen) {
2374 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2376 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2378 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2379 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2381 while (fromlen > 0) {
2382 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2388 if (howlen == e_star) {
2390 if (datumtype == 'Z') len++;
2392 if (len <= (SSize_t) fromlen) {
2394 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2396 GROWING(0, cat, start, cur, len);
2397 Copy(aptr, cur, fromlen, char);
2401 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2408 const char *str, *end;
2409 SSize_t l, field_len;
2415 str = SvPV_const(fromstr, fromlen);
2416 end = str + fromlen;
2417 if (DO_UTF8(fromstr)) {
2419 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2421 utf8_source = FALSE;
2422 utf8_flags = 0; /* Unused, but keep compilers happy */
2424 if (howlen == e_star) len = fromlen;
2425 field_len = (len+7)/8;
2426 GROWING(utf8, cat, start, cur, field_len);
2427 if (len > (SSize_t)fromlen) len = fromlen;
2430 if (datumtype == 'B')
2434 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2436 } else bits |= *str++ & 1;
2437 if (l & 7) bits <<= 1;
2439 PUSH_BYTE(utf8, cur, bits);
2444 /* datumtype == 'b' */
2448 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2449 if (val & 1) bits |= 0x80;
2450 } else if (*str++ & 1)
2452 if (l & 7) bits >>= 1;
2454 PUSH_BYTE(utf8, cur, bits);
2460 if (datumtype == 'B')
2461 bits <<= 7 - (l & 7);
2463 bits >>= 7 - (l & 7);
2464 PUSH_BYTE(utf8, cur, bits);
2467 /* Determine how many chars are left in the requested field */
2469 if (howlen == e_star) field_len = 0;
2470 else field_len -= l;
2471 Zero(cur, field_len, char);
2477 const char *str, *end;
2478 SSize_t l, field_len;
2484 str = SvPV_const(fromstr, fromlen);
2485 end = str + fromlen;
2486 if (DO_UTF8(fromstr)) {
2488 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2490 utf8_source = FALSE;
2491 utf8_flags = 0; /* Unused, but keep compilers happy */
2493 if (howlen == e_star) len = fromlen;
2494 field_len = (len+1)/2;
2495 GROWING(utf8, cat, start, cur, field_len);
2496 if (!utf8_source && len > (SSize_t)fromlen) len = fromlen;
2499 if (datumtype == 'H')
2503 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2504 if (val < 256 && isALPHA(val))
2505 bits |= (val + 9) & 0xf;
2508 } else if (isALPHA(*str))
2509 bits |= (*str++ + 9) & 0xf;
2511 bits |= *str++ & 0xf;
2512 if (l & 1) bits <<= 4;
2514 PUSH_BYTE(utf8, cur, bits);
2522 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2523 if (val < 256 && isALPHA(val))
2524 bits |= ((val + 9) & 0xf) << 4;
2526 bits |= (val & 0xf) << 4;
2527 } else if (isALPHA(*str))
2528 bits |= ((*str++ + 9) & 0xf) << 4;
2530 bits |= (*str++ & 0xf) << 4;
2531 if (l & 1) bits >>= 4;
2533 PUSH_BYTE(utf8, cur, bits);
2539 PUSH_BYTE(utf8, cur, bits);
2542 /* Determine how many chars are left in the requested field */
2544 if (howlen == e_star) field_len = 0;
2545 else field_len -= l;
2546 Zero(cur, field_len, char);
2554 aiv = SvIV_no_inf(fromstr, datumtype);
2555 if ((-128 > aiv || aiv > 127))
2556 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2557 "Character in 'c' format wrapped in pack");
2558 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2563 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2569 aiv = SvIV_no_inf(fromstr, datumtype);
2570 if ((0 > aiv || aiv > 0xff))
2571 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2572 "Character in 'C' format wrapped in pack");
2573 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2578 U8 in_bytes = (U8)IN_BYTES;
2580 end = start+SvLEN(cat)-1;
2581 if (utf8) end -= UTF8_MAXLEN-1;
2585 auv = SvUV_no_inf(fromstr, datumtype);
2586 if (in_bytes) auv = auv % 0x100;
2591 SvCUR_set(cat, cur - start);
2593 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2594 end = start+SvLEN(cat)-UTF8_MAXLEN;
2596 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, 0);
2601 SvCUR_set(cat, cur - start);
2602 marked_upgrade(aTHX_ cat, symptr);
2603 lookahead.flags |= FLAG_DO_UTF8;
2604 lookahead.strbeg = symptr->strbeg;
2607 cur = start + SvCUR(cat);
2608 end = start+SvLEN(cat)-UTF8_MAXLEN;
2611 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2612 "Character in 'W' format wrapped in pack");
2617 SvCUR_set(cat, cur - start);
2618 GROWING(0, cat, start, cur, len+1);
2619 end = start+SvLEN(cat)-1;
2621 *(U8 *) cur++ = (U8)auv;
2630 if (!(symptr->flags & FLAG_DO_UTF8)) {
2631 marked_upgrade(aTHX_ cat, symptr);
2632 lookahead.flags |= FLAG_DO_UTF8;
2633 lookahead.strbeg = symptr->strbeg;
2639 end = start+SvLEN(cat);
2640 if (!utf8) end -= UTF8_MAXLEN;
2644 auv = SvUV_no_inf(fromstr, datumtype);
2646 U8 buffer[UTF8_MAXLEN+1], *endb;
2647 endb = uvchr_to_utf8_flags(buffer, UNI_TO_NATIVE(auv), 0);
2648 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2650 SvCUR_set(cat, cur - start);
2651 GROWING(0, cat, start, cur,
2652 len+(endb-buffer)*UTF8_EXPAND);
2653 end = start+SvLEN(cat);
2655 cur = my_bytes_to_utf8(buffer, endb-buffer, cur, 0);
2659 SvCUR_set(cat, cur - start);
2660 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2661 end = start+SvLEN(cat)-UTF8_MAXLEN;
2663 cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
2670 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2676 anv = SvNV(fromstr);
2677 # if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
2678 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2679 * on Alpha; fake it if we don't have them.
2683 else if (anv < -FLT_MAX)
2685 else afloat = (float)anv;
2687 # if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2689 afloat = (float)NV_NAN;
2693 /* a simple cast to float is undefined if outside
2694 * the range of values that can be represented */
2695 afloat = (float)(anv > FLT_MAX ? NV_INF :
2696 anv < -FLT_MAX ? -NV_INF : anv);
2699 PUSH_VAR(utf8, cur, afloat, needs_swap);
2707 anv = SvNV(fromstr);
2708 # if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
2709 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2710 * on Alpha; fake it if we don't have them.
2714 else if (anv < -DBL_MAX)
2716 else adouble = (double)anv;
2718 adouble = (double)anv;
2720 PUSH_VAR(utf8, cur, adouble, needs_swap);
2725 Zero(&anv, 1, NV); /* can be long double with unused bits */
2729 /* to work round a gcc/x86 bug; don't use SvNV */
2730 anv.nv = sv_2nv(fromstr);
2731 # if defined(LONGDOUBLE_X86_80_BIT) && defined(USE_LONG_DOUBLE) \
2732 && LONG_DOUBLESIZE > 10
2733 /* GCC sometimes overwrites the padding in the
2735 Zero(anv.bytes+10, sizeof(anv.bytes) - 10, U8);
2738 anv.nv = SvNV(fromstr);
2740 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
2744 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2747 /* long doubles can have unused bits, which may be nonzero */
2748 Zero(&aldouble, 1, long double);
2752 /* to work round a gcc/x86 bug; don't use SvNV */
2753 aldouble.ld = (long double)sv_2nv(fromstr);
2754 # if defined(LONGDOUBLE_X86_80_BIT) && LONG_DOUBLESIZE > 10
2755 /* GCC sometimes overwrites the padding in the
2757 Zero(aldouble.bytes+10, sizeof(aldouble.bytes) - 10, U8);
2760 aldouble.ld = (long double)SvNV(fromstr);
2762 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2768 case 'n' | TYPE_IS_SHRIEKING:
2773 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2774 ai16 = PerlSock_htons(ai16);
2775 PUSH16(utf8, cur, &ai16, FALSE);
2778 case 'v' | TYPE_IS_SHRIEKING:
2783 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2785 PUSH16(utf8, cur, &ai16, FALSE);
2788 case 'S' | TYPE_IS_SHRIEKING:
2789 #if SHORTSIZE != SIZE16
2791 unsigned short aushort;
2793 aushort = SvUV_no_inf(fromstr, datumtype);
2794 PUSH_VAR(utf8, cur, aushort, needs_swap);
2804 au16 = (U16)SvUV_no_inf(fromstr, datumtype);
2805 PUSH16(utf8, cur, &au16, needs_swap);
2808 case 's' | TYPE_IS_SHRIEKING:
2809 #if SHORTSIZE != SIZE16
2813 ashort = SvIV_no_inf(fromstr, datumtype);
2814 PUSH_VAR(utf8, cur, ashort, needs_swap);
2824 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2825 PUSH16(utf8, cur, &ai16, needs_swap);
2829 case 'I' | TYPE_IS_SHRIEKING:
2833 auint = SvUV_no_inf(fromstr, datumtype);
2834 PUSH_VAR(utf8, cur, auint, needs_swap);
2841 aiv = SvIV_no_inf(fromstr, datumtype);
2842 PUSH_VAR(utf8, cur, aiv, needs_swap);
2849 auv = SvUV_no_inf(fromstr, datumtype);
2850 PUSH_VAR(utf8, cur, auv, needs_swap);
2857 S_sv_check_infnan(aTHX_ fromstr, datumtype);
2858 anv = SvNV_nomg(fromstr);
2862 SvCUR_set(cat, cur - start);
2863 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
2866 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2867 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2868 any negative IVs will have already been got by the croak()
2869 above. IOK is untrue for fractions, so we test them
2870 against UV_MAX_P1. */
2871 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2872 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
2873 char *in = buf + sizeof(buf);
2874 UV auv = SvUV_nomg(fromstr);
2877 *--in = (char)((auv & 0x7f) | 0x80);
2880 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2881 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2882 in, (buf + sizeof(buf)) - in);
2883 } else if (SvPOKp(fromstr))
2885 else if (SvNOKp(fromstr)) {
2886 /* 10**NV_MAX_10_EXP is the largest power of 10
2887 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
2888 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2889 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2890 And with that many bytes only Inf can overflow.
2891 Some C compilers are strict about integral constant
2892 expressions so we conservatively divide by a slightly
2893 smaller integer instead of multiplying by the exact
2894 floating-point value.
2896 #ifdef NV_MAX_10_EXP
2897 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
2898 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
2900 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
2901 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
2903 char *in = buf + sizeof(buf);
2905 anv = Perl_floor(anv);
2907 const NV next = Perl_floor(anv / 128);
2908 if (in <= buf) /* this cannot happen ;-) */
2909 Perl_croak(aTHX_ "Cannot compress integer in pack");
2910 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
2913 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2914 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2915 in, (buf + sizeof(buf)) - in);
2924 /* Copy string and check for compliance */
2925 from = SvPV_nomg_const(fromstr, len);
2926 if ((norm = is_an_int(from, len)) == NULL)
2927 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
2929 Newx(result, len, char);
2932 while (!done) *--in = div128(norm, &done) | 0x80;
2933 result[len - 1] &= 0x7F; /* clear continue bit */
2934 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2935 in, (result + len) - in);
2937 SvREFCNT_dec(norm); /* free norm */
2942 case 'i' | TYPE_IS_SHRIEKING:
2946 aint = SvIV_no_inf(fromstr, datumtype);
2947 PUSH_VAR(utf8, cur, aint, needs_swap);
2950 case 'N' | TYPE_IS_SHRIEKING:
2955 au32 = SvUV_no_inf(fromstr, datumtype);
2956 au32 = PerlSock_htonl(au32);
2957 PUSH32(utf8, cur, &au32, FALSE);
2960 case 'V' | TYPE_IS_SHRIEKING:
2965 au32 = SvUV_no_inf(fromstr, datumtype);
2967 PUSH32(utf8, cur, &au32, FALSE);
2970 case 'L' | TYPE_IS_SHRIEKING:
2971 #if LONGSIZE != SIZE32
2973 unsigned long aulong;
2975 aulong = SvUV_no_inf(fromstr, datumtype);
2976 PUSH_VAR(utf8, cur, aulong, needs_swap);
2986 au32 = SvUV_no_inf(fromstr, datumtype);
2987 PUSH32(utf8, cur, &au32, needs_swap);
2990 case 'l' | TYPE_IS_SHRIEKING:
2991 #if LONGSIZE != SIZE32
2995 along = SvIV_no_inf(fromstr, datumtype);
2996 PUSH_VAR(utf8, cur, along, needs_swap);
3006 ai32 = SvIV_no_inf(fromstr, datumtype);
3007 PUSH32(utf8, cur, &ai32, needs_swap);
3010 #if defined(HAS_QUAD) && IVSIZE >= 8
3015 auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype);
3016 PUSH_VAR(utf8, cur, auquad, needs_swap);
3023 aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype);
3024 PUSH_VAR(utf8, cur, aquad, needs_swap);
3029 len = 1; /* assume SV is correct length */
3030 GROWING(utf8, cat, start, cur, sizeof(char *));
3037 SvGETMAGIC(fromstr);
3038 if (!SvOK(fromstr)) aptr = NULL;
3040 /* XXX better yet, could spirit away the string to
3041 * a safe spot and hang on to it until the result
3042 * of pack() (and all copies of the result) are
3045 if (((SvTEMP(fromstr) && SvREFCNT(fromstr) == 1)
3046 || (SvPADTMP(fromstr) &&
3047 !SvREADONLY(fromstr)))) {
3048 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3049 "Attempt to pack pointer to temporary value");
3051 if (SvPOK(fromstr) || SvNIOK(fromstr))
3052 aptr = SvPV_nomg_const_nolen(fromstr);
3054 aptr = SvPV_force_flags_nolen(fromstr, 0);
3056 PUSH_VAR(utf8, cur, aptr, needs_swap);
3060 const char *aptr, *aend;
3064 if (len <= 2) len = 45;
3065 else len = len / 3 * 3;
3067 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3068 "Field too wide in 'u' format in pack");
3071 aptr = SvPV_const(fromstr, fromlen);
3072 from_utf8 = DO_UTF8(fromstr);
3074 aend = aptr + fromlen;
3075 fromlen = sv_len_utf8_nomg(fromstr);
3076 } else aend = NULL; /* Unused, but keep compilers happy */
3077 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3078 while (fromlen > 0) {
3081 U8 hunk[1+63/3*4+1];
3083 if ((SSize_t)fromlen > len)
3089 if (!S_utf8_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3090 'u' | TYPE_IS_PACK)) {
3092 SvCUR_set(cat, cur - start);
3093 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3094 "aptr=%p, aend=%p, buffer=%p, todo=%zd",
3095 aptr, aend, buffer, todo);
3097 end = doencodes(hunk, (const U8 *)buffer, todo);
3099 end = doencodes(hunk, (const U8 *)aptr, todo);
3102 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
3109 SvCUR_set(cat, cur - start);
3111 *symptr = lookahead;
3120 dSP; dMARK; dORIGMARK; dTARGET;
3123 SV *pat_sv = *++MARK;
3124 const char *pat = SvPV_const(pat_sv, fromlen);
3125 const char *patend = pat + fromlen;
3131 packlist(cat, pat, patend, MARK, SP + 1);
3140 * ex: set ts=8 sts=4 sw=4 et: