3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
32 #define PERL_IN_PP_PACK_C
35 /* Types used by pack/unpack */
37 e_no_len, /* no length */
38 e_number, /* number, [] */
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
57 #define TEMPSYM_INIT(symptr, p, e, f) \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
81 U8 bytes[sizeof(long double)];
88 /* Maximum number of bytes to which a byte can grow due to upgrade */
92 * Offset for integer pack/unpack.
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
112 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
115 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
116 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
117 # define OFF16(p) ((char*)(p))
118 # define OFF32(p) ((char*)(p))
120 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
121 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
124 ++++ bad cray byte order
128 # define OFF16(p) ((char *) (p))
129 # define OFF32(p) ((char *) (p))
132 #define PUSH16(utf8, cur, p, needs_swap) \
133 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
134 #define PUSH32(utf8, cur, p, needs_swap) \
135 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
137 #if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
138 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
139 #elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
140 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
142 # error "Unsupported byteorder"
143 /* Need to add code here to re-instate mixed endian support.
144 NEEDS_SWAP would need to hold a flag indicating which action to
145 take, and S_reverse_copy and the code in S_utf8_to_bytes would need
146 logic adding to deal with any mixed-endian transformations needed.
150 /* Only to be used inside a loop (see the break) */
151 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
153 if (UNLIKELY(utf8)) { \
154 if (!S_utf8_to_bytes(aTHX_ &s, strend, \
155 (char *) (buf), len, datumtype)) break; \
157 if (UNLIKELY(needs_swap)) \
158 S_reverse_copy(s, (char *) (buf), len); \
160 Copy(s, (char *) (buf), len, char); \
165 #define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
166 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
168 #define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
169 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
171 #define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
172 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
174 #define PUSH_VAR(utf8, aptr, var, needs_swap) \
175 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
177 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
178 #define MAX_SUB_TEMPLATE_LEVEL 100
180 /* flags (note that type modifiers can also be used as flags!) */
181 #define FLAG_WAS_UTF8 0x40
182 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
183 #define FLAG_UNPACK_ONLY_ONE 0x10
184 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
185 #define FLAG_SLASH 0x04
186 #define FLAG_COMMA 0x02
187 #define FLAG_PACK 0x01
190 S_mul128(pTHX_ SV *sv, U8 m)
193 char *s = SvPV(sv, len);
196 PERL_ARGS_ASSERT_MUL128;
198 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
199 SV * const tmpNew = newSVpvs("0000000000");
201 sv_catsv(tmpNew, sv);
202 SvREFCNT_dec(sv); /* free old sv */
207 while (!*t) /* trailing '\0'? */
210 const U32 i = ((*t - '0') << 7) + m;
211 *(t--) = '0' + (char)(i % 10);
217 /* Explosives and implosives. */
219 #define ISUUCHAR(ch) (NATIVE_TO_LATIN1(ch) >= NATIVE_TO_LATIN1(' ') \
220 && NATIVE_TO_LATIN1(ch) < NATIVE_TO_LATIN1('a'))
223 #define TYPE_IS_SHRIEKING 0x100
224 #define TYPE_IS_BIG_ENDIAN 0x200
225 #define TYPE_IS_LITTLE_ENDIAN 0x400
226 #define TYPE_IS_PACK 0x800
227 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
228 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
229 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
231 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
232 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
234 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
236 #define PACK_SIZE_CANNOT_CSUM 0x80
237 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
238 #define PACK_SIZE_MASK 0x3F
240 #include "packsizetables.inc"
243 S_reverse_copy(const char *src, char *dest, STRLEN len)
251 utf8_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
254 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
255 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
256 /* We try to process malformed UTF-8 as much as possible (preferably with
257 warnings), but these two mean we make no progress in the string and
258 might enter an infinite loop */
259 if (retlen == (STRLEN) -1 || retlen == 0)
260 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
261 (int) TYPE_NO_MODIFIERS(datumtype));
263 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
264 "Character in '%c' format wrapped in unpack",
265 (int) TYPE_NO_MODIFIERS(datumtype));
272 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
273 utf8_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
277 S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
281 const char *from = *s;
283 const U32 flags = ckWARN(WARN_UTF8) ?
284 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
285 const bool needs_swap = NEEDS_SWAP(datumtype);
287 if (UNLIKELY(needs_swap))
290 for (;buf_len > 0; buf_len--) {
291 if (from >= end) return FALSE;
292 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
293 if (retlen == (STRLEN) -1 || retlen == 0) {
294 from += UTF8SKIP(from);
296 } else from += retlen;
301 if (UNLIKELY(needs_swap))
302 *(U8 *)--buf = (U8)val;
304 *(U8 *)buf++ = (U8)val;
306 /* We have enough characters for the buffer. Did we have problems ? */
309 /* Rewalk the string fragment while warning */
311 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
312 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
313 if (ptr >= end) break;
314 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
316 if (from > end) from = end;
319 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
320 WARN_PACK : WARN_UNPACK),
321 "Character(s) in '%c' format wrapped in %s",
322 (int) TYPE_NO_MODIFIERS(datumtype),
323 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
330 S_my_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
331 PERL_ARGS_ASSERT_MY_BYTES_TO_UTF8;
333 if (UNLIKELY(needs_swap)) {
334 const U8 *p = start + len;
335 while (p-- > start) {
336 append_utf8_from_native_byte(*p, (U8 **) & dest);
339 const U8 * const end = start + len;
340 while (start < end) {
341 append_utf8_from_native_byte(*start, (U8 **) & dest);
348 #define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
350 if (UNLIKELY(utf8)) \
351 (cur) = my_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap); \
353 if (UNLIKELY(needs_swap)) \
354 S_reverse_copy((char *)(buf), cur, len); \
356 Copy(buf, cur, len, char); \
361 #define GROWING(utf8, cat, start, cur, in_len) \
363 STRLEN glen = (in_len); \
364 if (utf8) glen *= UTF8_EXPAND; \
365 if ((cur) + glen >= (start) + SvLEN(cat)) { \
366 (start) = sv_exp_grow(cat, glen); \
367 (cur) = (start) + SvCUR(cat); \
371 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
373 const STRLEN glen = (in_len); \
375 if (utf8) gl *= UTF8_EXPAND; \
376 if ((cur) + gl >= (start) + SvLEN(cat)) { \
378 SvCUR_set((cat), (cur) - (start)); \
379 (start) = sv_exp_grow(cat, gl); \
380 (cur) = (start) + SvCUR(cat); \
382 PUSH_BYTES(utf8, cur, buf, glen, 0); \
385 #define PUSH_BYTE(utf8, s, byte) \
388 const U8 au8 = (byte); \
389 (s) = my_bytes_to_utf8(&au8, 1, (s), 0);\
390 } else *(U8 *)(s)++ = (byte); \
393 /* Only to be used inside a loop (see the break) */
394 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
397 if (str >= end) break; \
398 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
399 if (retlen == (STRLEN) -1 || retlen == 0) { \
401 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
406 static const char *_action( const tempsym_t* symptr )
408 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
411 /* Returns the sizeof() struct described by pat */
413 S_measure_struct(pTHX_ tempsym_t* symptr)
417 PERL_ARGS_ASSERT_MEASURE_STRUCT;
419 while (next_symbol(symptr)) {
423 switch (symptr->howlen) {
425 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
429 /* e_no_len and e_number */
430 len = symptr->length;
434 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
437 /* endianness doesn't influence the size of a type */
438 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
440 Perl_croak(aTHX_ "Invalid type '%c' in %s",
441 (int)TYPE_NO_MODIFIERS(symptr->code),
443 case '.' | TYPE_IS_SHRIEKING:
444 case '@' | TYPE_IS_SHRIEKING:
448 case 'U': /* XXXX Is it correct? */
451 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
452 (int) TYPE_NO_MODIFIERS(symptr->code),
459 tempsym_t savsym = *symptr;
460 symptr->patptr = savsym.grpbeg;
461 symptr->patend = savsym.grpend;
462 /* XXXX Theoretically, we need to measure many times at
463 different positions, since the subexpression may contain
464 alignment commands, but be not of aligned length.
465 Need to detect this and croak(). */
466 size = measure_struct(symptr);
470 case 'X' | TYPE_IS_SHRIEKING:
471 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
473 if (!len) /* Avoid division by 0 */
475 len = total % len; /* Assumed: the start is aligned. */
480 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
482 case 'x' | TYPE_IS_SHRIEKING:
483 if (!len) /* Avoid division by 0 */
485 star = total % len; /* Assumed: the start is aligned. */
486 if (star) /* Other portable ways? */
510 size = sizeof(char*);
520 /* locate matching closing parenthesis or bracket
521 * returns char pointer to char after match, or NULL
524 S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
526 PERL_ARGS_ASSERT_GROUP_END;
528 while (patptr < patend) {
529 const char c = *patptr++;
536 while (patptr < patend && *patptr != '\n')
540 patptr = group_end(patptr, patend, ')') + 1;
542 patptr = group_end(patptr, patend, ']') + 1;
544 Perl_croak(aTHX_ "No group ending character '%c' found in template",
546 NOT_REACHED; /* NOTREACHED */
550 /* Convert unsigned decimal number to binary.
551 * Expects a pointer to the first digit and address of length variable
552 * Advances char pointer to 1st non-digit char and returns number
555 S_get_num(pTHX_ const char *patptr, I32 *lenptr )
557 I32 len = *patptr++ - '0';
559 PERL_ARGS_ASSERT_GET_NUM;
561 while (isDIGIT(*patptr)) {
562 if (len >= 0x7FFFFFFF/10)
563 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
564 len = (len * 10) + (*patptr++ - '0');
570 /* The marvellous template parsing routine: Using state stored in *symptr,
571 * locates next template code and count
574 S_next_symbol(pTHX_ tempsym_t* symptr )
576 const char* patptr = symptr->patptr;
577 const char* const patend = symptr->patend;
579 PERL_ARGS_ASSERT_NEXT_SYMBOL;
581 symptr->flags &= ~FLAG_SLASH;
583 while (patptr < patend) {
584 if (isSPACE(*patptr))
586 else if (*patptr == '#') {
588 while (patptr < patend && *patptr != '\n')
593 /* We should have found a template code */
594 I32 code = *patptr++ & 0xFF;
595 U32 inherited_modifiers = 0;
597 if (code == ','){ /* grandfather in commas but with a warning */
598 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
599 symptr->flags |= FLAG_COMMA;
600 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
601 "Invalid type ',' in %s", _action( symptr ) );
606 /* for '(', skip to ')' */
608 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
609 Perl_croak(aTHX_ "()-group starts with a count in %s",
611 symptr->grpbeg = patptr;
612 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
613 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
614 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
618 /* look for group modifiers to inherit */
619 if (TYPE_ENDIANNESS(symptr->flags)) {
620 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
621 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
624 /* look for modifiers */
625 while (patptr < patend) {
630 modifier = TYPE_IS_SHRIEKING;
631 allowed = "sSiIlLxXnNvV@.";
634 modifier = TYPE_IS_BIG_ENDIAN;
635 allowed = ENDIANNESS_ALLOWED_TYPES;
638 modifier = TYPE_IS_LITTLE_ENDIAN;
639 allowed = ENDIANNESS_ALLOWED_TYPES;
650 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
651 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
652 allowed, _action( symptr ) );
654 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
655 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
656 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
657 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
658 TYPE_ENDIANNESS_MASK)
659 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
660 *patptr, _action( symptr ) );
662 if ((code & modifier)) {
663 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
664 "Duplicate modifier '%c' after '%c' in %s",
665 *patptr, (int) TYPE_NO_MODIFIERS(code),
673 /* inherit modifiers */
674 code |= inherited_modifiers;
676 /* look for count and/or / */
677 if (patptr < patend) {
678 if (isDIGIT(*patptr)) {
679 patptr = get_num( patptr, &symptr->length );
680 symptr->howlen = e_number;
682 } else if (*patptr == '*') {
684 symptr->howlen = e_star;
686 } else if (*patptr == '[') {
687 const char* lenptr = ++patptr;
688 symptr->howlen = e_number;
689 patptr = group_end( patptr, patend, ']' ) + 1;
690 /* what kind of [] is it? */
691 if (isDIGIT(*lenptr)) {
692 lenptr = get_num( lenptr, &symptr->length );
694 Perl_croak(aTHX_ "Malformed integer in [] in %s",
697 tempsym_t savsym = *symptr;
698 symptr->patend = patptr-1;
699 symptr->patptr = lenptr;
700 savsym.length = measure_struct(symptr);
704 symptr->howlen = e_no_len;
709 while (patptr < patend) {
710 if (isSPACE(*patptr))
712 else if (*patptr == '#') {
714 while (patptr < patend && *patptr != '\n')
719 if (*patptr == '/') {
720 symptr->flags |= FLAG_SLASH;
722 if (patptr < patend &&
723 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
724 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
731 /* at end - no count, no / */
732 symptr->howlen = e_no_len;
737 symptr->patptr = patptr;
741 symptr->patptr = patptr;
746 There is no way to cleanly handle the case where we should process the
747 string per byte in its upgraded form while it's really in downgraded form
748 (e.g. estimates like strend-s as an upper bound for the number of
749 characters left wouldn't work). So if we foresee the need of this
750 (pattern starts with U or contains U0), we want to work on the encoded
751 version of the string. Users are advised to upgrade their pack string
752 themselves if they need to do a lot of unpacks like this on it
755 need_utf8(const char *pat, const char *patend)
759 PERL_ARGS_ASSERT_NEED_UTF8;
761 while (pat < patend) {
764 pat = (const char *) memchr(pat, '\n', patend-pat);
765 if (!pat) return FALSE;
766 } else if (pat[0] == 'U') {
767 if (first || pat[1] == '0') return TRUE;
768 } else first = FALSE;
775 first_symbol(const char *pat, const char *patend) {
776 PERL_ARGS_ASSERT_FIRST_SYMBOL;
778 while (pat < patend) {
779 if (pat[0] != '#') return pat[0];
781 pat = (const char *) memchr(pat, '\n', patend-pat);
790 =head1 Pack and Unpack
792 =for apidoc unpackstring
794 The engine implementing the C<unpack()> Perl function.
796 Using the template C<pat..patend>, this function unpacks the string
797 C<s..strend> into a number of mortal SVs, which it pushes onto the perl
798 argument (C<@_>) stack (so you will need to issue a C<PUTBACK> before and
799 C<SPAGAIN> after the call to this function). It returns the number of
802 The C<strend> and C<patend> pointers should point to the byte following the
803 last character of each string.
805 Although this function returns its values on the perl argument stack, it
806 doesn't take any parameters from that stack (and thus in particular
807 there's no need to do a C<PUSHMARK> before calling it, unlike L</call_pv> for
813 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
817 PERL_ARGS_ASSERT_UNPACKSTRING;
819 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
820 else if (need_utf8(pat, patend)) {
821 /* We probably should try to avoid this in case a scalar context call
822 wouldn't get to the "U0" */
823 STRLEN len = strend - s;
824 s = (char *) bytes_to_utf8((U8 *) s, &len);
827 flags |= FLAG_DO_UTF8;
830 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
831 flags |= FLAG_PARSE_UTF8;
833 TEMPSYM_INIT(&sym, pat, patend, flags);
835 return unpack_rec(&sym, s, s, strend, NULL );
839 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
843 const I32 start_sp_offset = SP - PL_stack_base;
848 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
850 bool explicit_length;
851 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
852 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
854 PERL_ARGS_ASSERT_UNPACK_REC;
856 symptr->strbeg = s - strbeg;
858 while (next_symbol(symptr)) {
861 I32 datumtype = symptr->code;
863 /* do first one only unless in list context
864 / is implemented by unpacking the count, then popping it from the
865 stack, so must check that we're not in the middle of a / */
867 && (SP - PL_stack_base == start_sp_offset + 1)
868 && (datumtype != '/') ) /* XXX can this be omitted */
871 switch (howlen = symptr->howlen) {
873 len = strend - strbeg; /* long enough */
876 /* e_no_len and e_number */
877 len = symptr->length;
881 explicit_length = TRUE;
883 beyond = s >= strend;
885 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
887 /* props nonzero means we can process this letter. */
888 const long size = props & PACK_SIZE_MASK;
889 const long howmany = (strend - s) / size;
893 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
894 if (len && unpack_only_one) len = 1;
900 needs_swap = NEEDS_SWAP(datumtype);
902 switch(TYPE_NO_ENDIANNESS(datumtype)) {
904 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
907 if (howlen == e_no_len)
908 len = 16; /* len is not specified */
916 tempsym_t savsym = *symptr;
917 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
918 symptr->flags |= group_modifiers;
919 symptr->patend = savsym.grpend;
920 symptr->previous = &savsym;
923 if (len && unpack_only_one) len = 1;
925 symptr->patptr = savsym.grpbeg;
926 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
927 else symptr->flags &= ~FLAG_PARSE_UTF8;
928 unpack_rec(symptr, s, strbeg, strend, &s);
929 if (s == strend && savsym.howlen == e_star)
930 break; /* No way to continue */
933 savsym.flags = symptr->flags & ~group_modifiers;
937 case '.' | TYPE_IS_SHRIEKING:
941 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
942 if (howlen == e_star) from = strbeg;
943 else if (len <= 0) from = s;
945 tempsym_t *group = symptr;
947 while (--len && group) group = group->previous;
948 from = group ? strbeg + group->strbeg : strbeg;
951 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
952 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
956 case '@' | TYPE_IS_SHRIEKING:
958 s = strbeg + symptr->strbeg;
959 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
963 Perl_croak(aTHX_ "'@' outside of string in unpack");
968 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
971 Perl_croak(aTHX_ "'@' outside of string in unpack");
975 case 'X' | TYPE_IS_SHRIEKING:
976 if (!len) /* Avoid division by 0 */
979 const char *hop, *last;
983 hop += UTF8SKIP(hop);
990 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
994 len = (s - strbeg) % len;
1000 Perl_croak(aTHX_ "'X' outside of string in unpack");
1001 while (--s, UTF8_IS_CONTINUATION(*s)) {
1003 Perl_croak(aTHX_ "'X' outside of string in unpack");
1008 if (len > s - strbeg)
1009 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1013 case 'x' | TYPE_IS_SHRIEKING: {
1015 if (!len) /* Avoid division by 0 */
1017 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1018 else ai32 = (s - strbeg) % len;
1019 if (ai32 == 0) break;
1027 Perl_croak(aTHX_ "'x' outside of string in unpack");
1032 if (len > strend - s)
1033 Perl_croak(aTHX_ "'x' outside of string in unpack");
1038 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1044 /* Preliminary length estimate is assumed done in 'W' */
1045 if (len > strend - s) len = strend - s;
1051 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1052 if (hop >= strend) {
1054 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1059 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1061 } else if (len > strend - s)
1064 if (datumtype == 'Z') {
1065 /* 'Z' strips stuff after first null */
1066 const char *ptr, *end;
1068 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1069 sv = newSVpvn(s, ptr-s);
1070 if (howlen == e_star) /* exact for 'Z*' */
1071 len = ptr-s + (ptr != strend ? 1 : 0);
1072 } else if (datumtype == 'A') {
1073 /* 'A' strips both nulls and spaces */
1075 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1076 for (ptr = s+len-1; ptr >= s; ptr--)
1077 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1078 !isSPACE_utf8(ptr)) break;
1079 if (ptr >= s) ptr += UTF8SKIP(ptr);
1082 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1084 for (ptr = s+len-1; ptr >= s; ptr--)
1085 if (*ptr != 0 && !isSPACE(*ptr)) break;
1088 sv = newSVpvn(s, ptr-s);
1089 } else sv = newSVpvn(s, len);
1093 /* Undo any upgrade done due to need_utf8() */
1094 if (!(symptr->flags & FLAG_WAS_UTF8))
1095 sv_utf8_downgrade(sv, 0);
1103 if (howlen == e_star || len > (strend - s) * 8)
1104 len = (strend - s) * 8;
1107 while (len >= 8 && s < strend) {
1108 cuv += PL_bitcount[utf8_to_byte(aTHX_ &s, strend, datumtype)];
1113 cuv += PL_bitcount[*(U8 *)s++];
1116 if (len && s < strend) {
1118 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1119 if (datumtype == 'b')
1121 if (bits & 1) cuv++;
1126 if (bits & 0x80) cuv++;
1133 sv = sv_2mortal(newSV(len ? len : 1));
1136 if (datumtype == 'b') {
1138 const I32 ai32 = len;
1139 for (len = 0; len < ai32; len++) {
1140 if (len & 7) bits >>= 1;
1142 if (s >= strend) break;
1143 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1144 } else bits = *(U8 *) s++;
1145 *str++ = bits & 1 ? '1' : '0';
1149 const I32 ai32 = len;
1150 for (len = 0; len < ai32; len++) {
1151 if (len & 7) bits <<= 1;
1153 if (s >= strend) break;
1154 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1155 } else bits = *(U8 *) s++;
1156 *str++ = bits & 0x80 ? '1' : '0';
1160 SvCUR_set(sv, str - SvPVX_const(sv));
1167 /* Preliminary length estimate, acceptable for utf8 too */
1168 if (howlen == e_star || len > (strend - s) * 2)
1169 len = (strend - s) * 2;
1171 sv = sv_2mortal(newSV(len ? len : 1));
1175 if (datumtype == 'h') {
1178 for (len = 0; len < ai32; len++) {
1179 if (len & 1) bits >>= 4;
1181 if (s >= strend) break;
1182 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1183 } else bits = * (U8 *) s++;
1185 *str++ = PL_hexdigit[bits & 15];
1189 const I32 ai32 = len;
1190 for (len = 0; len < ai32; len++) {
1191 if (len & 1) bits <<= 4;
1193 if (s >= strend) break;
1194 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1195 } else bits = *(U8 *) s++;
1197 *str++ = PL_hexdigit[(bits >> 4) & 15];
1202 SvCUR_set(sv, str - SvPVX_const(sv));
1209 if (explicit_length)
1210 /* Switch to "character" mode */
1211 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1216 while (len-- > 0 && s < strend) {
1221 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1222 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1223 if (retlen == (STRLEN) -1 || retlen == 0)
1224 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1228 aint = *(U8 *)(s)++;
1229 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1233 else if (checksum > bits_in_uv)
1234 cdouble += (NV)aint;
1242 while (len-- > 0 && s < strend) {
1244 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1245 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1246 if (retlen == (STRLEN) -1 || retlen == 0)
1247 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1251 else if (checksum > bits_in_uv)
1252 cdouble += (NV) val;
1256 } else if (!checksum)
1258 const U8 ch = *(U8 *) s++;
1261 else if (checksum > bits_in_uv)
1262 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1264 while (len-- > 0) cuv += *(U8 *) s++;
1268 if (explicit_length && howlen != e_star) {
1269 /* Switch to "bytes in UTF-8" mode */
1270 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1272 /* Should be impossible due to the need_utf8() test */
1273 Perl_croak(aTHX_ "U0 mode on a byte string");
1277 if (len > strend - s) len = strend - s;
1279 if (len && unpack_only_one) len = 1;
1283 while (len-- > 0 && s < strend) {
1287 U8 result[UTF8_MAXLEN];
1288 const char *ptr = s;
1290 /* Bug: warns about bad utf8 even if we are short on bytes
1291 and will break out of the loop */
1292 if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1295 len = UTF8SKIP(result);
1296 if (!S_utf8_to_bytes(aTHX_ &ptr, strend,
1297 (char *) &result[1], len-1, 'U')) break;
1298 auv = NATIVE_TO_UNI(utf8n_to_uvchr(result,
1301 UTF8_ALLOW_DEFAULT));
1304 auv = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s,
1307 UTF8_ALLOW_DEFAULT));
1308 if (retlen == (STRLEN) -1 || retlen == 0)
1309 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1314 else if (checksum > bits_in_uv)
1315 cdouble += (NV) auv;
1320 case 's' | TYPE_IS_SHRIEKING:
1321 #if SHORTSIZE != SIZE16
1324 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
1327 else if (checksum > bits_in_uv)
1328 cdouble += (NV)ashort;
1340 #if U16SIZE > SIZE16
1343 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1344 #if U16SIZE > SIZE16
1350 else if (checksum > bits_in_uv)
1351 cdouble += (NV)ai16;
1356 case 'S' | TYPE_IS_SHRIEKING:
1357 #if SHORTSIZE != SIZE16
1359 unsigned short aushort;
1360 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap,
1364 else if (checksum > bits_in_uv)
1365 cdouble += (NV)aushort;
1378 #if U16SIZE > SIZE16
1381 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
1382 if (datumtype == 'n')
1383 au16 = PerlSock_ntohs(au16);
1384 if (datumtype == 'v')
1388 else if (checksum > bits_in_uv)
1389 cdouble += (NV) au16;
1394 case 'v' | TYPE_IS_SHRIEKING:
1395 case 'n' | TYPE_IS_SHRIEKING:
1398 # if U16SIZE > SIZE16
1401 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1402 /* There should never be any byte-swapping here. */
1403 assert(!TYPE_ENDIANNESS(datumtype));
1404 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1405 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1406 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1407 ai16 = (I16) vtohs((U16) ai16);
1410 else if (checksum > bits_in_uv)
1411 cdouble += (NV) ai16;
1417 case 'i' | TYPE_IS_SHRIEKING:
1420 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
1423 else if (checksum > bits_in_uv)
1424 cdouble += (NV)aint;
1430 case 'I' | TYPE_IS_SHRIEKING:
1433 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
1436 else if (checksum > bits_in_uv)
1437 cdouble += (NV)auint;
1445 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
1448 else if (checksum > bits_in_uv)
1457 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
1460 else if (checksum > bits_in_uv)
1466 case 'l' | TYPE_IS_SHRIEKING:
1467 #if LONGSIZE != SIZE32
1470 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
1473 else if (checksum > bits_in_uv)
1474 cdouble += (NV)along;
1485 #if U32SIZE > SIZE32
1488 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1489 #if U32SIZE > SIZE32
1490 if (ai32 > 2147483647) ai32 -= 4294967296;
1494 else if (checksum > bits_in_uv)
1495 cdouble += (NV)ai32;
1500 case 'L' | TYPE_IS_SHRIEKING:
1501 #if LONGSIZE != SIZE32
1503 unsigned long aulong;
1504 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
1507 else if (checksum > bits_in_uv)
1508 cdouble += (NV)aulong;
1521 #if U32SIZE > SIZE32
1524 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
1525 if (datumtype == 'N')
1526 au32 = PerlSock_ntohl(au32);
1527 if (datumtype == 'V')
1531 else if (checksum > bits_in_uv)
1532 cdouble += (NV)au32;
1537 case 'V' | TYPE_IS_SHRIEKING:
1538 case 'N' | TYPE_IS_SHRIEKING:
1541 #if U32SIZE > SIZE32
1544 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1545 /* There should never be any byte swapping here. */
1546 assert(!TYPE_ENDIANNESS(datumtype));
1547 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1548 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1549 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1550 ai32 = (I32)vtohl((U32)ai32);
1553 else if (checksum > bits_in_uv)
1554 cdouble += (NV)ai32;
1562 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1563 /* newSVpv generates undef if aptr is NULL */
1564 mPUSHs(newSVpv(aptr, 0));
1572 while (len > 0 && s < strend) {
1574 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1575 auv = (auv << 7) | (ch & 0x7f);
1576 /* UTF8_IS_XXXXX not right here because this is a BER, not
1577 * UTF-8 format - using constant 0x80 */
1585 if (++bytes >= sizeof(UV)) { /* promote to string */
1588 sv = Perl_newSVpvf(aTHX_ "%.*" UVuf,
1589 (int)TYPE_DIGITS(UV), auv);
1590 while (s < strend) {
1591 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1592 sv = mul128(sv, (U8)(ch & 0x7f));
1598 t = SvPV_nolen_const(sv);
1607 if ((s >= strend) && bytes)
1608 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1612 if (symptr->howlen == e_star)
1613 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1615 if (s + sizeof(char*) <= strend) {
1617 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1618 /* newSVpvn generates undef if aptr is NULL */
1619 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1622 #if defined(HAS_QUAD) && IVSIZE >= 8
1626 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
1628 mPUSHs(newSViv((IV)aquad));
1629 else if (checksum > bits_in_uv)
1630 cdouble += (NV)aquad;
1638 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
1640 mPUSHs(newSVuv((UV)auquad));
1641 else if (checksum > bits_in_uv)
1642 cdouble += (NV)auquad;
1648 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1652 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
1662 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
1672 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1673 datumtype, needs_swap);
1680 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1684 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1685 sizeof(aldouble.bytes), datumtype, needs_swap);
1686 /* The most common long double format, the x86 80-bit
1687 * extended precision, has either 2 or 6 unused bytes,
1688 * which may contain garbage, which may contain
1689 * unintentional data. While we do zero the bytes of
1690 * the long double data in pack(), here in unpack() we
1691 * don't, because it's really hard to envision that
1692 * reading the long double off aldouble would be
1693 * affected by the unused bytes.
1695 * Note that trying to unpack 'long doubles' of 'long
1696 * doubles' packed in another system is in the general
1697 * case doomed without having more detail. */
1699 mPUSHn(aldouble.ld);
1701 cdouble += aldouble.ld;
1707 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1708 sv = sv_2mortal(newSV(l));
1709 if (l) SvPOK_on(sv);
1712 /* Note that all legal uuencoded strings are ASCII printables, so
1713 * have the same representation under UTF-8 vs not. This means we
1714 * can ignore UTF8ness on legal input. For illegal we stop at the
1715 * first failure, and don't report where/what that is, so again we
1716 * can ignore UTF8ness */
1718 while (s < strend && *s != ' ' && ISUUCHAR(*s)) {
1722 len = PL_uudmap[*(U8*)s++] & 077;
1724 if (s < strend && ISUUCHAR(*s))
1725 a = PL_uudmap[*(U8*)s++] & 077;
1728 if (s < strend && ISUUCHAR(*s))
1729 b = PL_uudmap[*(U8*)s++] & 077;
1732 if (s < strend && ISUUCHAR(*s))
1733 c = PL_uudmap[*(U8*)s++] & 077;
1736 if (s < strend && ISUUCHAR(*s))
1737 d = PL_uudmap[*(U8*)s++] & 077;
1740 hunk[0] = (char)((a << 2) | (b >> 4));
1741 hunk[1] = (char)((b << 4) | (c >> 2));
1742 hunk[2] = (char)((c << 6) | d);
1744 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1749 else /* possible checksum byte */
1750 if (s + 1 < strend && s[1] == '\n')
1756 } /* End of switch */
1759 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1760 (checksum > bits_in_uv &&
1761 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1764 anv = (NV) (1 << (checksum & 15));
1765 while (checksum >= 16) {
1769 while (cdouble < 0.0)
1771 cdouble = Perl_modf(cdouble / anv, &trouble);
1772 #ifdef LONGDOUBLE_DOUBLEDOUBLE
1773 /* Workaround for powerpc doubledouble modfl bug:
1774 * close to 1.0L and -1.0L cdouble is 0, and trouble
1775 * is cdouble / anv. */
1776 if (trouble != Perl_ceil(trouble)) {
1778 if (cdouble > 1.0L) cdouble -= 1.0L;
1779 if (cdouble < -1.0L) cdouble += 1.0L;
1783 sv = newSVnv(cdouble);
1786 if (checksum < bits_in_uv) {
1787 UV mask = ((UV)1 << checksum) - 1;
1796 if (symptr->flags & FLAG_SLASH){
1797 if (SP - PL_stack_base - start_sp_offset <= 0)
1799 if( next_symbol(symptr) ){
1800 if( symptr->howlen == e_number )
1801 Perl_croak(aTHX_ "Count after length/code in unpack" );
1803 /* ...end of char buffer then no decent length available */
1804 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1806 /* take top of stack (hope it's numeric) */
1809 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1812 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1814 datumtype = symptr->code;
1815 explicit_length = FALSE;
1823 return SP - PL_stack_base - start_sp_offset;
1833 const char *pat = SvPV_const(left, llen);
1834 const char *s = SvPV_const(right, rlen);
1835 const char *strend = s + rlen;
1836 const char *patend = pat + llen;
1840 cnt = unpackstring(pat, patend, s, strend,
1841 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
1842 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
1845 if ( !cnt && gimme == G_SCALAR )
1846 PUSHs(&PL_sv_undef);
1851 doencodes(U8 *h, const U8 *s, I32 len)
1853 *h++ = PL_uuemap[len];
1855 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1856 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1857 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1858 *h++ = PL_uuemap[(077 & (s[2] & 077))];
1863 const U8 r = (len > 1 ? s[1] : '\0');
1864 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1865 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1866 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1867 *h++ = PL_uuemap[0];
1874 S_is_an_int(pTHX_ const char *s, STRLEN l)
1876 SV *result = newSVpvn(s, l);
1877 char *const result_c = SvPV_nolen(result); /* convenience */
1878 char *out = result_c;
1882 PERL_ARGS_ASSERT_IS_AN_INT;
1890 SvREFCNT_dec(result);
1913 SvREFCNT_dec(result);
1919 SvCUR_set(result, out - result_c);
1923 /* pnum must be '\0' terminated */
1925 S_div128(pTHX_ SV *pnum, bool *done)
1928 char * const s = SvPV(pnum, len);
1932 PERL_ARGS_ASSERT_DIV128;
1936 const int i = m * 10 + (*t - '0');
1937 const int r = (i >> 7); /* r < 10 */
1945 SvCUR_set(pnum, (STRLEN) (t - s));
1950 =for apidoc packlist
1952 The engine implementing C<pack()> Perl function.
1958 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
1962 PERL_ARGS_ASSERT_PACKLIST;
1964 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
1966 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1967 Also make sure any UTF8 flag is loaded */
1968 SvPV_force_nolen(cat);
1970 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
1972 (void)pack_rec( cat, &sym, beglist, endlist );
1975 /* like sv_utf8_upgrade, but also repoint the group start markers */
1977 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
1980 const char *from_ptr, *from_start, *from_end, **marks, **m;
1981 char *to_start, *to_ptr;
1983 if (SvUTF8(sv)) return;
1985 from_start = SvPVX_const(sv);
1986 from_end = from_start + SvCUR(sv);
1987 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
1988 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
1989 if (from_ptr == from_end) {
1990 /* Simple case: no character needs to be changed */
1995 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
1996 Newx(to_start, len, char);
1997 Copy(from_start, to_start, from_ptr-from_start, char);
1998 to_ptr = to_start + (from_ptr-from_start);
2000 Newx(marks, sym_ptr->level+2, const char *);
2001 for (group=sym_ptr; group; group = group->previous)
2002 marks[group->level] = from_start + group->strbeg;
2003 marks[sym_ptr->level+1] = from_end+1;
2004 for (m = marks; *m < from_ptr; m++)
2005 *m = to_start + (*m-from_start);
2007 for (;from_ptr < from_end; from_ptr++) {
2008 while (*m == from_ptr) *m++ = to_ptr;
2009 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2013 while (*m == from_ptr) *m++ = to_ptr;
2014 if (m != marks + sym_ptr->level+1) {
2017 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2018 "level=%d", m, marks, sym_ptr->level);
2020 for (group=sym_ptr; group; group = group->previous)
2021 group->strbeg = marks[group->level] - to_start;
2026 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2027 from_start -= SvIVX(sv);
2030 SvFLAGS(sv) &= ~SVf_OOK;
2033 Safefree(from_start);
2034 SvPV_set(sv, to_start);
2035 SvCUR_set(sv, to_ptr - to_start);
2040 /* Exponential string grower. Makes string extension effectively O(n)
2041 needed says how many extra bytes we need (not counting the final '\0')
2042 Only grows the string if there is an actual lack of space
2045 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2046 const STRLEN cur = SvCUR(sv);
2047 const STRLEN len = SvLEN(sv);
2050 PERL_ARGS_ASSERT_SV_EXP_GROW;
2052 if (len - cur > needed) return SvPVX(sv);
2053 extend = needed > len ? needed : len;
2054 return SvGROW(sv, len+extend+1);
2058 S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
2061 if (UNLIKELY(SvAMAGIC(sv)))
2063 if (UNLIKELY(isinfnansv(sv))) {
2064 const I32 c = TYPE_NO_MODIFIERS(datumtype);
2065 const NV nv = SvNV_nomg(sv);
2067 Perl_croak(aTHX_ "Cannot compress %" NVgf " in pack", nv);
2069 Perl_croak(aTHX_ "Cannot pack %" NVgf " with '%c'", nv, (int) c);
2074 #define SvIV_no_inf(sv,d) \
2075 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
2076 #define SvUV_no_inf(sv,d) \
2077 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
2081 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2083 tempsym_t lookahead;
2084 I32 items = endlist - beglist;
2085 bool found = next_symbol(symptr);
2086 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2087 bool warn_utf8 = ckWARN(WARN_UTF8);
2090 PERL_ARGS_ASSERT_PACK_REC;
2092 if (symptr->level == 0 && found && symptr->code == 'U') {
2093 marked_upgrade(aTHX_ cat, symptr);
2094 symptr->flags |= FLAG_DO_UTF8;
2097 symptr->strbeg = SvCUR(cat);
2103 SV *lengthcode = NULL;
2104 I32 datumtype = symptr->code;
2105 howlen_t howlen = symptr->howlen;
2106 char *start = SvPVX(cat);
2107 char *cur = start + SvCUR(cat);
2110 #define NEXTFROM (lengthcode ? lengthcode : items > 0 ? (--items, *beglist++) : &PL_sv_no)
2111 #define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
2115 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2119 /* e_no_len and e_number */
2120 len = symptr->length;
2125 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2127 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2128 /* We can process this letter. */
2129 STRLEN size = props & PACK_SIZE_MASK;
2130 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2134 /* Look ahead for next symbol. Do we have code/code? */
2135 lookahead = *symptr;
2136 found = next_symbol(&lookahead);
2137 if (symptr->flags & FLAG_SLASH) {
2139 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2140 if (strchr("aAZ", lookahead.code)) {
2141 if (lookahead.howlen == e_number) count = lookahead.length;
2144 count = sv_len_utf8(*beglist);
2147 if (lookahead.code == 'Z') count++;
2150 if (lookahead.howlen == e_number && lookahead.length < items)
2151 count = lookahead.length;
2154 lookahead.howlen = e_number;
2155 lookahead.length = count;
2156 lengthcode = sv_2mortal(newSViv(count));
2159 needs_swap = NEEDS_SWAP(datumtype);
2161 /* Code inside the switch must take care to properly update
2162 cat (CUR length and '\0' termination) if it updated *cur and
2163 doesn't simply leave using break */
2164 switch (TYPE_NO_ENDIANNESS(datumtype)) {
2166 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2167 (int) TYPE_NO_MODIFIERS(datumtype));
2169 Perl_croak(aTHX_ "'%%' may not be used in pack");
2171 case '.' | TYPE_IS_SHRIEKING:
2173 if (howlen == e_star) from = start;
2174 else if (len == 0) from = cur;
2176 tempsym_t *group = symptr;
2178 while (--len && group) group = group->previous;
2179 from = group ? start + group->strbeg : start;
2182 len = SvIV_no_inf(fromstr, datumtype);
2184 case '@' | TYPE_IS_SHRIEKING:
2186 from = start + symptr->strbeg;
2188 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2190 while (len && from < cur) {
2191 from += UTF8SKIP(from);
2195 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2197 /* Here we know from == cur */
2199 GROWING(0, cat, start, cur, len);
2200 Zero(cur, len, char);
2202 } else if (from < cur) {
2205 } else goto no_change;
2213 if (len > 0) goto grow;
2214 if (len == 0) goto no_change;
2221 tempsym_t savsym = *symptr;
2222 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2223 symptr->flags |= group_modifiers;
2224 symptr->patend = savsym.grpend;
2226 symptr->previous = &lookahead;
2229 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2230 else symptr->flags &= ~FLAG_PARSE_UTF8;
2231 was_utf8 = SvUTF8(cat);
2232 symptr->patptr = savsym.grpbeg;
2233 beglist = pack_rec(cat, symptr, beglist, endlist);
2234 if (SvUTF8(cat) != was_utf8)
2235 /* This had better be an upgrade while in utf8==0 mode */
2238 if (savsym.howlen == e_star && beglist == endlist)
2239 break; /* No way to continue */
2241 items = endlist - beglist;
2242 lookahead.flags = symptr->flags & ~group_modifiers;
2245 case 'X' | TYPE_IS_SHRIEKING:
2246 if (!len) /* Avoid division by 0 */
2253 hop += UTF8SKIP(hop);
2260 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2264 len = (cur-start) % len;
2268 if (len < 1) goto no_change;
2272 Perl_croak(aTHX_ "'%c' outside of string in pack",
2273 (int) TYPE_NO_MODIFIERS(datumtype));
2274 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2276 Perl_croak(aTHX_ "'%c' outside of string in pack",
2277 (int) TYPE_NO_MODIFIERS(datumtype));
2283 if (cur - start < len)
2284 Perl_croak(aTHX_ "'%c' outside of string in pack",
2285 (int) TYPE_NO_MODIFIERS(datumtype));
2288 if (cur < start+symptr->strbeg) {
2289 /* Make sure group starts don't point into the void */
2291 const STRLEN length = cur-start;
2292 for (group = symptr;
2293 group && length < group->strbeg;
2294 group = group->previous) group->strbeg = length;
2295 lookahead.strbeg = length;
2298 case 'x' | TYPE_IS_SHRIEKING: {
2300 if (!len) /* Avoid division by 0 */
2302 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2303 else ai32 = (cur - start) % len;
2304 if (ai32 == 0) goto no_change;
2316 aptr = SvPV_const(fromstr, fromlen);
2317 if (DO_UTF8(fromstr)) {
2318 const char *end, *s;
2320 if (!utf8 && !SvUTF8(cat)) {
2321 marked_upgrade(aTHX_ cat, symptr);
2322 lookahead.flags |= FLAG_DO_UTF8;
2323 lookahead.strbeg = symptr->strbeg;
2326 cur = start + SvCUR(cat);
2328 if (howlen == e_star) {
2329 if (utf8) goto string_copy;
2333 end = aptr + fromlen;
2334 fromlen = datumtype == 'Z' ? len-1 : len;
2335 while ((I32) fromlen > 0 && s < end) {
2340 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2343 if (datumtype == 'Z') len++;
2349 fromlen = len - fromlen;
2350 if (datumtype == 'Z') fromlen--;
2351 if (howlen == e_star) {
2353 if (datumtype == 'Z') len++;
2355 GROWING(0, cat, start, cur, len);
2356 if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2357 datumtype | TYPE_IS_PACK))
2358 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2359 "for '%c', aptr=%p end=%p cur=%p, fromlen=%" UVuf,
2360 (int)datumtype, aptr, end, cur, (UV)fromlen);
2364 if (howlen == e_star) {
2366 if (datumtype == 'Z') len++;
2368 if (len <= (I32) fromlen) {
2370 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2372 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2374 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2375 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2377 while (fromlen > 0) {
2378 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2384 if (howlen == e_star) {
2386 if (datumtype == 'Z') len++;
2388 if (len <= (I32) fromlen) {
2390 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2392 GROWING(0, cat, start, cur, len);
2393 Copy(aptr, cur, fromlen, char);
2397 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2404 const char *str, *end;
2411 str = SvPV_const(fromstr, fromlen);
2412 end = str + fromlen;
2413 if (DO_UTF8(fromstr)) {
2415 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2417 utf8_source = FALSE;
2418 utf8_flags = 0; /* Unused, but keep compilers happy */
2420 if (howlen == e_star) len = fromlen;
2421 field_len = (len+7)/8;
2422 GROWING(utf8, cat, start, cur, field_len);
2423 if (len > (I32)fromlen) len = fromlen;
2426 if (datumtype == 'B')
2430 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2432 } else bits |= *str++ & 1;
2433 if (l & 7) bits <<= 1;
2435 PUSH_BYTE(utf8, cur, bits);
2440 /* datumtype == 'b' */
2444 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2445 if (val & 1) bits |= 0x80;
2446 } else if (*str++ & 1)
2448 if (l & 7) bits >>= 1;
2450 PUSH_BYTE(utf8, cur, bits);
2456 if (datumtype == 'B')
2457 bits <<= 7 - (l & 7);
2459 bits >>= 7 - (l & 7);
2460 PUSH_BYTE(utf8, cur, bits);
2463 /* Determine how many chars are left in the requested field */
2465 if (howlen == e_star) field_len = 0;
2466 else field_len -= l;
2467 Zero(cur, field_len, char);
2473 const char *str, *end;
2480 str = SvPV_const(fromstr, fromlen);
2481 end = str + fromlen;
2482 if (DO_UTF8(fromstr)) {
2484 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2486 utf8_source = FALSE;
2487 utf8_flags = 0; /* Unused, but keep compilers happy */
2489 if (howlen == e_star) len = fromlen;
2490 field_len = (len+1)/2;
2491 GROWING(utf8, cat, start, cur, field_len);
2492 if (!utf8_source && len > (I32)fromlen) len = fromlen;
2495 if (datumtype == 'H')
2499 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2500 if (val < 256 && isALPHA(val))
2501 bits |= (val + 9) & 0xf;
2504 } else if (isALPHA(*str))
2505 bits |= (*str++ + 9) & 0xf;
2507 bits |= *str++ & 0xf;
2508 if (l & 1) bits <<= 4;
2510 PUSH_BYTE(utf8, cur, bits);
2518 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2519 if (val < 256 && isALPHA(val))
2520 bits |= ((val + 9) & 0xf) << 4;
2522 bits |= (val & 0xf) << 4;
2523 } else if (isALPHA(*str))
2524 bits |= ((*str++ + 9) & 0xf) << 4;
2526 bits |= (*str++ & 0xf) << 4;
2527 if (l & 1) bits >>= 4;
2529 PUSH_BYTE(utf8, cur, bits);
2535 PUSH_BYTE(utf8, cur, bits);
2538 /* Determine how many chars are left in the requested field */
2540 if (howlen == e_star) field_len = 0;
2541 else field_len -= l;
2542 Zero(cur, field_len, char);
2550 aiv = SvIV_no_inf(fromstr, datumtype);
2551 if ((-128 > aiv || aiv > 127))
2552 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2553 "Character in 'c' format wrapped in pack");
2554 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2559 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2565 aiv = SvIV_no_inf(fromstr, datumtype);
2566 if ((0 > aiv || aiv > 0xff))
2567 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2568 "Character in 'C' format wrapped in pack");
2569 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2574 U8 in_bytes = (U8)IN_BYTES;
2576 end = start+SvLEN(cat)-1;
2577 if (utf8) end -= UTF8_MAXLEN-1;
2581 auv = SvUV_no_inf(fromstr, datumtype);
2582 if (in_bytes) auv = auv % 0x100;
2587 SvCUR_set(cat, cur - start);
2589 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2590 end = start+SvLEN(cat)-UTF8_MAXLEN;
2592 cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
2595 0 : UNICODE_ALLOW_ANY);
2600 SvCUR_set(cat, cur - start);
2601 marked_upgrade(aTHX_ cat, symptr);
2602 lookahead.flags |= FLAG_DO_UTF8;
2603 lookahead.strbeg = symptr->strbeg;
2606 cur = start + SvCUR(cat);
2607 end = start+SvLEN(cat)-UTF8_MAXLEN;
2610 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2611 "Character in 'W' format wrapped in pack");
2616 SvCUR_set(cat, cur - start);
2617 GROWING(0, cat, start, cur, len+1);
2618 end = start+SvLEN(cat)-1;
2620 *(U8 *) cur++ = (U8)auv;
2629 if (!(symptr->flags & FLAG_DO_UTF8)) {
2630 marked_upgrade(aTHX_ cat, symptr);
2631 lookahead.flags |= FLAG_DO_UTF8;
2632 lookahead.strbeg = symptr->strbeg;
2638 end = start+SvLEN(cat);
2639 if (!utf8) end -= UTF8_MAXLEN;
2643 auv = SvUV_no_inf(fromstr, datumtype);
2645 U8 buffer[UTF8_MAXLEN], *endb;
2646 endb = uvchr_to_utf8_flags(buffer, UNI_TO_NATIVE(auv),
2648 0 : UNICODE_ALLOW_ANY);
2649 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2651 SvCUR_set(cat, cur - start);
2652 GROWING(0, cat, start, cur,
2653 len+(endb-buffer)*UTF8_EXPAND);
2654 end = start+SvLEN(cat);
2656 cur = my_bytes_to_utf8(buffer, endb-buffer, cur, 0);
2660 SvCUR_set(cat, cur - start);
2661 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2662 end = start+SvLEN(cat)-UTF8_MAXLEN;
2664 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, UNI_TO_NATIVE(auv),
2666 0 : UNICODE_ALLOW_ANY);
2671 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2677 anv = SvNV(fromstr);
2678 # if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
2679 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2680 * on Alpha; fake it if we don't have them.
2684 else if (anv < -FLT_MAX)
2686 else afloat = (float)anv;
2688 # if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2690 afloat = (float)NV_NAN;
2694 /* a simple cast to float is undefined if outside
2695 * the range of values that can be represented */
2696 afloat = (float)(anv > FLT_MAX ? NV_INF :
2697 anv < -FLT_MAX ? -NV_INF : anv);
2700 PUSH_VAR(utf8, cur, afloat, needs_swap);
2708 anv = SvNV(fromstr);
2709 # if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
2710 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2711 * on Alpha; fake it if we don't have them.
2715 else if (anv < -DBL_MAX)
2717 else adouble = (double)anv;
2719 adouble = (double)anv;
2721 PUSH_VAR(utf8, cur, adouble, needs_swap);
2726 Zero(&anv, 1, NV); /* can be long double with unused bits */
2730 /* to work round a gcc/x86 bug; don't use SvNV */
2731 anv.nv = sv_2nv(fromstr);
2732 # if defined(LONGDOUBLE_X86_80_BIT) && defined(USE_LONG_DOUBLE) \
2733 && LONG_DOUBLESIZE > 10
2734 /* GCC sometimes overwrites the padding in the
2736 Zero(anv.bytes+10, sizeof(anv.bytes) - 10, U8);
2739 anv.nv = SvNV(fromstr);
2741 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
2745 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2748 /* long doubles can have unused bits, which may be nonzero */
2749 Zero(&aldouble, 1, long double);
2753 /* to work round a gcc/x86 bug; don't use SvNV */
2754 aldouble.ld = (long double)sv_2nv(fromstr);
2755 # if defined(LONGDOUBLE_X86_80_BIT) && LONG_DOUBLESIZE > 10
2756 /* GCC sometimes overwrites the padding in the
2758 Zero(aldouble.bytes+10, sizeof(aldouble.bytes) - 10, U8);
2761 aldouble.ld = (long double)SvNV(fromstr);
2763 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2769 case 'n' | TYPE_IS_SHRIEKING:
2774 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2775 ai16 = PerlSock_htons(ai16);
2776 PUSH16(utf8, cur, &ai16, FALSE);
2779 case 'v' | TYPE_IS_SHRIEKING:
2784 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2786 PUSH16(utf8, cur, &ai16, FALSE);
2789 case 'S' | TYPE_IS_SHRIEKING:
2790 #if SHORTSIZE != SIZE16
2792 unsigned short aushort;
2794 aushort = SvUV_no_inf(fromstr, datumtype);
2795 PUSH_VAR(utf8, cur, aushort, needs_swap);
2805 au16 = (U16)SvUV_no_inf(fromstr, datumtype);
2806 PUSH16(utf8, cur, &au16, needs_swap);
2809 case 's' | TYPE_IS_SHRIEKING:
2810 #if SHORTSIZE != SIZE16
2814 ashort = SvIV_no_inf(fromstr, datumtype);
2815 PUSH_VAR(utf8, cur, ashort, needs_swap);
2825 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2826 PUSH16(utf8, cur, &ai16, needs_swap);
2830 case 'I' | TYPE_IS_SHRIEKING:
2834 auint = SvUV_no_inf(fromstr, datumtype);
2835 PUSH_VAR(utf8, cur, auint, needs_swap);
2842 aiv = SvIV_no_inf(fromstr, datumtype);
2843 PUSH_VAR(utf8, cur, aiv, needs_swap);
2850 auv = SvUV_no_inf(fromstr, datumtype);
2851 PUSH_VAR(utf8, cur, auv, needs_swap);
2858 S_sv_check_infnan(aTHX_ fromstr, datumtype);
2859 anv = SvNV_nomg(fromstr);
2863 SvCUR_set(cat, cur - start);
2864 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
2867 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2868 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2869 any negative IVs will have already been got by the croak()
2870 above. IOK is untrue for fractions, so we test them
2871 against UV_MAX_P1. */
2872 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2873 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
2874 char *in = buf + sizeof(buf);
2875 UV auv = SvUV_nomg(fromstr);
2878 *--in = (char)((auv & 0x7f) | 0x80);
2881 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2882 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2883 in, (buf + sizeof(buf)) - in);
2884 } else if (SvPOKp(fromstr))
2886 else if (SvNOKp(fromstr)) {
2887 /* 10**NV_MAX_10_EXP is the largest power of 10
2888 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
2889 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2890 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2891 And with that many bytes only Inf can overflow.
2892 Some C compilers are strict about integral constant
2893 expressions so we conservatively divide by a slightly
2894 smaller integer instead of multiplying by the exact
2895 floating-point value.
2897 #ifdef NV_MAX_10_EXP
2898 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
2899 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
2901 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
2902 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
2904 char *in = buf + sizeof(buf);
2906 anv = Perl_floor(anv);
2908 const NV next = Perl_floor(anv / 128);
2909 if (in <= buf) /* this cannot happen ;-) */
2910 Perl_croak(aTHX_ "Cannot compress integer in pack");
2911 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
2914 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2915 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2916 in, (buf + sizeof(buf)) - in);
2925 /* Copy string and check for compliance */
2926 from = SvPV_nomg_const(fromstr, len);
2927 if ((norm = is_an_int(from, len)) == NULL)
2928 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
2930 Newx(result, len, char);
2933 while (!done) *--in = div128(norm, &done) | 0x80;
2934 result[len - 1] &= 0x7F; /* clear continue bit */
2935 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2936 in, (result + len) - in);
2938 SvREFCNT_dec(norm); /* free norm */
2943 case 'i' | TYPE_IS_SHRIEKING:
2947 aint = SvIV_no_inf(fromstr, datumtype);
2948 PUSH_VAR(utf8, cur, aint, needs_swap);
2951 case 'N' | TYPE_IS_SHRIEKING:
2956 au32 = SvUV_no_inf(fromstr, datumtype);
2957 au32 = PerlSock_htonl(au32);
2958 PUSH32(utf8, cur, &au32, FALSE);
2961 case 'V' | TYPE_IS_SHRIEKING:
2966 au32 = SvUV_no_inf(fromstr, datumtype);
2968 PUSH32(utf8, cur, &au32, FALSE);
2971 case 'L' | TYPE_IS_SHRIEKING:
2972 #if LONGSIZE != SIZE32
2974 unsigned long aulong;
2976 aulong = SvUV_no_inf(fromstr, datumtype);
2977 PUSH_VAR(utf8, cur, aulong, needs_swap);
2987 au32 = SvUV_no_inf(fromstr, datumtype);
2988 PUSH32(utf8, cur, &au32, needs_swap);
2991 case 'l' | TYPE_IS_SHRIEKING:
2992 #if LONGSIZE != SIZE32
2996 along = SvIV_no_inf(fromstr, datumtype);
2997 PUSH_VAR(utf8, cur, along, needs_swap);
3007 ai32 = SvIV_no_inf(fromstr, datumtype);
3008 PUSH32(utf8, cur, &ai32, needs_swap);
3011 #if defined(HAS_QUAD) && IVSIZE >= 8
3016 auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype);
3017 PUSH_VAR(utf8, cur, auquad, needs_swap);
3024 aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype);
3025 PUSH_VAR(utf8, cur, aquad, needs_swap);
3030 len = 1; /* assume SV is correct length */
3031 GROWING(utf8, cat, start, cur, sizeof(char *));
3038 SvGETMAGIC(fromstr);
3039 if (!SvOK(fromstr)) aptr = NULL;
3041 /* XXX better yet, could spirit away the string to
3042 * a safe spot and hang on to it until the result
3043 * of pack() (and all copies of the result) are
3046 if (((SvTEMP(fromstr) && SvREFCNT(fromstr) == 1)
3047 || (SvPADTMP(fromstr) &&
3048 !SvREADONLY(fromstr)))) {
3049 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3050 "Attempt to pack pointer to temporary value");
3052 if (SvPOK(fromstr) || SvNIOK(fromstr))
3053 aptr = SvPV_nomg_const_nolen(fromstr);
3055 aptr = SvPV_force_flags_nolen(fromstr, 0);
3057 PUSH_VAR(utf8, cur, aptr, needs_swap);
3061 const char *aptr, *aend;
3065 if (len <= 2) len = 45;
3066 else len = len / 3 * 3;
3068 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3069 "Field too wide in 'u' format in pack");
3072 aptr = SvPV_const(fromstr, fromlen);
3073 from_utf8 = DO_UTF8(fromstr);
3075 aend = aptr + fromlen;
3076 fromlen = sv_len_utf8_nomg(fromstr);
3077 } else aend = NULL; /* Unused, but keep compilers happy */
3078 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3079 while (fromlen > 0) {
3082 U8 hunk[1+63/3*4+1];
3084 if ((I32)fromlen > len)
3090 if (!S_utf8_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3091 'u' | TYPE_IS_PACK)) {
3093 SvCUR_set(cat, cur - start);
3094 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3095 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3096 aptr, aend, buffer, (long) todo);
3098 end = doencodes(hunk, (const U8 *)buffer, todo);
3100 end = doencodes(hunk, (const U8 *)aptr, todo);
3103 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
3110 SvCUR_set(cat, cur - start);
3112 *symptr = lookahead;
3121 dSP; dMARK; dORIGMARK; dTARGET;
3124 SV *pat_sv = *++MARK;
3125 const char *pat = SvPV_const(pat_sv, fromlen);
3126 const char *patend = pat + fromlen;
3132 packlist(cat, pat, patend, MARK, SP + 1);
3141 * ex: set ts=8 sts=4 sw=4 et: