3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
32 #define PERL_IN_PP_PACK_C
35 /* Types used by pack/unpack */
37 e_no_len, /* no length */
38 e_number, /* number, [] */
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
57 #define TEMPSYM_INIT(symptr, p, e, f) \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
81 U8 bytes[sizeof(long double)];
88 /* Maximum number of bytes to which a byte can grow due to upgrade */
92 * Offset for integer pack/unpack.
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
112 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
115 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
116 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
117 # define OFF16(p) ((char*)(p))
118 # define OFF32(p) ((char*)(p))
120 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
121 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
124 ++++ bad cray byte order
128 # define OFF16(p) ((char *) (p))
129 # define OFF32(p) ((char *) (p))
132 #define PUSH16(utf8, cur, p, needs_swap) \
133 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
134 #define PUSH32(utf8, cur, p, needs_swap) \
135 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
137 #if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
138 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
139 #elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
140 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
142 # error "Unsupported byteorder"
143 /* Need to add code here to re-instate mixed endian support.
144 NEEDS_SWAP would need to hold a flag indicating which action to
145 take, and S_reverse_copy and the code in S_utf8_to_bytes would need
146 logic adding to deal with any mixed-endian transformations needed.
150 /* Only to be used inside a loop (see the break) */
151 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
153 if (UNLIKELY(utf8)) { \
154 if (!S_utf8_to_bytes(aTHX_ &s, strend, \
155 (char *) (buf), len, datumtype)) break; \
157 if (UNLIKELY(needs_swap)) \
158 S_reverse_copy(s, (char *) (buf), len); \
160 Copy(s, (char *) (buf), len, char); \
165 #define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
166 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
168 #define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
169 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
171 #define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
172 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
174 #define PUSH_VAR(utf8, aptr, var, needs_swap) \
175 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
177 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
178 #define MAX_SUB_TEMPLATE_LEVEL 100
180 /* flags (note that type modifiers can also be used as flags!) */
181 #define FLAG_WAS_UTF8 0x40
182 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
183 #define FLAG_UNPACK_ONLY_ONE 0x10
184 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
185 #define FLAG_SLASH 0x04
186 #define FLAG_COMMA 0x02
187 #define FLAG_PACK 0x01
190 S_mul128(pTHX_ SV *sv, U8 m)
193 char *s = SvPV(sv, len);
196 PERL_ARGS_ASSERT_MUL128;
198 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
199 SV * const tmpNew = newSVpvs("0000000000");
201 sv_catsv(tmpNew, sv);
202 SvREFCNT_dec(sv); /* free old sv */
207 while (!*t) /* trailing '\0'? */
210 const U32 i = ((*t - '0') << 7) + m;
211 *(t--) = '0' + (char)(i % 10);
217 /* Explosives and implosives. */
219 #define ISUUCHAR(ch) (NATIVE_TO_LATIN1(ch) >= NATIVE_TO_LATIN1(' ') \
220 && NATIVE_TO_LATIN1(ch) < NATIVE_TO_LATIN1('a'))
223 #define TYPE_IS_SHRIEKING 0x100
224 #define TYPE_IS_BIG_ENDIAN 0x200
225 #define TYPE_IS_LITTLE_ENDIAN 0x400
226 #define TYPE_IS_PACK 0x800
227 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
228 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
229 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
231 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
232 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
234 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
236 #define PACK_SIZE_CANNOT_CSUM 0x80
237 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
238 #define PACK_SIZE_MASK 0x3F
240 #include "packsizetables.inc"
243 S_reverse_copy(const char *src, char *dest, STRLEN len)
251 utf8_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
254 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
255 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
256 /* We try to process malformed UTF-8 as much as possible (preferably with
257 warnings), but these two mean we make no progress in the string and
258 might enter an infinite loop */
259 if (retlen == (STRLEN) -1 || retlen == 0)
260 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
261 (int) TYPE_NO_MODIFIERS(datumtype));
263 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
264 "Character in '%c' format wrapped in unpack",
265 (int) TYPE_NO_MODIFIERS(datumtype));
272 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
273 utf8_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
277 S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
281 const char *from = *s;
283 const U32 flags = ckWARN(WARN_UTF8) ?
284 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
285 const bool needs_swap = NEEDS_SWAP(datumtype);
287 if (UNLIKELY(needs_swap))
290 for (;buf_len > 0; buf_len--) {
291 if (from >= end) return FALSE;
292 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
293 if (retlen == (STRLEN) -1 || retlen == 0) {
294 from += UTF8SKIP(from);
296 } else from += retlen;
301 if (UNLIKELY(needs_swap))
302 *(U8 *)--buf = (U8)val;
304 *(U8 *)buf++ = (U8)val;
306 /* We have enough characters for the buffer. Did we have problems ? */
309 /* Rewalk the string fragment while warning */
311 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
312 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
313 if (ptr >= end) break;
314 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
316 if (from > end) from = end;
319 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
320 WARN_PACK : WARN_UNPACK),
321 "Character(s) in '%c' format wrapped in %s",
322 (int) TYPE_NO_MODIFIERS(datumtype),
323 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
330 S_my_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
331 PERL_ARGS_ASSERT_MY_BYTES_TO_UTF8;
333 if (UNLIKELY(needs_swap)) {
334 const U8 *p = start + len;
335 while (p-- > start) {
336 append_utf8_from_native_byte(*p, (U8 **) & dest);
339 const U8 * const end = start + len;
340 while (start < end) {
341 append_utf8_from_native_byte(*start, (U8 **) & dest);
348 #define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
350 if (UNLIKELY(utf8)) \
351 (cur) = my_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap); \
353 if (UNLIKELY(needs_swap)) \
354 S_reverse_copy((char *)(buf), cur, len); \
356 Copy(buf, cur, len, char); \
361 #define GROWING(utf8, cat, start, cur, in_len) \
363 STRLEN glen = (in_len); \
364 if (utf8) glen *= UTF8_EXPAND; \
365 if ((cur) + glen >= (start) + SvLEN(cat)) { \
366 (start) = sv_exp_grow(cat, glen); \
367 (cur) = (start) + SvCUR(cat); \
371 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
373 const STRLEN glen = (in_len); \
375 if (utf8) gl *= UTF8_EXPAND; \
376 if ((cur) + gl >= (start) + SvLEN(cat)) { \
378 SvCUR_set((cat), (cur) - (start)); \
379 (start) = sv_exp_grow(cat, gl); \
380 (cur) = (start) + SvCUR(cat); \
382 PUSH_BYTES(utf8, cur, buf, glen, 0); \
385 #define PUSH_BYTE(utf8, s, byte) \
388 const U8 au8 = (byte); \
389 (s) = my_bytes_to_utf8(&au8, 1, (s), 0);\
390 } else *(U8 *)(s)++ = (byte); \
393 /* Only to be used inside a loop (see the break) */
394 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
397 if (str >= end) break; \
398 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
399 if (retlen == (STRLEN) -1 || retlen == 0) { \
401 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
406 static const char *_action( const tempsym_t* symptr )
408 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
411 /* Returns the sizeof() struct described by pat */
413 S_measure_struct(pTHX_ tempsym_t* symptr)
417 PERL_ARGS_ASSERT_MEASURE_STRUCT;
419 while (next_symbol(symptr)) {
423 switch (symptr->howlen) {
425 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
429 /* e_no_len and e_number */
430 len = symptr->length;
434 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
437 /* endianness doesn't influence the size of a type */
438 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
440 Perl_croak(aTHX_ "Invalid type '%c' in %s",
441 (int)TYPE_NO_MODIFIERS(symptr->code),
443 case '.' | TYPE_IS_SHRIEKING:
444 case '@' | TYPE_IS_SHRIEKING:
448 case 'U': /* XXXX Is it correct? */
451 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
452 (int) TYPE_NO_MODIFIERS(symptr->code),
459 tempsym_t savsym = *symptr;
460 symptr->patptr = savsym.grpbeg;
461 symptr->patend = savsym.grpend;
462 /* XXXX Theoretically, we need to measure many times at
463 different positions, since the subexpression may contain
464 alignment commands, but be not of aligned length.
465 Need to detect this and croak(). */
466 size = measure_struct(symptr);
470 case 'X' | TYPE_IS_SHRIEKING:
471 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
473 if (!len) /* Avoid division by 0 */
475 len = total % len; /* Assumed: the start is aligned. */
480 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
482 case 'x' | TYPE_IS_SHRIEKING:
483 if (!len) /* Avoid division by 0 */
485 star = total % len; /* Assumed: the start is aligned. */
486 if (star) /* Other portable ways? */
510 size = sizeof(char*);
520 /* locate matching closing parenthesis or bracket
521 * returns char pointer to char after match, or NULL
524 S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
526 PERL_ARGS_ASSERT_GROUP_END;
528 while (patptr < patend) {
529 const char c = *patptr++;
536 while (patptr < patend && *patptr != '\n')
540 patptr = group_end(patptr, patend, ')') + 1;
542 patptr = group_end(patptr, patend, ']') + 1;
544 Perl_croak(aTHX_ "No group ending character '%c' found in template",
546 NOT_REACHED; /* NOTREACHED */
550 /* Convert unsigned decimal number to binary.
551 * Expects a pointer to the first digit and address of length variable
552 * Advances char pointer to 1st non-digit char and returns number
555 S_get_num(pTHX_ const char *patptr, I32 *lenptr )
557 I32 len = *patptr++ - '0';
559 PERL_ARGS_ASSERT_GET_NUM;
561 while (isDIGIT(*patptr)) {
562 if (len >= 0x7FFFFFFF/10)
563 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
564 len = (len * 10) + (*patptr++ - '0');
570 /* The marvellous template parsing routine: Using state stored in *symptr,
571 * locates next template code and count
574 S_next_symbol(pTHX_ tempsym_t* symptr )
576 const char* patptr = symptr->patptr;
577 const char* const patend = symptr->patend;
579 PERL_ARGS_ASSERT_NEXT_SYMBOL;
581 symptr->flags &= ~FLAG_SLASH;
583 while (patptr < patend) {
584 if (isSPACE(*patptr))
586 else if (*patptr == '#') {
588 while (patptr < patend && *patptr != '\n')
593 /* We should have found a template code */
594 I32 code = *patptr++ & 0xFF;
595 U32 inherited_modifiers = 0;
597 if (code == ','){ /* grandfather in commas but with a warning */
598 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
599 symptr->flags |= FLAG_COMMA;
600 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
601 "Invalid type ',' in %s", _action( symptr ) );
606 /* for '(', skip to ')' */
608 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
609 Perl_croak(aTHX_ "()-group starts with a count in %s",
611 symptr->grpbeg = patptr;
612 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
613 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
614 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
618 /* look for group modifiers to inherit */
619 if (TYPE_ENDIANNESS(symptr->flags)) {
620 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
621 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
624 /* look for modifiers */
625 while (patptr < patend) {
630 modifier = TYPE_IS_SHRIEKING;
631 allowed = "sSiIlLxXnNvV@.";
634 modifier = TYPE_IS_BIG_ENDIAN;
635 allowed = ENDIANNESS_ALLOWED_TYPES;
638 modifier = TYPE_IS_LITTLE_ENDIAN;
639 allowed = ENDIANNESS_ALLOWED_TYPES;
650 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
651 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
652 allowed, _action( symptr ) );
654 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
655 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
656 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
657 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
658 TYPE_ENDIANNESS_MASK)
659 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
660 *patptr, _action( symptr ) );
662 if ((code & modifier)) {
663 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
664 "Duplicate modifier '%c' after '%c' in %s",
665 *patptr, (int) TYPE_NO_MODIFIERS(code),
673 /* inherit modifiers */
674 code |= inherited_modifiers;
676 /* look for count and/or / */
677 if (patptr < patend) {
678 if (isDIGIT(*patptr)) {
679 patptr = get_num( patptr, &symptr->length );
680 symptr->howlen = e_number;
682 } else if (*patptr == '*') {
684 symptr->howlen = e_star;
686 } else if (*patptr == '[') {
687 const char* lenptr = ++patptr;
688 symptr->howlen = e_number;
689 patptr = group_end( patptr, patend, ']' ) + 1;
690 /* what kind of [] is it? */
691 if (isDIGIT(*lenptr)) {
692 lenptr = get_num( lenptr, &symptr->length );
694 Perl_croak(aTHX_ "Malformed integer in [] in %s",
697 tempsym_t savsym = *symptr;
698 symptr->patend = patptr-1;
699 symptr->patptr = lenptr;
700 savsym.length = measure_struct(symptr);
704 symptr->howlen = e_no_len;
709 while (patptr < patend) {
710 if (isSPACE(*patptr))
712 else if (*patptr == '#') {
714 while (patptr < patend && *patptr != '\n')
719 if (*patptr == '/') {
720 symptr->flags |= FLAG_SLASH;
722 if (patptr < patend &&
723 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
724 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
731 /* at end - no count, no / */
732 symptr->howlen = e_no_len;
737 symptr->patptr = patptr;
741 symptr->patptr = patptr;
746 There is no way to cleanly handle the case where we should process the
747 string per byte in its upgraded form while it's really in downgraded form
748 (e.g. estimates like strend-s as an upper bound for the number of
749 characters left wouldn't work). So if we foresee the need of this
750 (pattern starts with U or contains U0), we want to work on the encoded
751 version of the string. Users are advised to upgrade their pack string
752 themselves if they need to do a lot of unpacks like this on it
755 need_utf8(const char *pat, const char *patend)
759 PERL_ARGS_ASSERT_NEED_UTF8;
761 while (pat < patend) {
764 pat = (const char *) memchr(pat, '\n', patend-pat);
765 if (!pat) return FALSE;
766 } else if (pat[0] == 'U') {
767 if (first || pat[1] == '0') return TRUE;
768 } else first = FALSE;
775 first_symbol(const char *pat, const char *patend) {
776 PERL_ARGS_ASSERT_FIRST_SYMBOL;
778 while (pat < patend) {
779 if (pat[0] != '#') return pat[0];
781 pat = (const char *) memchr(pat, '\n', patend-pat);
790 =head1 Pack and Unpack
792 =for apidoc unpackstring
794 The engine implementing the C<unpack()> Perl function.
796 Using the template C<pat..patend>, this function unpacks the string
797 C<s..strend> into a number of mortal SVs, which it pushes onto the perl
798 argument (C<@_>) stack (so you will need to issue a C<PUTBACK> before and
799 C<SPAGAIN> after the call to this function). It returns the number of
802 The C<strend> and C<patend> pointers should point to the byte following the
803 last character of each string.
805 Although this function returns its values on the perl argument stack, it
806 doesn't take any parameters from that stack (and thus in particular
807 there's no need to do a C<PUSHMARK> before calling it, unlike L</call_pv> for
813 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
817 PERL_ARGS_ASSERT_UNPACKSTRING;
819 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
820 else if (need_utf8(pat, patend)) {
821 /* We probably should try to avoid this in case a scalar context call
822 wouldn't get to the "U0" */
823 STRLEN len = strend - s;
824 s = (char *) bytes_to_utf8((U8 *) s, &len);
827 flags |= FLAG_DO_UTF8;
830 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
831 flags |= FLAG_PARSE_UTF8;
833 TEMPSYM_INIT(&sym, pat, patend, flags);
835 return unpack_rec(&sym, s, s, strend, NULL );
839 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
843 const I32 start_sp_offset = SP - PL_stack_base;
848 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
850 bool explicit_length;
851 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
852 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
854 PERL_ARGS_ASSERT_UNPACK_REC;
856 symptr->strbeg = s - strbeg;
858 while (next_symbol(symptr)) {
861 I32 datumtype = symptr->code;
863 /* do first one only unless in list context
864 / is implemented by unpacking the count, then popping it from the
865 stack, so must check that we're not in the middle of a / */
867 && (SP - PL_stack_base == start_sp_offset + 1)
868 && (datumtype != '/') ) /* XXX can this be omitted */
871 switch (howlen = symptr->howlen) {
873 len = strend - strbeg; /* long enough */
876 /* e_no_len and e_number */
877 len = symptr->length;
881 explicit_length = TRUE;
883 beyond = s >= strend;
885 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
887 /* props nonzero means we can process this letter. */
888 const long size = props & PACK_SIZE_MASK;
889 const long howmany = (strend - s) / size;
893 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
894 if (len && unpack_only_one) len = 1;
900 needs_swap = NEEDS_SWAP(datumtype);
902 switch(TYPE_NO_ENDIANNESS(datumtype)) {
904 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
907 if (howlen == e_no_len)
908 len = 16; /* len is not specified */
916 tempsym_t savsym = *symptr;
917 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
918 symptr->flags |= group_modifiers;
919 symptr->patend = savsym.grpend;
920 symptr->previous = &savsym;
923 if (len && unpack_only_one) len = 1;
925 symptr->patptr = savsym.grpbeg;
926 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
927 else symptr->flags &= ~FLAG_PARSE_UTF8;
928 unpack_rec(symptr, s, strbeg, strend, &s);
929 if (s == strend && savsym.howlen == e_star)
930 break; /* No way to continue */
933 savsym.flags = symptr->flags & ~group_modifiers;
937 case '.' | TYPE_IS_SHRIEKING:
941 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
942 if (howlen == e_star) from = strbeg;
943 else if (len <= 0) from = s;
945 tempsym_t *group = symptr;
947 while (--len && group) group = group->previous;
948 from = group ? strbeg + group->strbeg : strbeg;
951 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
952 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
956 case '@' | TYPE_IS_SHRIEKING:
958 s = strbeg + symptr->strbeg;
959 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
963 Perl_croak(aTHX_ "'@' outside of string in unpack");
968 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
971 Perl_croak(aTHX_ "'@' outside of string in unpack");
975 case 'X' | TYPE_IS_SHRIEKING:
976 if (!len) /* Avoid division by 0 */
979 const char *hop, *last;
983 hop += UTF8SKIP(hop);
990 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
994 len = (s - strbeg) % len;
1000 Perl_croak(aTHX_ "'X' outside of string in unpack");
1001 while (--s, UTF8_IS_CONTINUATION(*s)) {
1003 Perl_croak(aTHX_ "'X' outside of string in unpack");
1008 if (len > s - strbeg)
1009 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1013 case 'x' | TYPE_IS_SHRIEKING: {
1015 if (!len) /* Avoid division by 0 */
1017 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1018 else ai32 = (s - strbeg) % len;
1019 if (ai32 == 0) break;
1027 Perl_croak(aTHX_ "'x' outside of string in unpack");
1032 if (len > strend - s)
1033 Perl_croak(aTHX_ "'x' outside of string in unpack");
1038 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1044 /* Preliminary length estimate is assumed done in 'W' */
1045 if (len > strend - s) len = strend - s;
1051 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1052 if (hop >= strend) {
1054 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1059 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1061 } else if (len > strend - s)
1064 if (datumtype == 'Z') {
1065 /* 'Z' strips stuff after first null */
1066 const char *ptr, *end;
1068 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1069 sv = newSVpvn(s, ptr-s);
1070 if (howlen == e_star) /* exact for 'Z*' */
1071 len = ptr-s + (ptr != strend ? 1 : 0);
1072 } else if (datumtype == 'A') {
1073 /* 'A' strips both nulls and spaces */
1075 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1076 for (ptr = s+len-1; ptr >= s; ptr--) {
1078 && !UTF8_IS_CONTINUATION(*ptr)
1079 && !isSPACE_utf8_safe(ptr, strend))
1084 if (ptr >= s) ptr += UTF8SKIP(ptr);
1087 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1089 for (ptr = s+len-1; ptr >= s; ptr--)
1090 if (*ptr != 0 && !isSPACE(*ptr)) break;
1093 sv = newSVpvn(s, ptr-s);
1094 } else sv = newSVpvn(s, len);
1098 /* Undo any upgrade done due to need_utf8() */
1099 if (!(symptr->flags & FLAG_WAS_UTF8))
1100 sv_utf8_downgrade(sv, 0);
1108 if (howlen == e_star || len > (strend - s) * 8)
1109 len = (strend - s) * 8;
1112 while (len >= 8 && s < strend) {
1113 cuv += PL_bitcount[utf8_to_byte(aTHX_ &s, strend, datumtype)];
1118 cuv += PL_bitcount[*(U8 *)s++];
1121 if (len && s < strend) {
1123 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1124 if (datumtype == 'b')
1126 if (bits & 1) cuv++;
1131 if (bits & 0x80) cuv++;
1138 sv = sv_2mortal(newSV(len ? len : 1));
1141 if (datumtype == 'b') {
1143 const I32 ai32 = len;
1144 for (len = 0; len < ai32; len++) {
1145 if (len & 7) bits >>= 1;
1147 if (s >= strend) break;
1148 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1149 } else bits = *(U8 *) s++;
1150 *str++ = bits & 1 ? '1' : '0';
1154 const I32 ai32 = len;
1155 for (len = 0; len < ai32; len++) {
1156 if (len & 7) bits <<= 1;
1158 if (s >= strend) break;
1159 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1160 } else bits = *(U8 *) s++;
1161 *str++ = bits & 0x80 ? '1' : '0';
1165 SvCUR_set(sv, str - SvPVX_const(sv));
1172 /* Preliminary length estimate, acceptable for utf8 too */
1173 if (howlen == e_star || len > (strend - s) * 2)
1174 len = (strend - s) * 2;
1176 sv = sv_2mortal(newSV(len ? len : 1));
1180 if (datumtype == 'h') {
1183 for (len = 0; len < ai32; len++) {
1184 if (len & 1) bits >>= 4;
1186 if (s >= strend) break;
1187 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1188 } else bits = * (U8 *) s++;
1190 *str++ = PL_hexdigit[bits & 15];
1194 const I32 ai32 = len;
1195 for (len = 0; len < ai32; len++) {
1196 if (len & 1) bits <<= 4;
1198 if (s >= strend) break;
1199 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1200 } else bits = *(U8 *) s++;
1202 *str++ = PL_hexdigit[(bits >> 4) & 15];
1207 SvCUR_set(sv, str - SvPVX_const(sv));
1214 if (explicit_length)
1215 /* Switch to "character" mode */
1216 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1221 while (len-- > 0 && s < strend) {
1226 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1227 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1228 if (retlen == (STRLEN) -1 || retlen == 0)
1229 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1233 aint = *(U8 *)(s)++;
1234 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1238 else if (checksum > bits_in_uv)
1239 cdouble += (NV)aint;
1247 while (len-- > 0 && s < strend) {
1249 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1250 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1251 if (retlen == (STRLEN) -1 || retlen == 0)
1252 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1256 else if (checksum > bits_in_uv)
1257 cdouble += (NV) val;
1261 } else if (!checksum)
1263 const U8 ch = *(U8 *) s++;
1266 else if (checksum > bits_in_uv)
1267 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1269 while (len-- > 0) cuv += *(U8 *) s++;
1273 if (explicit_length && howlen != e_star) {
1274 /* Switch to "bytes in UTF-8" mode */
1275 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1277 /* Should be impossible due to the need_utf8() test */
1278 Perl_croak(aTHX_ "U0 mode on a byte string");
1282 if (len > strend - s) len = strend - s;
1284 if (len && unpack_only_one) len = 1;
1288 while (len-- > 0 && s < strend) {
1292 U8 result[UTF8_MAXLEN];
1293 const char *ptr = s;
1295 /* Bug: warns about bad utf8 even if we are short on bytes
1296 and will break out of the loop */
1297 if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1300 len = UTF8SKIP(result);
1301 if (!S_utf8_to_bytes(aTHX_ &ptr, strend,
1302 (char *) &result[1], len-1, 'U')) break;
1303 auv = NATIVE_TO_UNI(utf8n_to_uvchr(result,
1306 UTF8_ALLOW_DEFAULT));
1309 auv = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s,
1312 UTF8_ALLOW_DEFAULT));
1313 if (retlen == (STRLEN) -1 || retlen == 0)
1314 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1319 else if (checksum > bits_in_uv)
1320 cdouble += (NV) auv;
1325 case 's' | TYPE_IS_SHRIEKING:
1326 #if SHORTSIZE != SIZE16
1329 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
1332 else if (checksum > bits_in_uv)
1333 cdouble += (NV)ashort;
1345 #if U16SIZE > SIZE16
1348 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1349 #if U16SIZE > SIZE16
1355 else if (checksum > bits_in_uv)
1356 cdouble += (NV)ai16;
1361 case 'S' | TYPE_IS_SHRIEKING:
1362 #if SHORTSIZE != SIZE16
1364 unsigned short aushort;
1365 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap,
1369 else if (checksum > bits_in_uv)
1370 cdouble += (NV)aushort;
1383 #if U16SIZE > SIZE16
1386 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
1387 if (datumtype == 'n')
1388 au16 = PerlSock_ntohs(au16);
1389 if (datumtype == 'v')
1393 else if (checksum > bits_in_uv)
1394 cdouble += (NV) au16;
1399 case 'v' | TYPE_IS_SHRIEKING:
1400 case 'n' | TYPE_IS_SHRIEKING:
1403 # if U16SIZE > SIZE16
1406 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1407 /* There should never be any byte-swapping here. */
1408 assert(!TYPE_ENDIANNESS(datumtype));
1409 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1410 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1411 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1412 ai16 = (I16) vtohs((U16) ai16);
1415 else if (checksum > bits_in_uv)
1416 cdouble += (NV) ai16;
1422 case 'i' | TYPE_IS_SHRIEKING:
1425 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
1428 else if (checksum > bits_in_uv)
1429 cdouble += (NV)aint;
1435 case 'I' | TYPE_IS_SHRIEKING:
1438 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
1441 else if (checksum > bits_in_uv)
1442 cdouble += (NV)auint;
1450 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
1453 else if (checksum > bits_in_uv)
1462 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
1465 else if (checksum > bits_in_uv)
1471 case 'l' | TYPE_IS_SHRIEKING:
1472 #if LONGSIZE != SIZE32
1475 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
1478 else if (checksum > bits_in_uv)
1479 cdouble += (NV)along;
1490 #if U32SIZE > SIZE32
1493 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1494 #if U32SIZE > SIZE32
1495 if (ai32 > 2147483647) ai32 -= 4294967296;
1499 else if (checksum > bits_in_uv)
1500 cdouble += (NV)ai32;
1505 case 'L' | TYPE_IS_SHRIEKING:
1506 #if LONGSIZE != SIZE32
1508 unsigned long aulong;
1509 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
1512 else if (checksum > bits_in_uv)
1513 cdouble += (NV)aulong;
1526 #if U32SIZE > SIZE32
1529 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
1530 if (datumtype == 'N')
1531 au32 = PerlSock_ntohl(au32);
1532 if (datumtype == 'V')
1536 else if (checksum > bits_in_uv)
1537 cdouble += (NV)au32;
1542 case 'V' | TYPE_IS_SHRIEKING:
1543 case 'N' | TYPE_IS_SHRIEKING:
1546 #if U32SIZE > SIZE32
1549 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1550 /* There should never be any byte swapping here. */
1551 assert(!TYPE_ENDIANNESS(datumtype));
1552 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1553 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1554 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1555 ai32 = (I32)vtohl((U32)ai32);
1558 else if (checksum > bits_in_uv)
1559 cdouble += (NV)ai32;
1567 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1568 /* newSVpv generates undef if aptr is NULL */
1569 mPUSHs(newSVpv(aptr, 0));
1577 while (len > 0 && s < strend) {
1579 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1580 auv = (auv << 7) | (ch & 0x7f);
1581 /* UTF8_IS_XXXXX not right here because this is a BER, not
1582 * UTF-8 format - using constant 0x80 */
1590 if (++bytes >= sizeof(UV)) { /* promote to string */
1593 sv = Perl_newSVpvf(aTHX_ "%.*" UVuf,
1594 (int)TYPE_DIGITS(UV), auv);
1595 while (s < strend) {
1596 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1597 sv = mul128(sv, (U8)(ch & 0x7f));
1603 t = SvPV_nolen_const(sv);
1612 if ((s >= strend) && bytes)
1613 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1617 if (symptr->howlen == e_star)
1618 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1620 if (s + sizeof(char*) <= strend) {
1622 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1623 /* newSVpvn generates undef if aptr is NULL */
1624 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1627 #if defined(HAS_QUAD) && IVSIZE >= 8
1631 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
1633 mPUSHs(newSViv((IV)aquad));
1634 else if (checksum > bits_in_uv)
1635 cdouble += (NV)aquad;
1643 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
1645 mPUSHs(newSVuv((UV)auquad));
1646 else if (checksum > bits_in_uv)
1647 cdouble += (NV)auquad;
1653 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1657 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
1667 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
1677 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1678 datumtype, needs_swap);
1685 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1689 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1690 sizeof(aldouble.bytes), datumtype, needs_swap);
1691 /* The most common long double format, the x86 80-bit
1692 * extended precision, has either 2 or 6 unused bytes,
1693 * which may contain garbage, which may contain
1694 * unintentional data. While we do zero the bytes of
1695 * the long double data in pack(), here in unpack() we
1696 * don't, because it's really hard to envision that
1697 * reading the long double off aldouble would be
1698 * affected by the unused bytes.
1700 * Note that trying to unpack 'long doubles' of 'long
1701 * doubles' packed in another system is in the general
1702 * case doomed without having more detail. */
1704 mPUSHn(aldouble.ld);
1706 cdouble += aldouble.ld;
1712 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1713 sv = sv_2mortal(newSV(l));
1714 if (l) SvPOK_on(sv);
1717 /* Note that all legal uuencoded strings are ASCII printables, so
1718 * have the same representation under UTF-8 vs not. This means we
1719 * can ignore UTF8ness on legal input. For illegal we stop at the
1720 * first failure, and don't report where/what that is, so again we
1721 * can ignore UTF8ness */
1723 while (s < strend && *s != ' ' && ISUUCHAR(*s)) {
1727 len = PL_uudmap[*(U8*)s++] & 077;
1729 if (s < strend && ISUUCHAR(*s))
1730 a = PL_uudmap[*(U8*)s++] & 077;
1733 if (s < strend && ISUUCHAR(*s))
1734 b = PL_uudmap[*(U8*)s++] & 077;
1737 if (s < strend && ISUUCHAR(*s))
1738 c = PL_uudmap[*(U8*)s++] & 077;
1741 if (s < strend && ISUUCHAR(*s))
1742 d = PL_uudmap[*(U8*)s++] & 077;
1745 hunk[0] = (char)((a << 2) | (b >> 4));
1746 hunk[1] = (char)((b << 4) | (c >> 2));
1747 hunk[2] = (char)((c << 6) | d);
1749 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1754 else /* possible checksum byte */
1755 if (s + 1 < strend && s[1] == '\n')
1761 } /* End of switch */
1764 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1765 (checksum > bits_in_uv &&
1766 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1769 anv = (NV) (1 << (checksum & 15));
1770 while (checksum >= 16) {
1774 while (cdouble < 0.0)
1776 cdouble = Perl_modf(cdouble / anv, &trouble);
1777 #ifdef LONGDOUBLE_DOUBLEDOUBLE
1778 /* Workaround for powerpc doubledouble modfl bug:
1779 * close to 1.0L and -1.0L cdouble is 0, and trouble
1780 * is cdouble / anv. */
1781 if (trouble != Perl_ceil(trouble)) {
1783 if (cdouble > 1.0L) cdouble -= 1.0L;
1784 if (cdouble < -1.0L) cdouble += 1.0L;
1788 sv = newSVnv(cdouble);
1791 if (checksum < bits_in_uv) {
1792 UV mask = ((UV)1 << checksum) - 1;
1801 if (symptr->flags & FLAG_SLASH){
1802 if (SP - PL_stack_base - start_sp_offset <= 0)
1804 if( next_symbol(symptr) ){
1805 if( symptr->howlen == e_number )
1806 Perl_croak(aTHX_ "Count after length/code in unpack" );
1808 /* ...end of char buffer then no decent length available */
1809 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1811 /* take top of stack (hope it's numeric) */
1814 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1817 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1819 datumtype = symptr->code;
1820 explicit_length = FALSE;
1828 return SP - PL_stack_base - start_sp_offset;
1838 const char *pat = SvPV_const(left, llen);
1839 const char *s = SvPV_const(right, rlen);
1840 const char *strend = s + rlen;
1841 const char *patend = pat + llen;
1845 cnt = unpackstring(pat, patend, s, strend,
1846 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
1847 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
1850 if ( !cnt && gimme == G_SCALAR )
1851 PUSHs(&PL_sv_undef);
1856 doencodes(U8 *h, const U8 *s, I32 len)
1858 *h++ = PL_uuemap[len];
1860 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1861 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1862 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1863 *h++ = PL_uuemap[(077 & (s[2] & 077))];
1868 const U8 r = (len > 1 ? s[1] : '\0');
1869 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1870 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1871 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1872 *h++ = PL_uuemap[0];
1879 S_is_an_int(pTHX_ const char *s, STRLEN l)
1881 SV *result = newSVpvn(s, l);
1882 char *const result_c = SvPV_nolen(result); /* convenience */
1883 char *out = result_c;
1887 PERL_ARGS_ASSERT_IS_AN_INT;
1895 SvREFCNT_dec(result);
1918 SvREFCNT_dec(result);
1924 SvCUR_set(result, out - result_c);
1928 /* pnum must be '\0' terminated */
1930 S_div128(pTHX_ SV *pnum, bool *done)
1933 char * const s = SvPV(pnum, len);
1937 PERL_ARGS_ASSERT_DIV128;
1941 const int i = m * 10 + (*t - '0');
1942 const int r = (i >> 7); /* r < 10 */
1950 SvCUR_set(pnum, (STRLEN) (t - s));
1955 =for apidoc packlist
1957 The engine implementing C<pack()> Perl function.
1963 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
1967 PERL_ARGS_ASSERT_PACKLIST;
1969 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
1971 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1972 Also make sure any UTF8 flag is loaded */
1973 SvPV_force_nolen(cat);
1975 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
1977 (void)pack_rec( cat, &sym, beglist, endlist );
1980 /* like sv_utf8_upgrade, but also repoint the group start markers */
1982 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
1985 const char *from_ptr, *from_start, *from_end, **marks, **m;
1986 char *to_start, *to_ptr;
1988 if (SvUTF8(sv)) return;
1990 from_start = SvPVX_const(sv);
1991 from_end = from_start + SvCUR(sv);
1992 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
1993 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
1994 if (from_ptr == from_end) {
1995 /* Simple case: no character needs to be changed */
2000 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2001 Newx(to_start, len, char);
2002 Copy(from_start, to_start, from_ptr-from_start, char);
2003 to_ptr = to_start + (from_ptr-from_start);
2005 Newx(marks, sym_ptr->level+2, const char *);
2006 for (group=sym_ptr; group; group = group->previous)
2007 marks[group->level] = from_start + group->strbeg;
2008 marks[sym_ptr->level+1] = from_end+1;
2009 for (m = marks; *m < from_ptr; m++)
2010 *m = to_start + (*m-from_start);
2012 for (;from_ptr < from_end; from_ptr++) {
2013 while (*m == from_ptr) *m++ = to_ptr;
2014 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2018 while (*m == from_ptr) *m++ = to_ptr;
2019 if (m != marks + sym_ptr->level+1) {
2022 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2023 "level=%d", m, marks, sym_ptr->level);
2025 for (group=sym_ptr; group; group = group->previous)
2026 group->strbeg = marks[group->level] - to_start;
2031 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2032 from_start -= SvIVX(sv);
2035 SvFLAGS(sv) &= ~SVf_OOK;
2038 Safefree(from_start);
2039 SvPV_set(sv, to_start);
2040 SvCUR_set(sv, to_ptr - to_start);
2045 /* Exponential string grower. Makes string extension effectively O(n)
2046 needed says how many extra bytes we need (not counting the final '\0')
2047 Only grows the string if there is an actual lack of space
2050 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2051 const STRLEN cur = SvCUR(sv);
2052 const STRLEN len = SvLEN(sv);
2055 PERL_ARGS_ASSERT_SV_EXP_GROW;
2057 if (len - cur > needed) return SvPVX(sv);
2058 extend = needed > len ? needed : len;
2059 return SvGROW(sv, len+extend+1);
2063 S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
2066 if (UNLIKELY(SvAMAGIC(sv)))
2068 if (UNLIKELY(isinfnansv(sv))) {
2069 const I32 c = TYPE_NO_MODIFIERS(datumtype);
2070 const NV nv = SvNV_nomg(sv);
2072 Perl_croak(aTHX_ "Cannot compress %" NVgf " in pack", nv);
2074 Perl_croak(aTHX_ "Cannot pack %" NVgf " with '%c'", nv, (int) c);
2079 #define SvIV_no_inf(sv,d) \
2080 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
2081 #define SvUV_no_inf(sv,d) \
2082 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
2086 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2088 tempsym_t lookahead;
2089 I32 items = endlist - beglist;
2090 bool found = next_symbol(symptr);
2091 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2092 bool warn_utf8 = ckWARN(WARN_UTF8);
2095 PERL_ARGS_ASSERT_PACK_REC;
2097 if (symptr->level == 0 && found && symptr->code == 'U') {
2098 marked_upgrade(aTHX_ cat, symptr);
2099 symptr->flags |= FLAG_DO_UTF8;
2102 symptr->strbeg = SvCUR(cat);
2108 SV *lengthcode = NULL;
2109 I32 datumtype = symptr->code;
2110 howlen_t howlen = symptr->howlen;
2111 char *start = SvPVX(cat);
2112 char *cur = start + SvCUR(cat);
2115 #define NEXTFROM (lengthcode ? lengthcode : items > 0 ? (--items, *beglist++) : &PL_sv_no)
2116 #define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
2120 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2124 /* e_no_len and e_number */
2125 len = symptr->length;
2130 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2132 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2133 /* We can process this letter. */
2134 STRLEN size = props & PACK_SIZE_MASK;
2135 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2139 /* Look ahead for next symbol. Do we have code/code? */
2140 lookahead = *symptr;
2141 found = next_symbol(&lookahead);
2142 if (symptr->flags & FLAG_SLASH) {
2144 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2145 if (strchr("aAZ", lookahead.code)) {
2146 if (lookahead.howlen == e_number) count = lookahead.length;
2149 count = sv_len_utf8(*beglist);
2152 if (lookahead.code == 'Z') count++;
2155 if (lookahead.howlen == e_number && lookahead.length < items)
2156 count = lookahead.length;
2159 lookahead.howlen = e_number;
2160 lookahead.length = count;
2161 lengthcode = sv_2mortal(newSViv(count));
2164 needs_swap = NEEDS_SWAP(datumtype);
2166 /* Code inside the switch must take care to properly update
2167 cat (CUR length and '\0' termination) if it updated *cur and
2168 doesn't simply leave using break */
2169 switch (TYPE_NO_ENDIANNESS(datumtype)) {
2171 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2172 (int) TYPE_NO_MODIFIERS(datumtype));
2174 Perl_croak(aTHX_ "'%%' may not be used in pack");
2176 case '.' | TYPE_IS_SHRIEKING:
2178 if (howlen == e_star) from = start;
2179 else if (len == 0) from = cur;
2181 tempsym_t *group = symptr;
2183 while (--len && group) group = group->previous;
2184 from = group ? start + group->strbeg : start;
2187 len = SvIV_no_inf(fromstr, datumtype);
2189 case '@' | TYPE_IS_SHRIEKING:
2191 from = start + symptr->strbeg;
2193 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2195 while (len && from < cur) {
2196 from += UTF8SKIP(from);
2200 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2202 /* Here we know from == cur */
2204 GROWING(0, cat, start, cur, len);
2205 Zero(cur, len, char);
2207 } else if (from < cur) {
2210 } else goto no_change;
2218 if (len > 0) goto grow;
2219 if (len == 0) goto no_change;
2226 tempsym_t savsym = *symptr;
2227 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2228 symptr->flags |= group_modifiers;
2229 symptr->patend = savsym.grpend;
2231 symptr->previous = &lookahead;
2234 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2235 else symptr->flags &= ~FLAG_PARSE_UTF8;
2236 was_utf8 = SvUTF8(cat);
2237 symptr->patptr = savsym.grpbeg;
2238 beglist = pack_rec(cat, symptr, beglist, endlist);
2239 if (SvUTF8(cat) != was_utf8)
2240 /* This had better be an upgrade while in utf8==0 mode */
2243 if (savsym.howlen == e_star && beglist == endlist)
2244 break; /* No way to continue */
2246 items = endlist - beglist;
2247 lookahead.flags = symptr->flags & ~group_modifiers;
2250 case 'X' | TYPE_IS_SHRIEKING:
2251 if (!len) /* Avoid division by 0 */
2258 hop += UTF8SKIP(hop);
2265 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2269 len = (cur-start) % len;
2273 if (len < 1) goto no_change;
2277 Perl_croak(aTHX_ "'%c' outside of string in pack",
2278 (int) TYPE_NO_MODIFIERS(datumtype));
2279 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2281 Perl_croak(aTHX_ "'%c' outside of string in pack",
2282 (int) TYPE_NO_MODIFIERS(datumtype));
2288 if (cur - start < len)
2289 Perl_croak(aTHX_ "'%c' outside of string in pack",
2290 (int) TYPE_NO_MODIFIERS(datumtype));
2293 if (cur < start+symptr->strbeg) {
2294 /* Make sure group starts don't point into the void */
2296 const STRLEN length = cur-start;
2297 for (group = symptr;
2298 group && length < group->strbeg;
2299 group = group->previous) group->strbeg = length;
2300 lookahead.strbeg = length;
2303 case 'x' | TYPE_IS_SHRIEKING: {
2305 if (!len) /* Avoid division by 0 */
2307 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2308 else ai32 = (cur - start) % len;
2309 if (ai32 == 0) goto no_change;
2321 aptr = SvPV_const(fromstr, fromlen);
2322 if (DO_UTF8(fromstr)) {
2323 const char *end, *s;
2325 if (!utf8 && !SvUTF8(cat)) {
2326 marked_upgrade(aTHX_ cat, symptr);
2327 lookahead.flags |= FLAG_DO_UTF8;
2328 lookahead.strbeg = symptr->strbeg;
2331 cur = start + SvCUR(cat);
2333 if (howlen == e_star) {
2334 if (utf8) goto string_copy;
2338 end = aptr + fromlen;
2339 fromlen = datumtype == 'Z' ? len-1 : len;
2340 while ((I32) fromlen > 0 && s < end) {
2345 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2348 if (datumtype == 'Z') len++;
2354 fromlen = len - fromlen;
2355 if (datumtype == 'Z') fromlen--;
2356 if (howlen == e_star) {
2358 if (datumtype == 'Z') len++;
2360 GROWING(0, cat, start, cur, len);
2361 if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2362 datumtype | TYPE_IS_PACK))
2363 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2364 "for '%c', aptr=%p end=%p cur=%p, fromlen=%" UVuf,
2365 (int)datumtype, aptr, end, cur, (UV)fromlen);
2369 if (howlen == e_star) {
2371 if (datumtype == 'Z') len++;
2373 if (len <= (I32) fromlen) {
2375 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2377 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2379 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2380 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2382 while (fromlen > 0) {
2383 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2389 if (howlen == e_star) {
2391 if (datumtype == 'Z') len++;
2393 if (len <= (I32) fromlen) {
2395 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2397 GROWING(0, cat, start, cur, len);
2398 Copy(aptr, cur, fromlen, char);
2402 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2409 const char *str, *end;
2416 str = SvPV_const(fromstr, fromlen);
2417 end = str + fromlen;
2418 if (DO_UTF8(fromstr)) {
2420 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2422 utf8_source = FALSE;
2423 utf8_flags = 0; /* Unused, but keep compilers happy */
2425 if (howlen == e_star) len = fromlen;
2426 field_len = (len+7)/8;
2427 GROWING(utf8, cat, start, cur, field_len);
2428 if (len > (I32)fromlen) len = fromlen;
2431 if (datumtype == 'B')
2435 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2437 } else bits |= *str++ & 1;
2438 if (l & 7) bits <<= 1;
2440 PUSH_BYTE(utf8, cur, bits);
2445 /* datumtype == 'b' */
2449 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2450 if (val & 1) bits |= 0x80;
2451 } else if (*str++ & 1)
2453 if (l & 7) bits >>= 1;
2455 PUSH_BYTE(utf8, cur, bits);
2461 if (datumtype == 'B')
2462 bits <<= 7 - (l & 7);
2464 bits >>= 7 - (l & 7);
2465 PUSH_BYTE(utf8, cur, bits);
2468 /* Determine how many chars are left in the requested field */
2470 if (howlen == e_star) field_len = 0;
2471 else field_len -= l;
2472 Zero(cur, field_len, char);
2478 const char *str, *end;
2485 str = SvPV_const(fromstr, fromlen);
2486 end = str + fromlen;
2487 if (DO_UTF8(fromstr)) {
2489 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2491 utf8_source = FALSE;
2492 utf8_flags = 0; /* Unused, but keep compilers happy */
2494 if (howlen == e_star) len = fromlen;
2495 field_len = (len+1)/2;
2496 GROWING(utf8, cat, start, cur, field_len);
2497 if (!utf8_source && len > (I32)fromlen) len = fromlen;
2500 if (datumtype == 'H')
2504 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2505 if (val < 256 && isALPHA(val))
2506 bits |= (val + 9) & 0xf;
2509 } else if (isALPHA(*str))
2510 bits |= (*str++ + 9) & 0xf;
2512 bits |= *str++ & 0xf;
2513 if (l & 1) bits <<= 4;
2515 PUSH_BYTE(utf8, cur, bits);
2523 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2524 if (val < 256 && isALPHA(val))
2525 bits |= ((val + 9) & 0xf) << 4;
2527 bits |= (val & 0xf) << 4;
2528 } else if (isALPHA(*str))
2529 bits |= ((*str++ + 9) & 0xf) << 4;
2531 bits |= (*str++ & 0xf) << 4;
2532 if (l & 1) bits >>= 4;
2534 PUSH_BYTE(utf8, cur, bits);
2540 PUSH_BYTE(utf8, cur, bits);
2543 /* Determine how many chars are left in the requested field */
2545 if (howlen == e_star) field_len = 0;
2546 else field_len -= l;
2547 Zero(cur, field_len, char);
2555 aiv = SvIV_no_inf(fromstr, datumtype);
2556 if ((-128 > aiv || aiv > 127))
2557 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2558 "Character in 'c' format wrapped in pack");
2559 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2564 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2570 aiv = SvIV_no_inf(fromstr, datumtype);
2571 if ((0 > aiv || aiv > 0xff))
2572 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2573 "Character in 'C' format wrapped in pack");
2574 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2579 U8 in_bytes = (U8)IN_BYTES;
2581 end = start+SvLEN(cat)-1;
2582 if (utf8) end -= UTF8_MAXLEN-1;
2586 auv = SvUV_no_inf(fromstr, datumtype);
2587 if (in_bytes) auv = auv % 0x100;
2592 SvCUR_set(cat, cur - start);
2594 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2595 end = start+SvLEN(cat)-UTF8_MAXLEN;
2597 cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
2600 0 : UNICODE_ALLOW_ANY);
2605 SvCUR_set(cat, cur - start);
2606 marked_upgrade(aTHX_ cat, symptr);
2607 lookahead.flags |= FLAG_DO_UTF8;
2608 lookahead.strbeg = symptr->strbeg;
2611 cur = start + SvCUR(cat);
2612 end = start+SvLEN(cat)-UTF8_MAXLEN;
2615 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2616 "Character in 'W' format wrapped in pack");
2621 SvCUR_set(cat, cur - start);
2622 GROWING(0, cat, start, cur, len+1);
2623 end = start+SvLEN(cat)-1;
2625 *(U8 *) cur++ = (U8)auv;
2634 if (!(symptr->flags & FLAG_DO_UTF8)) {
2635 marked_upgrade(aTHX_ cat, symptr);
2636 lookahead.flags |= FLAG_DO_UTF8;
2637 lookahead.strbeg = symptr->strbeg;
2643 end = start+SvLEN(cat);
2644 if (!utf8) end -= UTF8_MAXLEN;
2648 auv = SvUV_no_inf(fromstr, datumtype);
2650 U8 buffer[UTF8_MAXLEN], *endb;
2651 endb = uvchr_to_utf8_flags(buffer, UNI_TO_NATIVE(auv),
2653 0 : UNICODE_ALLOW_ANY);
2654 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2656 SvCUR_set(cat, cur - start);
2657 GROWING(0, cat, start, cur,
2658 len+(endb-buffer)*UTF8_EXPAND);
2659 end = start+SvLEN(cat);
2661 cur = my_bytes_to_utf8(buffer, endb-buffer, cur, 0);
2665 SvCUR_set(cat, cur - start);
2666 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2667 end = start+SvLEN(cat)-UTF8_MAXLEN;
2669 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, UNI_TO_NATIVE(auv),
2671 0 : UNICODE_ALLOW_ANY);
2676 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2682 anv = SvNV(fromstr);
2683 # if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
2684 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2685 * on Alpha; fake it if we don't have them.
2689 else if (anv < -FLT_MAX)
2691 else afloat = (float)anv;
2693 # if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2695 afloat = (float)NV_NAN;
2699 /* a simple cast to float is undefined if outside
2700 * the range of values that can be represented */
2701 afloat = (float)(anv > FLT_MAX ? NV_INF :
2702 anv < -FLT_MAX ? -NV_INF : anv);
2705 PUSH_VAR(utf8, cur, afloat, needs_swap);
2713 anv = SvNV(fromstr);
2714 # if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
2715 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2716 * on Alpha; fake it if we don't have them.
2720 else if (anv < -DBL_MAX)
2722 else adouble = (double)anv;
2724 adouble = (double)anv;
2726 PUSH_VAR(utf8, cur, adouble, needs_swap);
2731 Zero(&anv, 1, NV); /* can be long double with unused bits */
2735 /* to work round a gcc/x86 bug; don't use SvNV */
2736 anv.nv = sv_2nv(fromstr);
2737 # if defined(LONGDOUBLE_X86_80_BIT) && defined(USE_LONG_DOUBLE) \
2738 && LONG_DOUBLESIZE > 10
2739 /* GCC sometimes overwrites the padding in the
2741 Zero(anv.bytes+10, sizeof(anv.bytes) - 10, U8);
2744 anv.nv = SvNV(fromstr);
2746 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
2750 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2753 /* long doubles can have unused bits, which may be nonzero */
2754 Zero(&aldouble, 1, long double);
2758 /* to work round a gcc/x86 bug; don't use SvNV */
2759 aldouble.ld = (long double)sv_2nv(fromstr);
2760 # if defined(LONGDOUBLE_X86_80_BIT) && LONG_DOUBLESIZE > 10
2761 /* GCC sometimes overwrites the padding in the
2763 Zero(aldouble.bytes+10, sizeof(aldouble.bytes) - 10, U8);
2766 aldouble.ld = (long double)SvNV(fromstr);
2768 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2774 case 'n' | TYPE_IS_SHRIEKING:
2779 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2780 ai16 = PerlSock_htons(ai16);
2781 PUSH16(utf8, cur, &ai16, FALSE);
2784 case 'v' | TYPE_IS_SHRIEKING:
2789 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2791 PUSH16(utf8, cur, &ai16, FALSE);
2794 case 'S' | TYPE_IS_SHRIEKING:
2795 #if SHORTSIZE != SIZE16
2797 unsigned short aushort;
2799 aushort = SvUV_no_inf(fromstr, datumtype);
2800 PUSH_VAR(utf8, cur, aushort, needs_swap);
2810 au16 = (U16)SvUV_no_inf(fromstr, datumtype);
2811 PUSH16(utf8, cur, &au16, needs_swap);
2814 case 's' | TYPE_IS_SHRIEKING:
2815 #if SHORTSIZE != SIZE16
2819 ashort = SvIV_no_inf(fromstr, datumtype);
2820 PUSH_VAR(utf8, cur, ashort, needs_swap);
2830 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2831 PUSH16(utf8, cur, &ai16, needs_swap);
2835 case 'I' | TYPE_IS_SHRIEKING:
2839 auint = SvUV_no_inf(fromstr, datumtype);
2840 PUSH_VAR(utf8, cur, auint, needs_swap);
2847 aiv = SvIV_no_inf(fromstr, datumtype);
2848 PUSH_VAR(utf8, cur, aiv, needs_swap);
2855 auv = SvUV_no_inf(fromstr, datumtype);
2856 PUSH_VAR(utf8, cur, auv, needs_swap);
2863 S_sv_check_infnan(aTHX_ fromstr, datumtype);
2864 anv = SvNV_nomg(fromstr);
2868 SvCUR_set(cat, cur - start);
2869 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
2872 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2873 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2874 any negative IVs will have already been got by the croak()
2875 above. IOK is untrue for fractions, so we test them
2876 against UV_MAX_P1. */
2877 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2878 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
2879 char *in = buf + sizeof(buf);
2880 UV auv = SvUV_nomg(fromstr);
2883 *--in = (char)((auv & 0x7f) | 0x80);
2886 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2887 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2888 in, (buf + sizeof(buf)) - in);
2889 } else if (SvPOKp(fromstr))
2891 else if (SvNOKp(fromstr)) {
2892 /* 10**NV_MAX_10_EXP is the largest power of 10
2893 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
2894 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2895 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2896 And with that many bytes only Inf can overflow.
2897 Some C compilers are strict about integral constant
2898 expressions so we conservatively divide by a slightly
2899 smaller integer instead of multiplying by the exact
2900 floating-point value.
2902 #ifdef NV_MAX_10_EXP
2903 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
2904 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
2906 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
2907 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
2909 char *in = buf + sizeof(buf);
2911 anv = Perl_floor(anv);
2913 const NV next = Perl_floor(anv / 128);
2914 if (in <= buf) /* this cannot happen ;-) */
2915 Perl_croak(aTHX_ "Cannot compress integer in pack");
2916 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
2919 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2920 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2921 in, (buf + sizeof(buf)) - in);
2930 /* Copy string and check for compliance */
2931 from = SvPV_nomg_const(fromstr, len);
2932 if ((norm = is_an_int(from, len)) == NULL)
2933 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
2935 Newx(result, len, char);
2938 while (!done) *--in = div128(norm, &done) | 0x80;
2939 result[len - 1] &= 0x7F; /* clear continue bit */
2940 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2941 in, (result + len) - in);
2943 SvREFCNT_dec(norm); /* free norm */
2948 case 'i' | TYPE_IS_SHRIEKING:
2952 aint = SvIV_no_inf(fromstr, datumtype);
2953 PUSH_VAR(utf8, cur, aint, needs_swap);
2956 case 'N' | TYPE_IS_SHRIEKING:
2961 au32 = SvUV_no_inf(fromstr, datumtype);
2962 au32 = PerlSock_htonl(au32);
2963 PUSH32(utf8, cur, &au32, FALSE);
2966 case 'V' | TYPE_IS_SHRIEKING:
2971 au32 = SvUV_no_inf(fromstr, datumtype);
2973 PUSH32(utf8, cur, &au32, FALSE);
2976 case 'L' | TYPE_IS_SHRIEKING:
2977 #if LONGSIZE != SIZE32
2979 unsigned long aulong;
2981 aulong = SvUV_no_inf(fromstr, datumtype);
2982 PUSH_VAR(utf8, cur, aulong, needs_swap);
2992 au32 = SvUV_no_inf(fromstr, datumtype);
2993 PUSH32(utf8, cur, &au32, needs_swap);
2996 case 'l' | TYPE_IS_SHRIEKING:
2997 #if LONGSIZE != SIZE32
3001 along = SvIV_no_inf(fromstr, datumtype);
3002 PUSH_VAR(utf8, cur, along, needs_swap);
3012 ai32 = SvIV_no_inf(fromstr, datumtype);
3013 PUSH32(utf8, cur, &ai32, needs_swap);
3016 #if defined(HAS_QUAD) && IVSIZE >= 8
3021 auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype);
3022 PUSH_VAR(utf8, cur, auquad, needs_swap);
3029 aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype);
3030 PUSH_VAR(utf8, cur, aquad, needs_swap);
3035 len = 1; /* assume SV is correct length */
3036 GROWING(utf8, cat, start, cur, sizeof(char *));
3043 SvGETMAGIC(fromstr);
3044 if (!SvOK(fromstr)) aptr = NULL;
3046 /* XXX better yet, could spirit away the string to
3047 * a safe spot and hang on to it until the result
3048 * of pack() (and all copies of the result) are
3051 if (((SvTEMP(fromstr) && SvREFCNT(fromstr) == 1)
3052 || (SvPADTMP(fromstr) &&
3053 !SvREADONLY(fromstr)))) {
3054 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3055 "Attempt to pack pointer to temporary value");
3057 if (SvPOK(fromstr) || SvNIOK(fromstr))
3058 aptr = SvPV_nomg_const_nolen(fromstr);
3060 aptr = SvPV_force_flags_nolen(fromstr, 0);
3062 PUSH_VAR(utf8, cur, aptr, needs_swap);
3066 const char *aptr, *aend;
3070 if (len <= 2) len = 45;
3071 else len = len / 3 * 3;
3073 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3074 "Field too wide in 'u' format in pack");
3077 aptr = SvPV_const(fromstr, fromlen);
3078 from_utf8 = DO_UTF8(fromstr);
3080 aend = aptr + fromlen;
3081 fromlen = sv_len_utf8_nomg(fromstr);
3082 } else aend = NULL; /* Unused, but keep compilers happy */
3083 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3084 while (fromlen > 0) {
3087 U8 hunk[1+63/3*4+1];
3089 if ((I32)fromlen > len)
3095 if (!S_utf8_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3096 'u' | TYPE_IS_PACK)) {
3098 SvCUR_set(cat, cur - start);
3099 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3100 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3101 aptr, aend, buffer, (long) todo);
3103 end = doencodes(hunk, (const U8 *)buffer, todo);
3105 end = doencodes(hunk, (const U8 *)aptr, todo);
3108 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
3115 SvCUR_set(cat, cur - start);
3117 *symptr = lookahead;
3126 dSP; dMARK; dORIGMARK; dTARGET;
3129 SV *pat_sv = *++MARK;
3130 const char *pat = SvPV_const(pat_sv, fromlen);
3131 const char *patend = pat + fromlen;
3137 packlist(cat, pat, patend, MARK, SP + 1);
3146 * ex: set ts=8 sts=4 sw=4 et: