3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
32 #define PERL_IN_PP_PACK_C
35 /* Types used by pack/unpack */
37 e_no_len, /* no length */
38 e_number, /* number, [] */
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
57 #define TEMPSYM_INIT(symptr, p, e, f) \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
81 U8 bytes[sizeof(long double)];
88 /* Maximum number of bytes to which a byte can grow due to upgrade */
92 * Offset for integer pack/unpack.
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
112 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
115 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
116 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
117 # define OFF16(p) ((char*)(p))
118 # define OFF32(p) ((char*)(p))
120 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
121 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
124 ++++ bad cray byte order
128 # define OFF16(p) ((char *) (p))
129 # define OFF32(p) ((char *) (p))
132 /* Only to be used inside a loop (see the break) */
133 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
135 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
137 Copy(s, OFF16(p), SIZE16, char); \
142 /* Only to be used inside a loop (see the break) */
143 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
145 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
147 Copy(s, OFF32(p), SIZE32, char); \
152 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
153 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
155 #if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
156 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
157 #elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
158 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
160 # error "Unsupported byteorder"
161 /* Need to add code here to re-instate mixed endian support. */
164 /* Only to be used inside a loop (see the break) */
165 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype) \
168 if (!uni_to_bytes(aTHX_ &s, strend, \
169 (char *) (buf), len, datumtype)) break; \
171 Copy(s, (char *) (buf), len, char); \
176 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
177 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype)
179 #define PUSH_VAR(utf8, aptr, var) \
180 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
182 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
183 #define MAX_SUB_TEMPLATE_LEVEL 100
185 /* flags (note that type modifiers can also be used as flags!) */
186 #define FLAG_WAS_UTF8 0x40
187 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
188 #define FLAG_UNPACK_ONLY_ONE 0x10
189 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
190 #define FLAG_SLASH 0x04
191 #define FLAG_COMMA 0x02
192 #define FLAG_PACK 0x01
195 S_mul128(pTHX_ SV *sv, U8 m)
198 char *s = SvPV(sv, len);
201 PERL_ARGS_ASSERT_MUL128;
203 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
204 SV * const tmpNew = newSVpvs("0000000000");
206 sv_catsv(tmpNew, sv);
207 SvREFCNT_dec(sv); /* free old sv */
212 while (!*t) /* trailing '\0'? */
215 const U32 i = ((*t - '0') << 7) + m;
216 *(t--) = '0' + (char)(i % 10);
222 /* Explosives and implosives. */
224 #if 'I' == 73 && 'J' == 74
225 /* On an ASCII/ISO kind of system */
226 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
229 Some other sort of character set - use memchr() so we don't match
232 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
236 #define TYPE_IS_SHRIEKING 0x100
237 #define TYPE_IS_BIG_ENDIAN 0x200
238 #define TYPE_IS_LITTLE_ENDIAN 0x400
239 #define TYPE_IS_PACK 0x800
240 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
241 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
242 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
244 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
245 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
247 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
249 # define DO_BO_UNPACK(var, type) \
252 my_swabn(&var, sizeof(var)); \
256 # define DO_BO_PACK(var, type) \
259 my_swabn(&var, sizeof(var)); \
263 #define PACK_SIZE_CANNOT_CSUM 0x80
264 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
265 #define PACK_SIZE_MASK 0x3F
267 #include "packsizetables.c"
270 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
273 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
274 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
275 /* We try to process malformed UTF-8 as much as possible (preferably with
276 warnings), but these two mean we make no progress in the string and
277 might enter an infinite loop */
278 if (retlen == (STRLEN) -1 || retlen == 0)
279 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
280 (int) TYPE_NO_MODIFIERS(datumtype));
282 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
283 "Character in '%c' format wrapped in unpack",
284 (int) TYPE_NO_MODIFIERS(datumtype));
291 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
292 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
296 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
300 const char *from = *s;
302 const U32 flags = ckWARN(WARN_UTF8) ?
303 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
304 for (;buf_len > 0; buf_len--) {
305 if (from >= end) return FALSE;
306 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
307 if (retlen == (STRLEN) -1 || retlen == 0) {
308 from += UTF8SKIP(from);
310 } else from += retlen;
315 *(U8 *)buf++ = (U8)val;
317 /* We have enough characters for the buffer. Did we have problems ? */
320 /* Rewalk the string fragment while warning */
322 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
323 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
324 if (ptr >= end) break;
325 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
327 if (from > end) from = end;
330 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
331 WARN_PACK : WARN_UNPACK),
332 "Character(s) in '%c' format wrapped in %s",
333 (int) TYPE_NO_MODIFIERS(datumtype),
334 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
341 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
345 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
346 if (val >= 0x100 || !ISUUCHAR(val) ||
347 retlen == (STRLEN) -1 || retlen == 0) {
351 *out = PL_uudmap[val] & 077;
357 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
358 const U8 * const end = start + len;
360 PERL_ARGS_ASSERT_BYTES_TO_UNI;
362 while (start < end) {
363 const UV uv = NATIVE_TO_ASCII(*start);
364 if (UNI_IS_INVARIANT(uv))
365 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
367 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
368 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
375 #define PUSH_BYTES(utf8, cur, buf, len) \
378 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
380 Copy(buf, cur, len, char); \
385 #define GROWING(utf8, cat, start, cur, in_len) \
387 STRLEN glen = (in_len); \
388 if (utf8) glen *= UTF8_EXPAND; \
389 if ((cur) + glen >= (start) + SvLEN(cat)) { \
390 (start) = sv_exp_grow(cat, glen); \
391 (cur) = (start) + SvCUR(cat); \
395 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
397 const STRLEN glen = (in_len); \
399 if (utf8) gl *= UTF8_EXPAND; \
400 if ((cur) + gl >= (start) + SvLEN(cat)) { \
402 SvCUR_set((cat), (cur) - (start)); \
403 (start) = sv_exp_grow(cat, gl); \
404 (cur) = (start) + SvCUR(cat); \
406 PUSH_BYTES(utf8, cur, buf, glen); \
409 #define PUSH_BYTE(utf8, s, byte) \
412 const U8 au8 = (byte); \
413 (s) = bytes_to_uni(&au8, 1, (s)); \
414 } else *(U8 *)(s)++ = (byte); \
417 /* Only to be used inside a loop (see the break) */
418 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
421 if (str >= end) break; \
422 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
423 if (retlen == (STRLEN) -1 || retlen == 0) { \
425 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
430 static const char *_action( const tempsym_t* symptr )
432 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
435 /* Returns the sizeof() struct described by pat */
437 S_measure_struct(pTHX_ tempsym_t* symptr)
441 PERL_ARGS_ASSERT_MEASURE_STRUCT;
443 while (next_symbol(symptr)) {
447 switch (symptr->howlen) {
449 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
453 /* e_no_len and e_number */
454 len = symptr->length;
458 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
461 /* endianness doesn't influence the size of a type */
462 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
464 Perl_croak(aTHX_ "Invalid type '%c' in %s",
465 (int)TYPE_NO_MODIFIERS(symptr->code),
467 case '.' | TYPE_IS_SHRIEKING:
468 case '@' | TYPE_IS_SHRIEKING:
472 case 'U': /* XXXX Is it correct? */
475 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
476 (int) TYPE_NO_MODIFIERS(symptr->code),
483 tempsym_t savsym = *symptr;
484 symptr->patptr = savsym.grpbeg;
485 symptr->patend = savsym.grpend;
486 /* XXXX Theoretically, we need to measure many times at
487 different positions, since the subexpression may contain
488 alignment commands, but be not of aligned length.
489 Need to detect this and croak(). */
490 size = measure_struct(symptr);
494 case 'X' | TYPE_IS_SHRIEKING:
495 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
497 if (!len) /* Avoid division by 0 */
499 len = total % len; /* Assumed: the start is aligned. */
504 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
506 case 'x' | TYPE_IS_SHRIEKING:
507 if (!len) /* Avoid division by 0 */
509 star = total % len; /* Assumed: the start is aligned. */
510 if (star) /* Other portable ways? */
534 size = sizeof(char*);
544 /* locate matching closing parenthesis or bracket
545 * returns char pointer to char after match, or NULL
548 S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
550 PERL_ARGS_ASSERT_GROUP_END;
552 while (patptr < patend) {
553 const char c = *patptr++;
560 while (patptr < patend && *patptr != '\n')
564 patptr = group_end(patptr, patend, ')') + 1;
566 patptr = group_end(patptr, patend, ']') + 1;
568 Perl_croak(aTHX_ "No group ending character '%c' found in template",
574 /* Convert unsigned decimal number to binary.
575 * Expects a pointer to the first digit and address of length variable
576 * Advances char pointer to 1st non-digit char and returns number
579 S_get_num(pTHX_ const char *patptr, I32 *lenptr )
581 I32 len = *patptr++ - '0';
583 PERL_ARGS_ASSERT_GET_NUM;
585 while (isDIGIT(*patptr)) {
586 if (len >= 0x7FFFFFFF/10)
587 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
588 len = (len * 10) + (*patptr++ - '0');
594 /* The marvellous template parsing routine: Using state stored in *symptr,
595 * locates next template code and count
598 S_next_symbol(pTHX_ tempsym_t* symptr )
600 const char* patptr = symptr->patptr;
601 const char* const patend = symptr->patend;
603 PERL_ARGS_ASSERT_NEXT_SYMBOL;
605 symptr->flags &= ~FLAG_SLASH;
607 while (patptr < patend) {
608 if (isSPACE(*patptr))
610 else if (*patptr == '#') {
612 while (patptr < patend && *patptr != '\n')
617 /* We should have found a template code */
618 I32 code = *patptr++ & 0xFF;
619 U32 inherited_modifiers = 0;
621 if (code == ','){ /* grandfather in commas but with a warning */
622 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
623 symptr->flags |= FLAG_COMMA;
624 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
625 "Invalid type ',' in %s", _action( symptr ) );
630 /* for '(', skip to ')' */
632 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
633 Perl_croak(aTHX_ "()-group starts with a count in %s",
635 symptr->grpbeg = patptr;
636 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
637 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
638 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
642 /* look for group modifiers to inherit */
643 if (TYPE_ENDIANNESS(symptr->flags)) {
644 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
645 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
648 /* look for modifiers */
649 while (patptr < patend) {
654 modifier = TYPE_IS_SHRIEKING;
655 allowed = "sSiIlLxXnNvV@.";
658 modifier = TYPE_IS_BIG_ENDIAN;
659 allowed = ENDIANNESS_ALLOWED_TYPES;
662 modifier = TYPE_IS_LITTLE_ENDIAN;
663 allowed = ENDIANNESS_ALLOWED_TYPES;
674 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
675 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
676 allowed, _action( symptr ) );
678 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
679 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
680 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
681 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
682 TYPE_ENDIANNESS_MASK)
683 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
684 *patptr, _action( symptr ) );
686 if ((code & modifier)) {
687 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
688 "Duplicate modifier '%c' after '%c' in %s",
689 *patptr, (int) TYPE_NO_MODIFIERS(code),
697 /* inherit modifiers */
698 code |= inherited_modifiers;
700 /* look for count and/or / */
701 if (patptr < patend) {
702 if (isDIGIT(*patptr)) {
703 patptr = get_num( patptr, &symptr->length );
704 symptr->howlen = e_number;
706 } else if (*patptr == '*') {
708 symptr->howlen = e_star;
710 } else if (*patptr == '[') {
711 const char* lenptr = ++patptr;
712 symptr->howlen = e_number;
713 patptr = group_end( patptr, patend, ']' ) + 1;
714 /* what kind of [] is it? */
715 if (isDIGIT(*lenptr)) {
716 lenptr = get_num( lenptr, &symptr->length );
718 Perl_croak(aTHX_ "Malformed integer in [] in %s",
721 tempsym_t savsym = *symptr;
722 symptr->patend = patptr-1;
723 symptr->patptr = lenptr;
724 savsym.length = measure_struct(symptr);
728 symptr->howlen = e_no_len;
733 while (patptr < patend) {
734 if (isSPACE(*patptr))
736 else if (*patptr == '#') {
738 while (patptr < patend && *patptr != '\n')
743 if (*patptr == '/') {
744 symptr->flags |= FLAG_SLASH;
746 if (patptr < patend &&
747 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
748 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
755 /* at end - no count, no / */
756 symptr->howlen = e_no_len;
761 symptr->patptr = patptr;
765 symptr->patptr = patptr;
770 There is no way to cleanly handle the case where we should process the
771 string per byte in its upgraded form while it's really in downgraded form
772 (e.g. estimates like strend-s as an upper bound for the number of
773 characters left wouldn't work). So if we foresee the need of this
774 (pattern starts with U or contains U0), we want to work on the encoded
775 version of the string. Users are advised to upgrade their pack string
776 themselves if they need to do a lot of unpacks like this on it
779 need_utf8(const char *pat, const char *patend)
783 PERL_ARGS_ASSERT_NEED_UTF8;
785 while (pat < patend) {
788 pat = (const char *) memchr(pat, '\n', patend-pat);
789 if (!pat) return FALSE;
790 } else if (pat[0] == 'U') {
791 if (first || pat[1] == '0') return TRUE;
792 } else first = FALSE;
799 first_symbol(const char *pat, const char *patend) {
800 PERL_ARGS_ASSERT_FIRST_SYMBOL;
802 while (pat < patend) {
803 if (pat[0] != '#') return pat[0];
805 pat = (const char *) memchr(pat, '\n', patend-pat);
813 =for apidoc unpackstring
815 The engine implementing the unpack() Perl function.
817 Using the template pat..patend, this function unpacks the string
818 s..strend into a number of mortal SVs, which it pushes onto the perl
819 argument (@_) stack (so you will need to issue a C<PUTBACK> before and
820 C<SPAGAIN> after the call to this function). It returns the number of
823 The strend and patend pointers should point to the byte following the last
824 character of each string.
826 Although this function returns its values on the perl argument stack, it
827 doesn't take any parameters from that stack (and thus in particular
828 there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
834 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
838 PERL_ARGS_ASSERT_UNPACKSTRING;
840 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
841 else if (need_utf8(pat, patend)) {
842 /* We probably should try to avoid this in case a scalar context call
843 wouldn't get to the "U0" */
844 STRLEN len = strend - s;
845 s = (char *) bytes_to_utf8((U8 *) s, &len);
848 flags |= FLAG_DO_UTF8;
851 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
852 flags |= FLAG_PARSE_UTF8;
854 TEMPSYM_INIT(&sym, pat, patend, flags);
856 return unpack_rec(&sym, s, s, strend, NULL );
860 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
864 const I32 start_sp_offset = SP - PL_stack_base;
869 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
871 bool explicit_length;
872 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
873 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
875 PERL_ARGS_ASSERT_UNPACK_REC;
877 symptr->strbeg = s - strbeg;
879 while (next_symbol(symptr)) {
882 I32 datumtype = symptr->code;
884 /* do first one only unless in list context
885 / is implemented by unpacking the count, then popping it from the
886 stack, so must check that we're not in the middle of a / */
888 && (SP - PL_stack_base == start_sp_offset + 1)
889 && (datumtype != '/') ) /* XXX can this be omitted */
892 switch (howlen = symptr->howlen) {
894 len = strend - strbeg; /* long enough */
897 /* e_no_len and e_number */
898 len = symptr->length;
902 explicit_length = TRUE;
904 beyond = s >= strend;
906 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
908 /* props nonzero means we can process this letter. */
909 const long size = props & PACK_SIZE_MASK;
910 const long howmany = (strend - s) / size;
914 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
915 if (len && unpack_only_one) len = 1;
921 needs_swap = NEEDS_SWAP(datumtype);
923 switch(TYPE_NO_ENDIANNESS(datumtype)) {
925 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
928 if (howlen == e_no_len)
929 len = 16; /* len is not specified */
937 tempsym_t savsym = *symptr;
938 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
939 symptr->flags |= group_modifiers;
940 symptr->patend = savsym.grpend;
941 symptr->previous = &savsym;
944 if (len && unpack_only_one) len = 1;
946 symptr->patptr = savsym.grpbeg;
947 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
948 else symptr->flags &= ~FLAG_PARSE_UTF8;
949 unpack_rec(symptr, s, strbeg, strend, &s);
950 if (s == strend && savsym.howlen == e_star)
951 break; /* No way to continue */
954 savsym.flags = symptr->flags & ~group_modifiers;
958 case '.' | TYPE_IS_SHRIEKING:
962 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
963 if (howlen == e_star) from = strbeg;
964 else if (len <= 0) from = s;
966 tempsym_t *group = symptr;
968 while (--len && group) group = group->previous;
969 from = group ? strbeg + group->strbeg : strbeg;
972 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
973 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
977 case '@' | TYPE_IS_SHRIEKING:
979 s = strbeg + symptr->strbeg;
980 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
984 Perl_croak(aTHX_ "'@' outside of string in unpack");
989 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
992 Perl_croak(aTHX_ "'@' outside of string in unpack");
996 case 'X' | TYPE_IS_SHRIEKING:
997 if (!len) /* Avoid division by 0 */
1000 const char *hop, *last;
1002 hop = last = strbeg;
1004 hop += UTF8SKIP(hop);
1011 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1015 len = (s - strbeg) % len;
1021 Perl_croak(aTHX_ "'X' outside of string in unpack");
1022 while (--s, UTF8_IS_CONTINUATION(*s)) {
1024 Perl_croak(aTHX_ "'X' outside of string in unpack");
1029 if (len > s - strbeg)
1030 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1034 case 'x' | TYPE_IS_SHRIEKING: {
1036 if (!len) /* Avoid division by 0 */
1038 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1039 else ai32 = (s - strbeg) % len;
1040 if (ai32 == 0) break;
1048 Perl_croak(aTHX_ "'x' outside of string in unpack");
1053 if (len > strend - s)
1054 Perl_croak(aTHX_ "'x' outside of string in unpack");
1059 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1065 /* Preliminary length estimate is assumed done in 'W' */
1066 if (len > strend - s) len = strend - s;
1072 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1073 if (hop >= strend) {
1075 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1080 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1082 } else if (len > strend - s)
1085 if (datumtype == 'Z') {
1086 /* 'Z' strips stuff after first null */
1087 const char *ptr, *end;
1089 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1090 sv = newSVpvn(s, ptr-s);
1091 if (howlen == e_star) /* exact for 'Z*' */
1092 len = ptr-s + (ptr != strend ? 1 : 0);
1093 } else if (datumtype == 'A') {
1094 /* 'A' strips both nulls and spaces */
1096 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1097 for (ptr = s+len-1; ptr >= s; ptr--)
1098 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1099 !isSPACE_utf8(ptr)) break;
1100 if (ptr >= s) ptr += UTF8SKIP(ptr);
1103 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1105 for (ptr = s+len-1; ptr >= s; ptr--)
1106 if (*ptr != 0 && !isSPACE(*ptr)) break;
1109 sv = newSVpvn(s, ptr-s);
1110 } else sv = newSVpvn(s, len);
1114 /* Undo any upgrade done due to need_utf8() */
1115 if (!(symptr->flags & FLAG_WAS_UTF8))
1116 sv_utf8_downgrade(sv, 0);
1124 if (howlen == e_star || len > (strend - s) * 8)
1125 len = (strend - s) * 8;
1128 while (len >= 8 && s < strend) {
1129 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1134 cuv += PL_bitcount[*(U8 *)s++];
1137 if (len && s < strend) {
1139 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1140 if (datumtype == 'b')
1142 if (bits & 1) cuv++;
1147 if (bits & 0x80) cuv++;
1154 sv = sv_2mortal(newSV(len ? len : 1));
1157 if (datumtype == 'b') {
1159 const I32 ai32 = len;
1160 for (len = 0; len < ai32; len++) {
1161 if (len & 7) bits >>= 1;
1163 if (s >= strend) break;
1164 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1165 } else bits = *(U8 *) s++;
1166 *str++ = bits & 1 ? '1' : '0';
1170 const I32 ai32 = len;
1171 for (len = 0; len < ai32; len++) {
1172 if (len & 7) bits <<= 1;
1174 if (s >= strend) break;
1175 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1176 } else bits = *(U8 *) s++;
1177 *str++ = bits & 0x80 ? '1' : '0';
1181 SvCUR_set(sv, str - SvPVX_const(sv));
1188 /* Preliminary length estimate, acceptable for utf8 too */
1189 if (howlen == e_star || len > (strend - s) * 2)
1190 len = (strend - s) * 2;
1192 sv = sv_2mortal(newSV(len ? len : 1));
1196 if (datumtype == 'h') {
1199 for (len = 0; len < ai32; len++) {
1200 if (len & 1) bits >>= 4;
1202 if (s >= strend) break;
1203 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1204 } else bits = * (U8 *) s++;
1206 *str++ = PL_hexdigit[bits & 15];
1210 const I32 ai32 = len;
1211 for (len = 0; len < ai32; len++) {
1212 if (len & 1) bits <<= 4;
1214 if (s >= strend) break;
1215 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1216 } else bits = *(U8 *) s++;
1218 *str++ = PL_hexdigit[(bits >> 4) & 15];
1223 SvCUR_set(sv, str - SvPVX_const(sv));
1230 if (explicit_length)
1231 /* Switch to "character" mode */
1232 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1237 while (len-- > 0 && s < strend) {
1242 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1243 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1244 if (retlen == (STRLEN) -1 || retlen == 0)
1245 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1249 aint = *(U8 *)(s)++;
1250 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1254 else if (checksum > bits_in_uv)
1255 cdouble += (NV)aint;
1263 while (len-- > 0 && s < strend) {
1265 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1266 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1267 if (retlen == (STRLEN) -1 || retlen == 0)
1268 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1272 else if (checksum > bits_in_uv)
1273 cdouble += (NV) val;
1277 } else if (!checksum)
1279 const U8 ch = *(U8 *) s++;
1282 else if (checksum > bits_in_uv)
1283 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1285 while (len-- > 0) cuv += *(U8 *) s++;
1289 if (explicit_length && howlen != e_star) {
1290 /* Switch to "bytes in UTF-8" mode */
1291 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1293 /* Should be impossible due to the need_utf8() test */
1294 Perl_croak(aTHX_ "U0 mode on a byte string");
1298 if (len > strend - s) len = strend - s;
1300 if (len && unpack_only_one) len = 1;
1304 while (len-- > 0 && s < strend) {
1308 U8 result[UTF8_MAXLEN];
1309 const char *ptr = s;
1311 /* Bug: warns about bad utf8 even if we are short on bytes
1312 and will break out of the loop */
1313 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1316 len = UTF8SKIP(result);
1317 if (!uni_to_bytes(aTHX_ &ptr, strend,
1318 (char *) &result[1], len-1, 'U')) break;
1319 auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT);
1322 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
1323 if (retlen == (STRLEN) -1 || retlen == 0)
1324 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1329 else if (checksum > bits_in_uv)
1330 cdouble += (NV) auv;
1335 case 's' | TYPE_IS_SHRIEKING:
1336 #if SHORTSIZE != SIZE16
1339 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1340 DO_BO_UNPACK(ashort, s);
1343 else if (checksum > bits_in_uv)
1344 cdouble += (NV)ashort;
1356 #if U16SIZE > SIZE16
1359 SHIFT16(utf8, s, strend, &ai16, datumtype);
1360 DO_BO_UNPACK(ai16, 16);
1361 #if U16SIZE > SIZE16
1367 else if (checksum > bits_in_uv)
1368 cdouble += (NV)ai16;
1373 case 'S' | TYPE_IS_SHRIEKING:
1374 #if SHORTSIZE != SIZE16
1376 unsigned short aushort;
1377 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1378 DO_BO_UNPACK(aushort, s);
1381 else if (checksum > bits_in_uv)
1382 cdouble += (NV)aushort;
1395 #if U16SIZE > SIZE16
1398 SHIFT16(utf8, s, strend, &au16, datumtype);
1399 DO_BO_UNPACK(au16, 16);
1400 if (datumtype == 'n')
1401 au16 = PerlSock_ntohs(au16);
1402 if (datumtype == 'v')
1406 else if (checksum > bits_in_uv)
1407 cdouble += (NV) au16;
1412 case 'v' | TYPE_IS_SHRIEKING:
1413 case 'n' | TYPE_IS_SHRIEKING:
1416 # if U16SIZE > SIZE16
1419 SHIFT16(utf8, s, strend, &ai16, datumtype);
1420 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1421 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1422 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1423 ai16 = (I16) vtohs((U16) ai16);
1426 else if (checksum > bits_in_uv)
1427 cdouble += (NV) ai16;
1433 case 'i' | TYPE_IS_SHRIEKING:
1436 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1437 DO_BO_UNPACK(aint, i);
1440 else if (checksum > bits_in_uv)
1441 cdouble += (NV)aint;
1447 case 'I' | TYPE_IS_SHRIEKING:
1450 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1451 DO_BO_UNPACK(auint, i);
1454 else if (checksum > bits_in_uv)
1455 cdouble += (NV)auint;
1463 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1464 #if IVSIZE == INTSIZE
1465 DO_BO_UNPACK(aiv, i);
1466 #elif IVSIZE == LONGSIZE
1467 DO_BO_UNPACK(aiv, l);
1468 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1469 DO_BO_UNPACK(aiv, 64);
1471 Perl_croak(aTHX_ "'j' not supported on this platform");
1475 else if (checksum > bits_in_uv)
1484 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1485 #if IVSIZE == INTSIZE
1486 DO_BO_UNPACK(auv, i);
1487 #elif IVSIZE == LONGSIZE
1488 DO_BO_UNPACK(auv, l);
1489 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1490 DO_BO_UNPACK(auv, 64);
1492 Perl_croak(aTHX_ "'J' not supported on this platform");
1496 else if (checksum > bits_in_uv)
1502 case 'l' | TYPE_IS_SHRIEKING:
1503 #if LONGSIZE != SIZE32
1506 SHIFT_VAR(utf8, s, strend, along, datumtype);
1507 DO_BO_UNPACK(along, l);
1510 else if (checksum > bits_in_uv)
1511 cdouble += (NV)along;
1522 #if U32SIZE > SIZE32
1525 SHIFT32(utf8, s, strend, &ai32, datumtype);
1526 DO_BO_UNPACK(ai32, 32);
1527 #if U32SIZE > SIZE32
1528 if (ai32 > 2147483647) ai32 -= 4294967296;
1532 else if (checksum > bits_in_uv)
1533 cdouble += (NV)ai32;
1538 case 'L' | TYPE_IS_SHRIEKING:
1539 #if LONGSIZE != SIZE32
1541 unsigned long aulong;
1542 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1543 DO_BO_UNPACK(aulong, l);
1546 else if (checksum > bits_in_uv)
1547 cdouble += (NV)aulong;
1560 #if U32SIZE > SIZE32
1563 SHIFT32(utf8, s, strend, &au32, datumtype);
1564 DO_BO_UNPACK(au32, 32);
1565 if (datumtype == 'N')
1566 au32 = PerlSock_ntohl(au32);
1567 if (datumtype == 'V')
1571 else if (checksum > bits_in_uv)
1572 cdouble += (NV)au32;
1577 case 'V' | TYPE_IS_SHRIEKING:
1578 case 'N' | TYPE_IS_SHRIEKING:
1581 #if U32SIZE > SIZE32
1584 SHIFT32(utf8, s, strend, &ai32, datumtype);
1585 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1586 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1587 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1588 ai32 = (I32)vtohl((U32)ai32);
1591 else if (checksum > bits_in_uv)
1592 cdouble += (NV)ai32;
1600 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1601 DO_BO_UNPACK(aptr, pointer);
1602 /* newSVpv generates undef if aptr is NULL */
1603 mPUSHs(newSVpv(aptr, 0));
1611 while (len > 0 && s < strend) {
1613 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1614 auv = (auv << 7) | (ch & 0x7f);
1615 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1623 if (++bytes >= sizeof(UV)) { /* promote to string */
1626 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
1627 while (s < strend) {
1628 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1629 sv = mul128(sv, (U8)(ch & 0x7f));
1635 t = SvPV_nolen_const(sv);
1644 if ((s >= strend) && bytes)
1645 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1649 if (symptr->howlen == e_star)
1650 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1652 if (s + sizeof(char*) <= strend) {
1654 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1655 DO_BO_UNPACK(aptr, pointer);
1656 /* newSVpvn generates undef if aptr is NULL */
1657 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1664 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
1665 DO_BO_UNPACK(aquad, 64);
1667 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
1668 newSViv((IV)aquad) : newSVnv((NV)aquad));
1669 else if (checksum > bits_in_uv)
1670 cdouble += (NV)aquad;
1678 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
1679 DO_BO_UNPACK(auquad, 64);
1681 mPUSHs(auquad <= UV_MAX ?
1682 newSVuv((UV)auquad) : newSVnv((NV)auquad));
1683 else if (checksum > bits_in_uv)
1684 cdouble += (NV)auquad;
1689 #endif /* HAS_QUAD */
1690 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1694 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
1695 DO_BO_UNPACK(afloat, float);
1705 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
1706 DO_BO_UNPACK(adouble, double);
1716 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), datumtype);
1717 DO_BO_UNPACK(anv.nv, NV);
1724 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1728 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, sizeof(aldouble.bytes), datumtype);
1729 DO_BO_UNPACK(aldouble.ld, long double);
1731 mPUSHn(aldouble.ld);
1733 cdouble += aldouble.ld;
1739 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1740 sv = sv_2mortal(newSV(l));
1741 if (l) SvPOK_on(sv);
1744 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1749 next_uni_uu(aTHX_ &s, strend, &a);
1750 next_uni_uu(aTHX_ &s, strend, &b);
1751 next_uni_uu(aTHX_ &s, strend, &c);
1752 next_uni_uu(aTHX_ &s, strend, &d);
1753 hunk[0] = (char)((a << 2) | (b >> 4));
1754 hunk[1] = (char)((b << 4) | (c >> 2));
1755 hunk[2] = (char)((c << 6) | d);
1757 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1765 /* possible checksum byte */
1766 const char *skip = s+UTF8SKIP(s);
1767 if (skip < strend && *skip == '\n')
1773 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1777 len = PL_uudmap[*(U8*)s++] & 077;
1779 if (s < strend && ISUUCHAR(*s))
1780 a = PL_uudmap[*(U8*)s++] & 077;
1783 if (s < strend && ISUUCHAR(*s))
1784 b = PL_uudmap[*(U8*)s++] & 077;
1787 if (s < strend && ISUUCHAR(*s))
1788 c = PL_uudmap[*(U8*)s++] & 077;
1791 if (s < strend && ISUUCHAR(*s))
1792 d = PL_uudmap[*(U8*)s++] & 077;
1795 hunk[0] = (char)((a << 2) | (b >> 4));
1796 hunk[1] = (char)((b << 4) | (c >> 2));
1797 hunk[2] = (char)((c << 6) | d);
1799 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1804 else /* possible checksum byte */
1805 if (s + 1 < strend && s[1] == '\n')
1815 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1816 (checksum > bits_in_uv &&
1817 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1820 anv = (NV) (1 << (checksum & 15));
1821 while (checksum >= 16) {
1825 while (cdouble < 0.0)
1827 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
1828 sv = newSVnv(cdouble);
1831 if (checksum < bits_in_uv) {
1832 UV mask = ((UV)1 << checksum) - 1;
1841 if (symptr->flags & FLAG_SLASH){
1842 if (SP - PL_stack_base - start_sp_offset <= 0)
1844 if( next_symbol(symptr) ){
1845 if( symptr->howlen == e_number )
1846 Perl_croak(aTHX_ "Count after length/code in unpack" );
1848 /* ...end of char buffer then no decent length available */
1849 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1851 /* take top of stack (hope it's numeric) */
1854 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1857 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1859 datumtype = symptr->code;
1860 explicit_length = FALSE;
1868 return SP - PL_stack_base - start_sp_offset;
1876 I32 gimme = GIMME_V;
1879 const char *pat = SvPV_const(left, llen);
1880 const char *s = SvPV_const(right, rlen);
1881 const char *strend = s + rlen;
1882 const char *patend = pat + llen;
1886 cnt = unpackstring(pat, patend, s, strend,
1887 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
1888 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
1891 if ( !cnt && gimme == G_SCALAR )
1892 PUSHs(&PL_sv_undef);
1897 doencodes(U8 *h, const char *s, I32 len)
1899 *h++ = PL_uuemap[len];
1901 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1902 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1903 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1904 *h++ = PL_uuemap[(077 & (s[2] & 077))];
1909 const char r = (len > 1 ? s[1] : '\0');
1910 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1911 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1912 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1913 *h++ = PL_uuemap[0];
1920 S_is_an_int(pTHX_ const char *s, STRLEN l)
1922 SV *result = newSVpvn(s, l);
1923 char *const result_c = SvPV_nolen(result); /* convenience */
1924 char *out = result_c;
1928 PERL_ARGS_ASSERT_IS_AN_INT;
1936 SvREFCNT_dec(result);
1959 SvREFCNT_dec(result);
1965 SvCUR_set(result, out - result_c);
1969 /* pnum must be '\0' terminated */
1971 S_div128(pTHX_ SV *pnum, bool *done)
1974 char * const s = SvPV(pnum, len);
1978 PERL_ARGS_ASSERT_DIV128;
1982 const int i = m * 10 + (*t - '0');
1983 const int r = (i >> 7); /* r < 10 */
1991 SvCUR_set(pnum, (STRLEN) (t - s));
1996 =for apidoc packlist
1998 The engine implementing pack() Perl function.
2004 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
2009 PERL_ARGS_ASSERT_PACKLIST;
2011 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2013 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2014 Also make sure any UTF8 flag is loaded */
2015 SvPV_force_nolen(cat);
2017 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2019 (void)pack_rec( cat, &sym, beglist, endlist );
2022 /* like sv_utf8_upgrade, but also repoint the group start markers */
2024 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2027 const char *from_ptr, *from_start, *from_end, **marks, **m;
2028 char *to_start, *to_ptr;
2030 if (SvUTF8(sv)) return;
2032 from_start = SvPVX_const(sv);
2033 from_end = from_start + SvCUR(sv);
2034 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2035 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2036 if (from_ptr == from_end) {
2037 /* Simple case: no character needs to be changed */
2042 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2043 Newx(to_start, len, char);
2044 Copy(from_start, to_start, from_ptr-from_start, char);
2045 to_ptr = to_start + (from_ptr-from_start);
2047 Newx(marks, sym_ptr->level+2, const char *);
2048 for (group=sym_ptr; group; group = group->previous)
2049 marks[group->level] = from_start + group->strbeg;
2050 marks[sym_ptr->level+1] = from_end+1;
2051 for (m = marks; *m < from_ptr; m++)
2052 *m = to_start + (*m-from_start);
2054 for (;from_ptr < from_end; from_ptr++) {
2055 while (*m == from_ptr) *m++ = to_ptr;
2056 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2060 while (*m == from_ptr) *m++ = to_ptr;
2061 if (m != marks + sym_ptr->level+1) {
2064 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2065 "level=%d", m, marks, sym_ptr->level);
2067 for (group=sym_ptr; group; group = group->previous)
2068 group->strbeg = marks[group->level] - to_start;
2073 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2074 from_start -= SvIVX(sv);
2077 SvFLAGS(sv) &= ~SVf_OOK;
2080 Safefree(from_start);
2081 SvPV_set(sv, to_start);
2082 SvCUR_set(sv, to_ptr - to_start);
2087 /* Exponential string grower. Makes string extension effectively O(n)
2088 needed says how many extra bytes we need (not counting the final '\0')
2089 Only grows the string if there is an actual lack of space
2092 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2093 const STRLEN cur = SvCUR(sv);
2094 const STRLEN len = SvLEN(sv);
2097 PERL_ARGS_ASSERT_SV_EXP_GROW;
2099 if (len - cur > needed) return SvPVX(sv);
2100 extend = needed > len ? needed : len;
2101 return SvGROW(sv, len+extend+1);
2106 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2109 tempsym_t lookahead;
2110 I32 items = endlist - beglist;
2111 bool found = next_symbol(symptr);
2112 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2113 bool warn_utf8 = ckWARN(WARN_UTF8);
2115 PERL_ARGS_ASSERT_PACK_REC;
2117 if (symptr->level == 0 && found && symptr->code == 'U') {
2118 marked_upgrade(aTHX_ cat, symptr);
2119 symptr->flags |= FLAG_DO_UTF8;
2122 symptr->strbeg = SvCUR(cat);
2128 SV *lengthcode = NULL;
2129 I32 datumtype = symptr->code;
2130 howlen_t howlen = symptr->howlen;
2131 char *start = SvPVX(cat);
2132 char *cur = start + SvCUR(cat);
2135 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2139 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2143 /* e_no_len and e_number */
2144 len = symptr->length;
2149 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2151 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2152 /* We can process this letter. */
2153 STRLEN size = props & PACK_SIZE_MASK;
2154 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2158 /* Look ahead for next symbol. Do we have code/code? */
2159 lookahead = *symptr;
2160 found = next_symbol(&lookahead);
2161 if (symptr->flags & FLAG_SLASH) {
2163 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2164 if (strchr("aAZ", lookahead.code)) {
2165 if (lookahead.howlen == e_number) count = lookahead.length;
2168 count = sv_len_utf8(*beglist);
2171 if (lookahead.code == 'Z') count++;
2174 if (lookahead.howlen == e_number && lookahead.length < items)
2175 count = lookahead.length;
2178 lookahead.howlen = e_number;
2179 lookahead.length = count;
2180 lengthcode = sv_2mortal(newSViv(count));
2183 needs_swap = NEEDS_SWAP(datumtype);
2185 /* Code inside the switch must take care to properly update
2186 cat (CUR length and '\0' termination) if it updated *cur and
2187 doesn't simply leave using break */
2188 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2190 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2191 (int) TYPE_NO_MODIFIERS(datumtype));
2193 Perl_croak(aTHX_ "'%%' may not be used in pack");
2196 case '.' | TYPE_IS_SHRIEKING:
2198 if (howlen == e_star) from = start;
2199 else if (len == 0) from = cur;
2201 tempsym_t *group = symptr;
2203 while (--len && group) group = group->previous;
2204 from = group ? start + group->strbeg : start;
2207 len = SvIV(fromstr);
2209 case '@' | TYPE_IS_SHRIEKING:
2211 from = start + symptr->strbeg;
2213 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2215 while (len && from < cur) {
2216 from += UTF8SKIP(from);
2220 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2222 /* Here we know from == cur */
2224 GROWING(0, cat, start, cur, len);
2225 Zero(cur, len, char);
2227 } else if (from < cur) {
2230 } else goto no_change;
2238 if (len > 0) goto grow;
2239 if (len == 0) goto no_change;
2246 tempsym_t savsym = *symptr;
2247 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2248 symptr->flags |= group_modifiers;
2249 symptr->patend = savsym.grpend;
2251 symptr->previous = &lookahead;
2254 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2255 else symptr->flags &= ~FLAG_PARSE_UTF8;
2256 was_utf8 = SvUTF8(cat);
2257 symptr->patptr = savsym.grpbeg;
2258 beglist = pack_rec(cat, symptr, beglist, endlist);
2259 if (SvUTF8(cat) != was_utf8)
2260 /* This had better be an upgrade while in utf8==0 mode */
2263 if (savsym.howlen == e_star && beglist == endlist)
2264 break; /* No way to continue */
2266 items = endlist - beglist;
2267 lookahead.flags = symptr->flags & ~group_modifiers;
2270 case 'X' | TYPE_IS_SHRIEKING:
2271 if (!len) /* Avoid division by 0 */
2278 hop += UTF8SKIP(hop);
2285 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2289 len = (cur-start) % len;
2293 if (len < 1) goto no_change;
2297 Perl_croak(aTHX_ "'%c' outside of string in pack",
2298 (int) TYPE_NO_MODIFIERS(datumtype));
2299 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2301 Perl_croak(aTHX_ "'%c' outside of string in pack",
2302 (int) TYPE_NO_MODIFIERS(datumtype));
2308 if (cur - start < len)
2309 Perl_croak(aTHX_ "'%c' outside of string in pack",
2310 (int) TYPE_NO_MODIFIERS(datumtype));
2313 if (cur < start+symptr->strbeg) {
2314 /* Make sure group starts don't point into the void */
2316 const STRLEN length = cur-start;
2317 for (group = symptr;
2318 group && length < group->strbeg;
2319 group = group->previous) group->strbeg = length;
2320 lookahead.strbeg = length;
2323 case 'x' | TYPE_IS_SHRIEKING: {
2325 if (!len) /* Avoid division by 0 */
2327 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2328 else ai32 = (cur - start) % len;
2329 if (ai32 == 0) goto no_change;
2341 aptr = SvPV_const(fromstr, fromlen);
2342 if (DO_UTF8(fromstr)) {
2343 const char *end, *s;
2345 if (!utf8 && !SvUTF8(cat)) {
2346 marked_upgrade(aTHX_ cat, symptr);
2347 lookahead.flags |= FLAG_DO_UTF8;
2348 lookahead.strbeg = symptr->strbeg;
2351 cur = start + SvCUR(cat);
2353 if (howlen == e_star) {
2354 if (utf8) goto string_copy;
2358 end = aptr + fromlen;
2359 fromlen = datumtype == 'Z' ? len-1 : len;
2360 while ((I32) fromlen > 0 && s < end) {
2365 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2368 if (datumtype == 'Z') len++;
2374 fromlen = len - fromlen;
2375 if (datumtype == 'Z') fromlen--;
2376 if (howlen == e_star) {
2378 if (datumtype == 'Z') len++;
2380 GROWING(0, cat, start, cur, len);
2381 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2382 datumtype | TYPE_IS_PACK))
2383 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2384 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2385 (int)datumtype, aptr, end, cur, (UV)fromlen);
2389 if (howlen == e_star) {
2391 if (datumtype == 'Z') len++;
2393 if (len <= (I32) fromlen) {
2395 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2397 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2399 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2400 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2402 while (fromlen > 0) {
2403 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2409 if (howlen == e_star) {
2411 if (datumtype == 'Z') len++;
2413 if (len <= (I32) fromlen) {
2415 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2417 GROWING(0, cat, start, cur, len);
2418 Copy(aptr, cur, fromlen, char);
2422 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2429 const char *str, *end;
2436 str = SvPV_const(fromstr, fromlen);
2437 end = str + fromlen;
2438 if (DO_UTF8(fromstr)) {
2440 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2442 utf8_source = FALSE;
2443 utf8_flags = 0; /* Unused, but keep compilers happy */
2445 if (howlen == e_star) len = fromlen;
2446 field_len = (len+7)/8;
2447 GROWING(utf8, cat, start, cur, field_len);
2448 if (len > (I32)fromlen) len = fromlen;
2451 if (datumtype == 'B')
2455 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2457 } else bits |= *str++ & 1;
2458 if (l & 7) bits <<= 1;
2460 PUSH_BYTE(utf8, cur, bits);
2465 /* datumtype == 'b' */
2469 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2470 if (val & 1) bits |= 0x80;
2471 } else if (*str++ & 1)
2473 if (l & 7) bits >>= 1;
2475 PUSH_BYTE(utf8, cur, bits);
2481 if (datumtype == 'B')
2482 bits <<= 7 - (l & 7);
2484 bits >>= 7 - (l & 7);
2485 PUSH_BYTE(utf8, cur, bits);
2488 /* Determine how many chars are left in the requested field */
2490 if (howlen == e_star) field_len = 0;
2491 else field_len -= l;
2492 Zero(cur, field_len, char);
2498 const char *str, *end;
2505 str = SvPV_const(fromstr, fromlen);
2506 end = str + fromlen;
2507 if (DO_UTF8(fromstr)) {
2509 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2511 utf8_source = FALSE;
2512 utf8_flags = 0; /* Unused, but keep compilers happy */
2514 if (howlen == e_star) len = fromlen;
2515 field_len = (len+1)/2;
2516 GROWING(utf8, cat, start, cur, field_len);
2517 if (!utf8 && len > (I32)fromlen) len = fromlen;
2520 if (datumtype == 'H')
2524 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2525 if (val < 256 && isALPHA(val))
2526 bits |= (val + 9) & 0xf;
2529 } else if (isALPHA(*str))
2530 bits |= (*str++ + 9) & 0xf;
2532 bits |= *str++ & 0xf;
2533 if (l & 1) bits <<= 4;
2535 PUSH_BYTE(utf8, cur, bits);
2543 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2544 if (val < 256 && isALPHA(val))
2545 bits |= ((val + 9) & 0xf) << 4;
2547 bits |= (val & 0xf) << 4;
2548 } else if (isALPHA(*str))
2549 bits |= ((*str++ + 9) & 0xf) << 4;
2551 bits |= (*str++ & 0xf) << 4;
2552 if (l & 1) bits >>= 4;
2554 PUSH_BYTE(utf8, cur, bits);
2560 PUSH_BYTE(utf8, cur, bits);
2563 /* Determine how many chars are left in the requested field */
2565 if (howlen == e_star) field_len = 0;
2566 else field_len -= l;
2567 Zero(cur, field_len, char);
2575 aiv = SvIV(fromstr);
2576 if ((-128 > aiv || aiv > 127))
2577 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2578 "Character in 'c' format wrapped in pack");
2579 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2584 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2590 aiv = SvIV(fromstr);
2591 if ((0 > aiv || aiv > 0xff))
2592 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2593 "Character in 'C' format wrapped in pack");
2594 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2599 U8 in_bytes = (U8)IN_BYTES;
2601 end = start+SvLEN(cat)-1;
2602 if (utf8) end -= UTF8_MAXLEN-1;
2606 auv = SvUV(fromstr);
2607 if (in_bytes) auv = auv % 0x100;
2612 SvCUR_set(cat, cur - start);
2614 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2615 end = start+SvLEN(cat)-UTF8_MAXLEN;
2617 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2620 0 : UNICODE_ALLOW_ANY);
2625 SvCUR_set(cat, cur - start);
2626 marked_upgrade(aTHX_ cat, symptr);
2627 lookahead.flags |= FLAG_DO_UTF8;
2628 lookahead.strbeg = symptr->strbeg;
2631 cur = start + SvCUR(cat);
2632 end = start+SvLEN(cat)-UTF8_MAXLEN;
2635 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2636 "Character in 'W' format wrapped in pack");
2641 SvCUR_set(cat, cur - start);
2642 GROWING(0, cat, start, cur, len+1);
2643 end = start+SvLEN(cat)-1;
2645 *(U8 *) cur++ = (U8)auv;
2654 if (!(symptr->flags & FLAG_DO_UTF8)) {
2655 marked_upgrade(aTHX_ cat, symptr);
2656 lookahead.flags |= FLAG_DO_UTF8;
2657 lookahead.strbeg = symptr->strbeg;
2663 end = start+SvLEN(cat);
2664 if (!utf8) end -= UTF8_MAXLEN;
2668 auv = SvUV(fromstr);
2670 U8 buffer[UTF8_MAXLEN], *endb;
2671 endb = uvuni_to_utf8_flags(buffer, auv,
2673 0 : UNICODE_ALLOW_ANY);
2674 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2676 SvCUR_set(cat, cur - start);
2677 GROWING(0, cat, start, cur,
2678 len+(endb-buffer)*UTF8_EXPAND);
2679 end = start+SvLEN(cat);
2681 cur = bytes_to_uni(buffer, endb-buffer, cur);
2685 SvCUR_set(cat, cur - start);
2686 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2687 end = start+SvLEN(cat)-UTF8_MAXLEN;
2689 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
2691 0 : UNICODE_ALLOW_ANY);
2696 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2702 anv = SvNV(fromstr);
2703 # if defined(VMS) && !defined(_IEEE_FP)
2704 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2705 * on Alpha; fake it if we don't have them.
2709 else if (anv < -FLT_MAX)
2711 else afloat = (float)anv;
2713 afloat = (float)anv;
2715 DO_BO_PACK(afloat, float);
2716 PUSH_VAR(utf8, cur, afloat);
2724 anv = SvNV(fromstr);
2725 # if defined(VMS) && !defined(_IEEE_FP)
2726 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2727 * on Alpha; fake it if we don't have them.
2731 else if (anv < -DBL_MAX)
2733 else adouble = (double)anv;
2735 adouble = (double)anv;
2737 DO_BO_PACK(adouble, double);
2738 PUSH_VAR(utf8, cur, adouble);
2743 Zero(&anv, 1, NV); /* can be long double with unused bits */
2747 /* to work round a gcc/x86 bug; don't use SvNV */
2748 anv.nv = sv_2nv(fromstr);
2750 anv.nv = SvNV(fromstr);
2752 DO_BO_PACK(anv, NV);
2753 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes));
2757 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2760 /* long doubles can have unused bits, which may be nonzero */
2761 Zero(&aldouble, 1, long double);
2765 /* to work round a gcc/x86 bug; don't use SvNV */
2766 aldouble.ld = (long double)sv_2nv(fromstr);
2768 aldouble.ld = (long double)SvNV(fromstr);
2770 DO_BO_PACK(aldouble, long double);
2771 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes));
2776 case 'n' | TYPE_IS_SHRIEKING:
2781 ai16 = (I16)SvIV(fromstr);
2782 ai16 = PerlSock_htons(ai16);
2783 PUSH16(utf8, cur, &ai16);
2786 case 'v' | TYPE_IS_SHRIEKING:
2791 ai16 = (I16)SvIV(fromstr);
2793 PUSH16(utf8, cur, &ai16);
2796 case 'S' | TYPE_IS_SHRIEKING:
2797 #if SHORTSIZE != SIZE16
2799 unsigned short aushort;
2801 aushort = SvUV(fromstr);
2802 DO_BO_PACK(aushort, s);
2803 PUSH_VAR(utf8, cur, aushort);
2813 au16 = (U16)SvUV(fromstr);
2814 DO_BO_PACK(au16, 16);
2815 PUSH16(utf8, cur, &au16);
2818 case 's' | TYPE_IS_SHRIEKING:
2819 #if SHORTSIZE != SIZE16
2823 ashort = SvIV(fromstr);
2824 DO_BO_PACK(ashort, s);
2825 PUSH_VAR(utf8, cur, ashort);
2835 ai16 = (I16)SvIV(fromstr);
2836 DO_BO_PACK(ai16, 16);
2837 PUSH16(utf8, cur, &ai16);
2841 case 'I' | TYPE_IS_SHRIEKING:
2845 auint = SvUV(fromstr);
2846 DO_BO_PACK(auint, i);
2847 PUSH_VAR(utf8, cur, auint);
2854 aiv = SvIV(fromstr);
2855 #if IVSIZE == INTSIZE
2857 #elif IVSIZE == LONGSIZE
2859 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
2860 DO_BO_PACK(aiv, 64);
2862 Perl_croak(aTHX_ "'j' not supported on this platform");
2864 PUSH_VAR(utf8, cur, aiv);
2871 auv = SvUV(fromstr);
2872 #if UVSIZE == INTSIZE
2874 #elif UVSIZE == LONGSIZE
2876 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
2877 DO_BO_PACK(auv, 64);
2879 Perl_croak(aTHX_ "'J' not supported on this platform");
2881 PUSH_VAR(utf8, cur, auv);
2888 anv = SvNV(fromstr);
2892 SvCUR_set(cat, cur - start);
2893 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
2896 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2897 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2898 any negative IVs will have already been got by the croak()
2899 above. IOK is untrue for fractions, so we test them
2900 against UV_MAX_P1. */
2901 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2902 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
2903 char *in = buf + sizeof(buf);
2904 UV auv = SvUV(fromstr);
2907 *--in = (char)((auv & 0x7f) | 0x80);
2910 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2911 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2912 in, (buf + sizeof(buf)) - in);
2913 } else if (SvPOKp(fromstr))
2915 else if (SvNOKp(fromstr)) {
2916 /* 10**NV_MAX_10_EXP is the largest power of 10
2917 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
2918 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2919 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2920 And with that many bytes only Inf can overflow.
2921 Some C compilers are strict about integral constant
2922 expressions so we conservatively divide by a slightly
2923 smaller integer instead of multiplying by the exact
2924 floating-point value.
2926 #ifdef NV_MAX_10_EXP
2927 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
2928 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
2930 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
2931 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
2933 char *in = buf + sizeof(buf);
2935 anv = Perl_floor(anv);
2937 const NV next = Perl_floor(anv / 128);
2938 if (in <= buf) /* this cannot happen ;-) */
2939 Perl_croak(aTHX_ "Cannot compress integer in pack");
2940 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
2943 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2944 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2945 in, (buf + sizeof(buf)) - in);
2954 /* Copy string and check for compliance */
2955 from = SvPV_const(fromstr, len);
2956 if ((norm = is_an_int(from, len)) == NULL)
2957 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
2959 Newx(result, len, char);
2962 while (!done) *--in = div128(norm, &done) | 0x80;
2963 result[len - 1] &= 0x7F; /* clear continue bit */
2964 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2965 in, (result + len) - in);
2967 SvREFCNT_dec(norm); /* free norm */
2972 case 'i' | TYPE_IS_SHRIEKING:
2976 aint = SvIV(fromstr);
2977 DO_BO_PACK(aint, i);
2978 PUSH_VAR(utf8, cur, aint);
2981 case 'N' | TYPE_IS_SHRIEKING:
2986 au32 = SvUV(fromstr);
2987 au32 = PerlSock_htonl(au32);
2988 PUSH32(utf8, cur, &au32);
2991 case 'V' | TYPE_IS_SHRIEKING:
2996 au32 = SvUV(fromstr);
2998 PUSH32(utf8, cur, &au32);
3001 case 'L' | TYPE_IS_SHRIEKING:
3002 #if LONGSIZE != SIZE32
3004 unsigned long aulong;
3006 aulong = SvUV(fromstr);
3007 DO_BO_PACK(aulong, l);
3008 PUSH_VAR(utf8, cur, aulong);
3018 au32 = SvUV(fromstr);
3019 DO_BO_PACK(au32, 32);
3020 PUSH32(utf8, cur, &au32);
3023 case 'l' | TYPE_IS_SHRIEKING:
3024 #if LONGSIZE != SIZE32
3028 along = SvIV(fromstr);
3029 DO_BO_PACK(along, l);
3030 PUSH_VAR(utf8, cur, along);
3040 ai32 = SvIV(fromstr);
3041 DO_BO_PACK(ai32, 32);
3042 PUSH32(utf8, cur, &ai32);
3050 auquad = (Uquad_t) SvUV(fromstr);
3051 DO_BO_PACK(auquad, 64);
3052 PUSH_VAR(utf8, cur, auquad);
3059 aquad = (Quad_t)SvIV(fromstr);
3060 DO_BO_PACK(aquad, 64);
3061 PUSH_VAR(utf8, cur, aquad);
3064 #endif /* HAS_QUAD */
3066 len = 1; /* assume SV is correct length */
3067 GROWING(utf8, cat, start, cur, sizeof(char *));
3074 SvGETMAGIC(fromstr);
3075 if (!SvOK(fromstr)) aptr = NULL;
3077 /* XXX better yet, could spirit away the string to
3078 * a safe spot and hang on to it until the result
3079 * of pack() (and all copies of the result) are
3082 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3083 !SvREADONLY(fromstr)))) {
3084 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3085 "Attempt to pack pointer to temporary value");
3087 if (SvPOK(fromstr) || SvNIOK(fromstr))
3088 aptr = SvPV_nomg_const_nolen(fromstr);
3090 aptr = SvPV_force_flags_nolen(fromstr, 0);
3092 DO_BO_PACK(aptr, pointer);
3093 PUSH_VAR(utf8, cur, aptr);
3097 const char *aptr, *aend;
3101 if (len <= 2) len = 45;
3102 else len = len / 3 * 3;
3104 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3105 "Field too wide in 'u' format in pack");
3108 aptr = SvPV_const(fromstr, fromlen);
3109 from_utf8 = DO_UTF8(fromstr);
3111 aend = aptr + fromlen;
3112 fromlen = sv_len_utf8_nomg(fromstr);
3113 } else aend = NULL; /* Unused, but keep compilers happy */
3114 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3115 while (fromlen > 0) {
3118 U8 hunk[1+63/3*4+1];
3120 if ((I32)fromlen > len)
3126 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3127 'u' | TYPE_IS_PACK)) {
3129 SvCUR_set(cat, cur - start);
3130 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3131 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3132 aptr, aend, buffer, (long) todo);
3134 end = doencodes(hunk, buffer, todo);
3136 end = doencodes(hunk, aptr, todo);
3139 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3146 SvCUR_set(cat, cur - start);
3148 *symptr = lookahead;
3157 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3160 SV *pat_sv = *++MARK;
3161 const char *pat = SvPV_const(pat_sv, fromlen);
3162 const char *patend = pat + fromlen;
3168 packlist(cat, pat, patend, MARK, SP + 1);
3178 * c-indentation-style: bsd
3180 * indent-tabs-mode: nil
3183 * ex: set ts=8 sts=4 sw=4 et: