3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
32 #define PERL_IN_PP_PACK_C
35 /* Types used by pack/unpack */
37 e_no_len, /* no length */
38 e_number, /* number, [] */
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
57 #define TEMPSYM_INIT(symptr, p, e, f) \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
81 U8 bytes[sizeof(long double)];
88 /* Maximum number of bytes to which a byte can grow due to upgrade */
92 * Offset for integer pack/unpack.
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
112 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
115 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
116 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
117 # define OFF16(p) ((char*)(p))
118 # define OFF32(p) ((char*)(p))
120 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
121 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
124 ++++ bad cray byte order
128 # define OFF16(p) ((char *) (p))
129 # define OFF32(p) ((char *) (p))
132 /* Only to be used inside a loop (see the break) */
133 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
135 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
137 Copy(s, OFF16(p), SIZE16, char); \
142 /* Only to be used inside a loop (see the break) */
143 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
145 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
147 Copy(s, OFF32(p), SIZE32, char); \
152 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
153 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
155 #if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
156 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
157 #elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
158 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
160 # error "Unsupported byteorder"
161 /* Need to add code here to re-instate mixed endian support. */
164 /* Only to be used inside a loop (see the break) */
165 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype) \
168 if (!uni_to_bytes(aTHX_ &s, strend, \
169 (char *) (buf), len, datumtype)) break; \
171 Copy(s, (char *) (buf), len, char); \
176 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
177 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype)
179 #define PUSH_VAR(utf8, aptr, var) \
180 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
182 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
183 #define MAX_SUB_TEMPLATE_LEVEL 100
185 /* flags (note that type modifiers can also be used as flags!) */
186 #define FLAG_WAS_UTF8 0x40
187 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
188 #define FLAG_UNPACK_ONLY_ONE 0x10
189 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
190 #define FLAG_SLASH 0x04
191 #define FLAG_COMMA 0x02
192 #define FLAG_PACK 0x01
195 S_mul128(pTHX_ SV *sv, U8 m)
198 char *s = SvPV(sv, len);
201 PERL_ARGS_ASSERT_MUL128;
203 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
204 SV * const tmpNew = newSVpvs("0000000000");
206 sv_catsv(tmpNew, sv);
207 SvREFCNT_dec(sv); /* free old sv */
212 while (!*t) /* trailing '\0'? */
215 const U32 i = ((*t - '0') << 7) + m;
216 *(t--) = '0' + (char)(i % 10);
222 /* Explosives and implosives. */
224 #if 'I' == 73 && 'J' == 74
225 /* On an ASCII/ISO kind of system */
226 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
229 Some other sort of character set - use memchr() so we don't match
232 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
236 #define TYPE_IS_SHRIEKING 0x100
237 #define TYPE_IS_BIG_ENDIAN 0x200
238 #define TYPE_IS_LITTLE_ENDIAN 0x400
239 #define TYPE_IS_PACK 0x800
240 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
241 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
242 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
244 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
245 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
247 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
249 #if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
251 # define DO_BO_UNPACK(var, type) \
254 my_swabn(&var, sizeof(var)); \
258 # define DO_BO_PACK(var, type) \
261 my_swabn(&var, sizeof(var)); \
265 # elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
267 # define DO_BO_UNPACK(var, type) \
270 my_swabn(&var, sizeof(var)); \
274 # define DO_BO_PACK(var, type) \
277 my_swabn(&var, sizeof(var)); \
282 # define DO_BO_UNPACK(var, type) BO_CANT_DOIT(unpack, type)
283 # define DO_BO_PACK(var, type) BO_CANT_DOIT(pack, type)
286 # define BO_CANT_DOIT(action, type) \
288 switch (TYPE_ENDIANNESS(datumtype)) { \
289 case TYPE_IS_BIG_ENDIAN: \
290 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
291 "platform", #action, #type); \
293 case TYPE_IS_LITTLE_ENDIAN: \
294 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
295 "platform", #action, #type); \
302 #define PACK_SIZE_CANNOT_CSUM 0x80
303 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
304 #define PACK_SIZE_MASK 0x3F
306 #include "packsizetables.c"
309 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
312 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
313 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
314 /* We try to process malformed UTF-8 as much as possible (preferably with
315 warnings), but these two mean we make no progress in the string and
316 might enter an infinite loop */
317 if (retlen == (STRLEN) -1 || retlen == 0)
318 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
319 (int) TYPE_NO_MODIFIERS(datumtype));
321 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
322 "Character in '%c' format wrapped in unpack",
323 (int) TYPE_NO_MODIFIERS(datumtype));
330 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
331 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
335 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
339 const char *from = *s;
341 const U32 flags = ckWARN(WARN_UTF8) ?
342 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
343 for (;buf_len > 0; buf_len--) {
344 if (from >= end) return FALSE;
345 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
346 if (retlen == (STRLEN) -1 || retlen == 0) {
347 from += UTF8SKIP(from);
349 } else from += retlen;
354 *(U8 *)buf++ = (U8)val;
356 /* We have enough characters for the buffer. Did we have problems ? */
359 /* Rewalk the string fragment while warning */
361 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
362 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
363 if (ptr >= end) break;
364 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
366 if (from > end) from = end;
369 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
370 WARN_PACK : WARN_UNPACK),
371 "Character(s) in '%c' format wrapped in %s",
372 (int) TYPE_NO_MODIFIERS(datumtype),
373 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
380 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
384 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
385 if (val >= 0x100 || !ISUUCHAR(val) ||
386 retlen == (STRLEN) -1 || retlen == 0) {
390 *out = PL_uudmap[val] & 077;
396 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
397 const U8 * const end = start + len;
399 PERL_ARGS_ASSERT_BYTES_TO_UNI;
401 while (start < end) {
402 const UV uv = NATIVE_TO_ASCII(*start);
403 if (UNI_IS_INVARIANT(uv))
404 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
406 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
407 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
414 #define PUSH_BYTES(utf8, cur, buf, len) \
417 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
419 Copy(buf, cur, len, char); \
424 #define GROWING(utf8, cat, start, cur, in_len) \
426 STRLEN glen = (in_len); \
427 if (utf8) glen *= UTF8_EXPAND; \
428 if ((cur) + glen >= (start) + SvLEN(cat)) { \
429 (start) = sv_exp_grow(cat, glen); \
430 (cur) = (start) + SvCUR(cat); \
434 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
436 const STRLEN glen = (in_len); \
438 if (utf8) gl *= UTF8_EXPAND; \
439 if ((cur) + gl >= (start) + SvLEN(cat)) { \
441 SvCUR_set((cat), (cur) - (start)); \
442 (start) = sv_exp_grow(cat, gl); \
443 (cur) = (start) + SvCUR(cat); \
445 PUSH_BYTES(utf8, cur, buf, glen); \
448 #define PUSH_BYTE(utf8, s, byte) \
451 const U8 au8 = (byte); \
452 (s) = bytes_to_uni(&au8, 1, (s)); \
453 } else *(U8 *)(s)++ = (byte); \
456 /* Only to be used inside a loop (see the break) */
457 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
460 if (str >= end) break; \
461 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
462 if (retlen == (STRLEN) -1 || retlen == 0) { \
464 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
469 static const char *_action( const tempsym_t* symptr )
471 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
474 /* Returns the sizeof() struct described by pat */
476 S_measure_struct(pTHX_ tempsym_t* symptr)
480 PERL_ARGS_ASSERT_MEASURE_STRUCT;
482 while (next_symbol(symptr)) {
486 switch (symptr->howlen) {
488 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
492 /* e_no_len and e_number */
493 len = symptr->length;
497 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
500 /* endianness doesn't influence the size of a type */
501 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
503 Perl_croak(aTHX_ "Invalid type '%c' in %s",
504 (int)TYPE_NO_MODIFIERS(symptr->code),
506 case '.' | TYPE_IS_SHRIEKING:
507 case '@' | TYPE_IS_SHRIEKING:
511 case 'U': /* XXXX Is it correct? */
514 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
515 (int) TYPE_NO_MODIFIERS(symptr->code),
522 tempsym_t savsym = *symptr;
523 symptr->patptr = savsym.grpbeg;
524 symptr->patend = savsym.grpend;
525 /* XXXX Theoretically, we need to measure many times at
526 different positions, since the subexpression may contain
527 alignment commands, but be not of aligned length.
528 Need to detect this and croak(). */
529 size = measure_struct(symptr);
533 case 'X' | TYPE_IS_SHRIEKING:
534 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
536 if (!len) /* Avoid division by 0 */
538 len = total % len; /* Assumed: the start is aligned. */
543 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
545 case 'x' | TYPE_IS_SHRIEKING:
546 if (!len) /* Avoid division by 0 */
548 star = total % len; /* Assumed: the start is aligned. */
549 if (star) /* Other portable ways? */
573 size = sizeof(char*);
583 /* locate matching closing parenthesis or bracket
584 * returns char pointer to char after match, or NULL
587 S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
589 PERL_ARGS_ASSERT_GROUP_END;
591 while (patptr < patend) {
592 const char c = *patptr++;
599 while (patptr < patend && *patptr != '\n')
603 patptr = group_end(patptr, patend, ')') + 1;
605 patptr = group_end(patptr, patend, ']') + 1;
607 Perl_croak(aTHX_ "No group ending character '%c' found in template",
613 /* Convert unsigned decimal number to binary.
614 * Expects a pointer to the first digit and address of length variable
615 * Advances char pointer to 1st non-digit char and returns number
618 S_get_num(pTHX_ const char *patptr, I32 *lenptr )
620 I32 len = *patptr++ - '0';
622 PERL_ARGS_ASSERT_GET_NUM;
624 while (isDIGIT(*patptr)) {
625 if (len >= 0x7FFFFFFF/10)
626 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
627 len = (len * 10) + (*patptr++ - '0');
633 /* The marvellous template parsing routine: Using state stored in *symptr,
634 * locates next template code and count
637 S_next_symbol(pTHX_ tempsym_t* symptr )
639 const char* patptr = symptr->patptr;
640 const char* const patend = symptr->patend;
642 PERL_ARGS_ASSERT_NEXT_SYMBOL;
644 symptr->flags &= ~FLAG_SLASH;
646 while (patptr < patend) {
647 if (isSPACE(*patptr))
649 else if (*patptr == '#') {
651 while (patptr < patend && *patptr != '\n')
656 /* We should have found a template code */
657 I32 code = *patptr++ & 0xFF;
658 U32 inherited_modifiers = 0;
660 if (code == ','){ /* grandfather in commas but with a warning */
661 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
662 symptr->flags |= FLAG_COMMA;
663 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
664 "Invalid type ',' in %s", _action( symptr ) );
669 /* for '(', skip to ')' */
671 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
672 Perl_croak(aTHX_ "()-group starts with a count in %s",
674 symptr->grpbeg = patptr;
675 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
676 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
677 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
681 /* look for group modifiers to inherit */
682 if (TYPE_ENDIANNESS(symptr->flags)) {
683 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
684 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
687 /* look for modifiers */
688 while (patptr < patend) {
693 modifier = TYPE_IS_SHRIEKING;
694 allowed = "sSiIlLxXnNvV@.";
697 modifier = TYPE_IS_BIG_ENDIAN;
698 allowed = ENDIANNESS_ALLOWED_TYPES;
701 modifier = TYPE_IS_LITTLE_ENDIAN;
702 allowed = ENDIANNESS_ALLOWED_TYPES;
713 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
714 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
715 allowed, _action( symptr ) );
717 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
718 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
719 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
720 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
721 TYPE_ENDIANNESS_MASK)
722 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
723 *patptr, _action( symptr ) );
725 if ((code & modifier)) {
726 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
727 "Duplicate modifier '%c' after '%c' in %s",
728 *patptr, (int) TYPE_NO_MODIFIERS(code),
736 /* inherit modifiers */
737 code |= inherited_modifiers;
739 /* look for count and/or / */
740 if (patptr < patend) {
741 if (isDIGIT(*patptr)) {
742 patptr = get_num( patptr, &symptr->length );
743 symptr->howlen = e_number;
745 } else if (*patptr == '*') {
747 symptr->howlen = e_star;
749 } else if (*patptr == '[') {
750 const char* lenptr = ++patptr;
751 symptr->howlen = e_number;
752 patptr = group_end( patptr, patend, ']' ) + 1;
753 /* what kind of [] is it? */
754 if (isDIGIT(*lenptr)) {
755 lenptr = get_num( lenptr, &symptr->length );
757 Perl_croak(aTHX_ "Malformed integer in [] in %s",
760 tempsym_t savsym = *symptr;
761 symptr->patend = patptr-1;
762 symptr->patptr = lenptr;
763 savsym.length = measure_struct(symptr);
767 symptr->howlen = e_no_len;
772 while (patptr < patend) {
773 if (isSPACE(*patptr))
775 else if (*patptr == '#') {
777 while (patptr < patend && *patptr != '\n')
782 if (*patptr == '/') {
783 symptr->flags |= FLAG_SLASH;
785 if (patptr < patend &&
786 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
787 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
794 /* at end - no count, no / */
795 symptr->howlen = e_no_len;
800 symptr->patptr = patptr;
804 symptr->patptr = patptr;
809 There is no way to cleanly handle the case where we should process the
810 string per byte in its upgraded form while it's really in downgraded form
811 (e.g. estimates like strend-s as an upper bound for the number of
812 characters left wouldn't work). So if we foresee the need of this
813 (pattern starts with U or contains U0), we want to work on the encoded
814 version of the string. Users are advised to upgrade their pack string
815 themselves if they need to do a lot of unpacks like this on it
818 need_utf8(const char *pat, const char *patend)
822 PERL_ARGS_ASSERT_NEED_UTF8;
824 while (pat < patend) {
827 pat = (const char *) memchr(pat, '\n', patend-pat);
828 if (!pat) return FALSE;
829 } else if (pat[0] == 'U') {
830 if (first || pat[1] == '0') return TRUE;
831 } else first = FALSE;
838 first_symbol(const char *pat, const char *patend) {
839 PERL_ARGS_ASSERT_FIRST_SYMBOL;
841 while (pat < patend) {
842 if (pat[0] != '#') return pat[0];
844 pat = (const char *) memchr(pat, '\n', patend-pat);
852 =for apidoc unpackstring
854 The engine implementing the unpack() Perl function.
856 Using the template pat..patend, this function unpacks the string
857 s..strend into a number of mortal SVs, which it pushes onto the perl
858 argument (@_) stack (so you will need to issue a C<PUTBACK> before and
859 C<SPAGAIN> after the call to this function). It returns the number of
862 The strend and patend pointers should point to the byte following the last
863 character of each string.
865 Although this function returns its values on the perl argument stack, it
866 doesn't take any parameters from that stack (and thus in particular
867 there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
873 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
877 PERL_ARGS_ASSERT_UNPACKSTRING;
879 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
880 else if (need_utf8(pat, patend)) {
881 /* We probably should try to avoid this in case a scalar context call
882 wouldn't get to the "U0" */
883 STRLEN len = strend - s;
884 s = (char *) bytes_to_utf8((U8 *) s, &len);
887 flags |= FLAG_DO_UTF8;
890 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
891 flags |= FLAG_PARSE_UTF8;
893 TEMPSYM_INIT(&sym, pat, patend, flags);
895 return unpack_rec(&sym, s, s, strend, NULL );
899 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
903 const I32 start_sp_offset = SP - PL_stack_base;
908 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
910 bool explicit_length;
911 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
912 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
914 PERL_ARGS_ASSERT_UNPACK_REC;
916 symptr->strbeg = s - strbeg;
918 while (next_symbol(symptr)) {
921 I32 datumtype = symptr->code;
923 /* do first one only unless in list context
924 / is implemented by unpacking the count, then popping it from the
925 stack, so must check that we're not in the middle of a / */
927 && (SP - PL_stack_base == start_sp_offset + 1)
928 && (datumtype != '/') ) /* XXX can this be omitted */
931 switch (howlen = symptr->howlen) {
933 len = strend - strbeg; /* long enough */
936 /* e_no_len and e_number */
937 len = symptr->length;
941 explicit_length = TRUE;
943 beyond = s >= strend;
945 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
947 /* props nonzero means we can process this letter. */
948 const long size = props & PACK_SIZE_MASK;
949 const long howmany = (strend - s) / size;
953 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
954 if (len && unpack_only_one) len = 1;
960 needs_swap = NEEDS_SWAP(datumtype);
962 switch(TYPE_NO_ENDIANNESS(datumtype)) {
964 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
967 if (howlen == e_no_len)
968 len = 16; /* len is not specified */
976 tempsym_t savsym = *symptr;
977 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
978 symptr->flags |= group_modifiers;
979 symptr->patend = savsym.grpend;
980 symptr->previous = &savsym;
983 if (len && unpack_only_one) len = 1;
985 symptr->patptr = savsym.grpbeg;
986 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
987 else symptr->flags &= ~FLAG_PARSE_UTF8;
988 unpack_rec(symptr, s, strbeg, strend, &s);
989 if (s == strend && savsym.howlen == e_star)
990 break; /* No way to continue */
993 savsym.flags = symptr->flags & ~group_modifiers;
997 case '.' | TYPE_IS_SHRIEKING:
1001 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1002 if (howlen == e_star) from = strbeg;
1003 else if (len <= 0) from = s;
1005 tempsym_t *group = symptr;
1007 while (--len && group) group = group->previous;
1008 from = group ? strbeg + group->strbeg : strbeg;
1011 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1012 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1016 case '@' | TYPE_IS_SHRIEKING:
1018 s = strbeg + symptr->strbeg;
1019 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1023 Perl_croak(aTHX_ "'@' outside of string in unpack");
1028 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1031 Perl_croak(aTHX_ "'@' outside of string in unpack");
1035 case 'X' | TYPE_IS_SHRIEKING:
1036 if (!len) /* Avoid division by 0 */
1039 const char *hop, *last;
1041 hop = last = strbeg;
1043 hop += UTF8SKIP(hop);
1050 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1054 len = (s - strbeg) % len;
1060 Perl_croak(aTHX_ "'X' outside of string in unpack");
1061 while (--s, UTF8_IS_CONTINUATION(*s)) {
1063 Perl_croak(aTHX_ "'X' outside of string in unpack");
1068 if (len > s - strbeg)
1069 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1073 case 'x' | TYPE_IS_SHRIEKING: {
1075 if (!len) /* Avoid division by 0 */
1077 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1078 else ai32 = (s - strbeg) % len;
1079 if (ai32 == 0) break;
1087 Perl_croak(aTHX_ "'x' outside of string in unpack");
1092 if (len > strend - s)
1093 Perl_croak(aTHX_ "'x' outside of string in unpack");
1098 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1104 /* Preliminary length estimate is assumed done in 'W' */
1105 if (len > strend - s) len = strend - s;
1111 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1112 if (hop >= strend) {
1114 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1119 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1121 } else if (len > strend - s)
1124 if (datumtype == 'Z') {
1125 /* 'Z' strips stuff after first null */
1126 const char *ptr, *end;
1128 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1129 sv = newSVpvn(s, ptr-s);
1130 if (howlen == e_star) /* exact for 'Z*' */
1131 len = ptr-s + (ptr != strend ? 1 : 0);
1132 } else if (datumtype == 'A') {
1133 /* 'A' strips both nulls and spaces */
1135 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1136 for (ptr = s+len-1; ptr >= s; ptr--)
1137 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1138 !isSPACE_utf8(ptr)) break;
1139 if (ptr >= s) ptr += UTF8SKIP(ptr);
1142 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1144 for (ptr = s+len-1; ptr >= s; ptr--)
1145 if (*ptr != 0 && !isSPACE(*ptr)) break;
1148 sv = newSVpvn(s, ptr-s);
1149 } else sv = newSVpvn(s, len);
1153 /* Undo any upgrade done due to need_utf8() */
1154 if (!(symptr->flags & FLAG_WAS_UTF8))
1155 sv_utf8_downgrade(sv, 0);
1163 if (howlen == e_star || len > (strend - s) * 8)
1164 len = (strend - s) * 8;
1167 while (len >= 8 && s < strend) {
1168 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1173 cuv += PL_bitcount[*(U8 *)s++];
1176 if (len && s < strend) {
1178 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1179 if (datumtype == 'b')
1181 if (bits & 1) cuv++;
1186 if (bits & 0x80) cuv++;
1193 sv = sv_2mortal(newSV(len ? len : 1));
1196 if (datumtype == 'b') {
1198 const I32 ai32 = len;
1199 for (len = 0; len < ai32; len++) {
1200 if (len & 7) bits >>= 1;
1202 if (s >= strend) break;
1203 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1204 } else bits = *(U8 *) s++;
1205 *str++ = bits & 1 ? '1' : '0';
1209 const I32 ai32 = len;
1210 for (len = 0; len < ai32; len++) {
1211 if (len & 7) bits <<= 1;
1213 if (s >= strend) break;
1214 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1215 } else bits = *(U8 *) s++;
1216 *str++ = bits & 0x80 ? '1' : '0';
1220 SvCUR_set(sv, str - SvPVX_const(sv));
1227 /* Preliminary length estimate, acceptable for utf8 too */
1228 if (howlen == e_star || len > (strend - s) * 2)
1229 len = (strend - s) * 2;
1231 sv = sv_2mortal(newSV(len ? len : 1));
1235 if (datumtype == 'h') {
1238 for (len = 0; len < ai32; len++) {
1239 if (len & 1) bits >>= 4;
1241 if (s >= strend) break;
1242 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1243 } else bits = * (U8 *) s++;
1245 *str++ = PL_hexdigit[bits & 15];
1249 const I32 ai32 = len;
1250 for (len = 0; len < ai32; len++) {
1251 if (len & 1) bits <<= 4;
1253 if (s >= strend) break;
1254 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1255 } else bits = *(U8 *) s++;
1257 *str++ = PL_hexdigit[(bits >> 4) & 15];
1262 SvCUR_set(sv, str - SvPVX_const(sv));
1269 if (explicit_length)
1270 /* Switch to "character" mode */
1271 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1276 while (len-- > 0 && s < strend) {
1281 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1282 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1283 if (retlen == (STRLEN) -1 || retlen == 0)
1284 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1288 aint = *(U8 *)(s)++;
1289 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1293 else if (checksum > bits_in_uv)
1294 cdouble += (NV)aint;
1302 while (len-- > 0 && s < strend) {
1304 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1305 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1306 if (retlen == (STRLEN) -1 || retlen == 0)
1307 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1311 else if (checksum > bits_in_uv)
1312 cdouble += (NV) val;
1316 } else if (!checksum)
1318 const U8 ch = *(U8 *) s++;
1321 else if (checksum > bits_in_uv)
1322 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1324 while (len-- > 0) cuv += *(U8 *) s++;
1328 if (explicit_length && howlen != e_star) {
1329 /* Switch to "bytes in UTF-8" mode */
1330 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1332 /* Should be impossible due to the need_utf8() test */
1333 Perl_croak(aTHX_ "U0 mode on a byte string");
1337 if (len > strend - s) len = strend - s;
1339 if (len && unpack_only_one) len = 1;
1343 while (len-- > 0 && s < strend) {
1347 U8 result[UTF8_MAXLEN];
1348 const char *ptr = s;
1350 /* Bug: warns about bad utf8 even if we are short on bytes
1351 and will break out of the loop */
1352 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1355 len = UTF8SKIP(result);
1356 if (!uni_to_bytes(aTHX_ &ptr, strend,
1357 (char *) &result[1], len-1, 'U')) break;
1358 auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT);
1361 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
1362 if (retlen == (STRLEN) -1 || retlen == 0)
1363 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1368 else if (checksum > bits_in_uv)
1369 cdouble += (NV) auv;
1374 case 's' | TYPE_IS_SHRIEKING:
1375 #if SHORTSIZE != SIZE16
1378 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1379 DO_BO_UNPACK(ashort, s);
1382 else if (checksum > bits_in_uv)
1383 cdouble += (NV)ashort;
1395 #if U16SIZE > SIZE16
1398 SHIFT16(utf8, s, strend, &ai16, datumtype);
1399 DO_BO_UNPACK(ai16, 16);
1400 #if U16SIZE > SIZE16
1406 else if (checksum > bits_in_uv)
1407 cdouble += (NV)ai16;
1412 case 'S' | TYPE_IS_SHRIEKING:
1413 #if SHORTSIZE != SIZE16
1415 unsigned short aushort;
1416 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1417 DO_BO_UNPACK(aushort, s);
1420 else if (checksum > bits_in_uv)
1421 cdouble += (NV)aushort;
1434 #if U16SIZE > SIZE16
1437 SHIFT16(utf8, s, strend, &au16, datumtype);
1438 DO_BO_UNPACK(au16, 16);
1439 if (datumtype == 'n')
1440 au16 = PerlSock_ntohs(au16);
1441 if (datumtype == 'v')
1445 else if (checksum > bits_in_uv)
1446 cdouble += (NV) au16;
1451 case 'v' | TYPE_IS_SHRIEKING:
1452 case 'n' | TYPE_IS_SHRIEKING:
1455 # if U16SIZE > SIZE16
1458 SHIFT16(utf8, s, strend, &ai16, datumtype);
1459 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1460 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1461 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1462 ai16 = (I16) vtohs((U16) ai16);
1465 else if (checksum > bits_in_uv)
1466 cdouble += (NV) ai16;
1472 case 'i' | TYPE_IS_SHRIEKING:
1475 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1476 DO_BO_UNPACK(aint, i);
1479 else if (checksum > bits_in_uv)
1480 cdouble += (NV)aint;
1486 case 'I' | TYPE_IS_SHRIEKING:
1489 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1490 DO_BO_UNPACK(auint, i);
1493 else if (checksum > bits_in_uv)
1494 cdouble += (NV)auint;
1502 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1503 #if IVSIZE == INTSIZE
1504 DO_BO_UNPACK(aiv, i);
1505 #elif IVSIZE == LONGSIZE
1506 DO_BO_UNPACK(aiv, l);
1507 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1508 DO_BO_UNPACK(aiv, 64);
1510 Perl_croak(aTHX_ "'j' not supported on this platform");
1514 else if (checksum > bits_in_uv)
1523 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1524 #if IVSIZE == INTSIZE
1525 DO_BO_UNPACK(auv, i);
1526 #elif IVSIZE == LONGSIZE
1527 DO_BO_UNPACK(auv, l);
1528 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1529 DO_BO_UNPACK(auv, 64);
1531 Perl_croak(aTHX_ "'J' not supported on this platform");
1535 else if (checksum > bits_in_uv)
1541 case 'l' | TYPE_IS_SHRIEKING:
1542 #if LONGSIZE != SIZE32
1545 SHIFT_VAR(utf8, s, strend, along, datumtype);
1546 DO_BO_UNPACK(along, l);
1549 else if (checksum > bits_in_uv)
1550 cdouble += (NV)along;
1561 #if U32SIZE > SIZE32
1564 SHIFT32(utf8, s, strend, &ai32, datumtype);
1565 DO_BO_UNPACK(ai32, 32);
1566 #if U32SIZE > SIZE32
1567 if (ai32 > 2147483647) ai32 -= 4294967296;
1571 else if (checksum > bits_in_uv)
1572 cdouble += (NV)ai32;
1577 case 'L' | TYPE_IS_SHRIEKING:
1578 #if LONGSIZE != SIZE32
1580 unsigned long aulong;
1581 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1582 DO_BO_UNPACK(aulong, l);
1585 else if (checksum > bits_in_uv)
1586 cdouble += (NV)aulong;
1599 #if U32SIZE > SIZE32
1602 SHIFT32(utf8, s, strend, &au32, datumtype);
1603 DO_BO_UNPACK(au32, 32);
1604 if (datumtype == 'N')
1605 au32 = PerlSock_ntohl(au32);
1606 if (datumtype == 'V')
1610 else if (checksum > bits_in_uv)
1611 cdouble += (NV)au32;
1616 case 'V' | TYPE_IS_SHRIEKING:
1617 case 'N' | TYPE_IS_SHRIEKING:
1620 #if U32SIZE > SIZE32
1623 SHIFT32(utf8, s, strend, &ai32, datumtype);
1624 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1625 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1626 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1627 ai32 = (I32)vtohl((U32)ai32);
1630 else if (checksum > bits_in_uv)
1631 cdouble += (NV)ai32;
1639 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1640 DO_BO_UNPACK(aptr, pointer);
1641 /* newSVpv generates undef if aptr is NULL */
1642 mPUSHs(newSVpv(aptr, 0));
1650 while (len > 0 && s < strend) {
1652 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1653 auv = (auv << 7) | (ch & 0x7f);
1654 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1662 if (++bytes >= sizeof(UV)) { /* promote to string */
1665 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
1666 while (s < strend) {
1667 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1668 sv = mul128(sv, (U8)(ch & 0x7f));
1674 t = SvPV_nolen_const(sv);
1683 if ((s >= strend) && bytes)
1684 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1688 if (symptr->howlen == e_star)
1689 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1691 if (s + sizeof(char*) <= strend) {
1693 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1694 DO_BO_UNPACK(aptr, pointer);
1695 /* newSVpvn generates undef if aptr is NULL */
1696 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1703 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
1704 DO_BO_UNPACK(aquad, 64);
1706 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
1707 newSViv((IV)aquad) : newSVnv((NV)aquad));
1708 else if (checksum > bits_in_uv)
1709 cdouble += (NV)aquad;
1717 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
1718 DO_BO_UNPACK(auquad, 64);
1720 mPUSHs(auquad <= UV_MAX ?
1721 newSVuv((UV)auquad) : newSVnv((NV)auquad));
1722 else if (checksum > bits_in_uv)
1723 cdouble += (NV)auquad;
1728 #endif /* HAS_QUAD */
1729 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1733 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
1734 DO_BO_UNPACK(afloat, float);
1744 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
1745 DO_BO_UNPACK(adouble, double);
1755 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), datumtype);
1756 DO_BO_UNPACK(anv.nv, NV);
1763 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1767 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, sizeof(aldouble.bytes), datumtype);
1768 DO_BO_UNPACK(aldouble.ld, long double);
1770 mPUSHn(aldouble.ld);
1772 cdouble += aldouble.ld;
1778 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1779 sv = sv_2mortal(newSV(l));
1780 if (l) SvPOK_on(sv);
1783 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1788 next_uni_uu(aTHX_ &s, strend, &a);
1789 next_uni_uu(aTHX_ &s, strend, &b);
1790 next_uni_uu(aTHX_ &s, strend, &c);
1791 next_uni_uu(aTHX_ &s, strend, &d);
1792 hunk[0] = (char)((a << 2) | (b >> 4));
1793 hunk[1] = (char)((b << 4) | (c >> 2));
1794 hunk[2] = (char)((c << 6) | d);
1796 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1804 /* possible checksum byte */
1805 const char *skip = s+UTF8SKIP(s);
1806 if (skip < strend && *skip == '\n')
1812 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1816 len = PL_uudmap[*(U8*)s++] & 077;
1818 if (s < strend && ISUUCHAR(*s))
1819 a = PL_uudmap[*(U8*)s++] & 077;
1822 if (s < strend && ISUUCHAR(*s))
1823 b = PL_uudmap[*(U8*)s++] & 077;
1826 if (s < strend && ISUUCHAR(*s))
1827 c = PL_uudmap[*(U8*)s++] & 077;
1830 if (s < strend && ISUUCHAR(*s))
1831 d = PL_uudmap[*(U8*)s++] & 077;
1834 hunk[0] = (char)((a << 2) | (b >> 4));
1835 hunk[1] = (char)((b << 4) | (c >> 2));
1836 hunk[2] = (char)((c << 6) | d);
1838 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1843 else /* possible checksum byte */
1844 if (s + 1 < strend && s[1] == '\n')
1854 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1855 (checksum > bits_in_uv &&
1856 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1859 anv = (NV) (1 << (checksum & 15));
1860 while (checksum >= 16) {
1864 while (cdouble < 0.0)
1866 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
1867 sv = newSVnv(cdouble);
1870 if (checksum < bits_in_uv) {
1871 UV mask = ((UV)1 << checksum) - 1;
1880 if (symptr->flags & FLAG_SLASH){
1881 if (SP - PL_stack_base - start_sp_offset <= 0)
1883 if( next_symbol(symptr) ){
1884 if( symptr->howlen == e_number )
1885 Perl_croak(aTHX_ "Count after length/code in unpack" );
1887 /* ...end of char buffer then no decent length available */
1888 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1890 /* take top of stack (hope it's numeric) */
1893 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1896 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1898 datumtype = symptr->code;
1899 explicit_length = FALSE;
1907 return SP - PL_stack_base - start_sp_offset;
1915 I32 gimme = GIMME_V;
1918 const char *pat = SvPV_const(left, llen);
1919 const char *s = SvPV_const(right, rlen);
1920 const char *strend = s + rlen;
1921 const char *patend = pat + llen;
1925 cnt = unpackstring(pat, patend, s, strend,
1926 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
1927 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
1930 if ( !cnt && gimme == G_SCALAR )
1931 PUSHs(&PL_sv_undef);
1936 doencodes(U8 *h, const char *s, I32 len)
1938 *h++ = PL_uuemap[len];
1940 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1941 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1942 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1943 *h++ = PL_uuemap[(077 & (s[2] & 077))];
1948 const char r = (len > 1 ? s[1] : '\0');
1949 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1950 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1951 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1952 *h++ = PL_uuemap[0];
1959 S_is_an_int(pTHX_ const char *s, STRLEN l)
1961 SV *result = newSVpvn(s, l);
1962 char *const result_c = SvPV_nolen(result); /* convenience */
1963 char *out = result_c;
1967 PERL_ARGS_ASSERT_IS_AN_INT;
1975 SvREFCNT_dec(result);
1998 SvREFCNT_dec(result);
2004 SvCUR_set(result, out - result_c);
2008 /* pnum must be '\0' terminated */
2010 S_div128(pTHX_ SV *pnum, bool *done)
2013 char * const s = SvPV(pnum, len);
2017 PERL_ARGS_ASSERT_DIV128;
2021 const int i = m * 10 + (*t - '0');
2022 const int r = (i >> 7); /* r < 10 */
2030 SvCUR_set(pnum, (STRLEN) (t - s));
2035 =for apidoc packlist
2037 The engine implementing pack() Perl function.
2043 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
2048 PERL_ARGS_ASSERT_PACKLIST;
2050 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2052 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2053 Also make sure any UTF8 flag is loaded */
2054 SvPV_force_nolen(cat);
2056 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2058 (void)pack_rec( cat, &sym, beglist, endlist );
2061 /* like sv_utf8_upgrade, but also repoint the group start markers */
2063 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2066 const char *from_ptr, *from_start, *from_end, **marks, **m;
2067 char *to_start, *to_ptr;
2069 if (SvUTF8(sv)) return;
2071 from_start = SvPVX_const(sv);
2072 from_end = from_start + SvCUR(sv);
2073 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2074 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2075 if (from_ptr == from_end) {
2076 /* Simple case: no character needs to be changed */
2081 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2082 Newx(to_start, len, char);
2083 Copy(from_start, to_start, from_ptr-from_start, char);
2084 to_ptr = to_start + (from_ptr-from_start);
2086 Newx(marks, sym_ptr->level+2, const char *);
2087 for (group=sym_ptr; group; group = group->previous)
2088 marks[group->level] = from_start + group->strbeg;
2089 marks[sym_ptr->level+1] = from_end+1;
2090 for (m = marks; *m < from_ptr; m++)
2091 *m = to_start + (*m-from_start);
2093 for (;from_ptr < from_end; from_ptr++) {
2094 while (*m == from_ptr) *m++ = to_ptr;
2095 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2099 while (*m == from_ptr) *m++ = to_ptr;
2100 if (m != marks + sym_ptr->level+1) {
2103 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2104 "level=%d", m, marks, sym_ptr->level);
2106 for (group=sym_ptr; group; group = group->previous)
2107 group->strbeg = marks[group->level] - to_start;
2112 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2113 from_start -= SvIVX(sv);
2116 SvFLAGS(sv) &= ~SVf_OOK;
2119 Safefree(from_start);
2120 SvPV_set(sv, to_start);
2121 SvCUR_set(sv, to_ptr - to_start);
2126 /* Exponential string grower. Makes string extension effectively O(n)
2127 needed says how many extra bytes we need (not counting the final '\0')
2128 Only grows the string if there is an actual lack of space
2131 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2132 const STRLEN cur = SvCUR(sv);
2133 const STRLEN len = SvLEN(sv);
2136 PERL_ARGS_ASSERT_SV_EXP_GROW;
2138 if (len - cur > needed) return SvPVX(sv);
2139 extend = needed > len ? needed : len;
2140 return SvGROW(sv, len+extend+1);
2145 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2148 tempsym_t lookahead;
2149 I32 items = endlist - beglist;
2150 bool found = next_symbol(symptr);
2151 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2152 bool warn_utf8 = ckWARN(WARN_UTF8);
2154 PERL_ARGS_ASSERT_PACK_REC;
2156 if (symptr->level == 0 && found && symptr->code == 'U') {
2157 marked_upgrade(aTHX_ cat, symptr);
2158 symptr->flags |= FLAG_DO_UTF8;
2161 symptr->strbeg = SvCUR(cat);
2167 SV *lengthcode = NULL;
2168 I32 datumtype = symptr->code;
2169 howlen_t howlen = symptr->howlen;
2170 char *start = SvPVX(cat);
2171 char *cur = start + SvCUR(cat);
2174 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2178 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2182 /* e_no_len and e_number */
2183 len = symptr->length;
2188 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2190 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2191 /* We can process this letter. */
2192 STRLEN size = props & PACK_SIZE_MASK;
2193 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2197 /* Look ahead for next symbol. Do we have code/code? */
2198 lookahead = *symptr;
2199 found = next_symbol(&lookahead);
2200 if (symptr->flags & FLAG_SLASH) {
2202 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2203 if (strchr("aAZ", lookahead.code)) {
2204 if (lookahead.howlen == e_number) count = lookahead.length;
2207 count = sv_len_utf8(*beglist);
2210 if (lookahead.code == 'Z') count++;
2213 if (lookahead.howlen == e_number && lookahead.length < items)
2214 count = lookahead.length;
2217 lookahead.howlen = e_number;
2218 lookahead.length = count;
2219 lengthcode = sv_2mortal(newSViv(count));
2222 needs_swap = NEEDS_SWAP(datumtype);
2224 /* Code inside the switch must take care to properly update
2225 cat (CUR length and '\0' termination) if it updated *cur and
2226 doesn't simply leave using break */
2227 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2229 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2230 (int) TYPE_NO_MODIFIERS(datumtype));
2232 Perl_croak(aTHX_ "'%%' may not be used in pack");
2235 case '.' | TYPE_IS_SHRIEKING:
2237 if (howlen == e_star) from = start;
2238 else if (len == 0) from = cur;
2240 tempsym_t *group = symptr;
2242 while (--len && group) group = group->previous;
2243 from = group ? start + group->strbeg : start;
2246 len = SvIV(fromstr);
2248 case '@' | TYPE_IS_SHRIEKING:
2250 from = start + symptr->strbeg;
2252 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2254 while (len && from < cur) {
2255 from += UTF8SKIP(from);
2259 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2261 /* Here we know from == cur */
2263 GROWING(0, cat, start, cur, len);
2264 Zero(cur, len, char);
2266 } else if (from < cur) {
2269 } else goto no_change;
2277 if (len > 0) goto grow;
2278 if (len == 0) goto no_change;
2285 tempsym_t savsym = *symptr;
2286 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2287 symptr->flags |= group_modifiers;
2288 symptr->patend = savsym.grpend;
2290 symptr->previous = &lookahead;
2293 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2294 else symptr->flags &= ~FLAG_PARSE_UTF8;
2295 was_utf8 = SvUTF8(cat);
2296 symptr->patptr = savsym.grpbeg;
2297 beglist = pack_rec(cat, symptr, beglist, endlist);
2298 if (SvUTF8(cat) != was_utf8)
2299 /* This had better be an upgrade while in utf8==0 mode */
2302 if (savsym.howlen == e_star && beglist == endlist)
2303 break; /* No way to continue */
2305 items = endlist - beglist;
2306 lookahead.flags = symptr->flags & ~group_modifiers;
2309 case 'X' | TYPE_IS_SHRIEKING:
2310 if (!len) /* Avoid division by 0 */
2317 hop += UTF8SKIP(hop);
2324 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2328 len = (cur-start) % len;
2332 if (len < 1) goto no_change;
2336 Perl_croak(aTHX_ "'%c' outside of string in pack",
2337 (int) TYPE_NO_MODIFIERS(datumtype));
2338 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2340 Perl_croak(aTHX_ "'%c' outside of string in pack",
2341 (int) TYPE_NO_MODIFIERS(datumtype));
2347 if (cur - start < len)
2348 Perl_croak(aTHX_ "'%c' outside of string in pack",
2349 (int) TYPE_NO_MODIFIERS(datumtype));
2352 if (cur < start+symptr->strbeg) {
2353 /* Make sure group starts don't point into the void */
2355 const STRLEN length = cur-start;
2356 for (group = symptr;
2357 group && length < group->strbeg;
2358 group = group->previous) group->strbeg = length;
2359 lookahead.strbeg = length;
2362 case 'x' | TYPE_IS_SHRIEKING: {
2364 if (!len) /* Avoid division by 0 */
2366 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2367 else ai32 = (cur - start) % len;
2368 if (ai32 == 0) goto no_change;
2380 aptr = SvPV_const(fromstr, fromlen);
2381 if (DO_UTF8(fromstr)) {
2382 const char *end, *s;
2384 if (!utf8 && !SvUTF8(cat)) {
2385 marked_upgrade(aTHX_ cat, symptr);
2386 lookahead.flags |= FLAG_DO_UTF8;
2387 lookahead.strbeg = symptr->strbeg;
2390 cur = start + SvCUR(cat);
2392 if (howlen == e_star) {
2393 if (utf8) goto string_copy;
2397 end = aptr + fromlen;
2398 fromlen = datumtype == 'Z' ? len-1 : len;
2399 while ((I32) fromlen > 0 && s < end) {
2404 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2407 if (datumtype == 'Z') len++;
2413 fromlen = len - fromlen;
2414 if (datumtype == 'Z') fromlen--;
2415 if (howlen == e_star) {
2417 if (datumtype == 'Z') len++;
2419 GROWING(0, cat, start, cur, len);
2420 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2421 datumtype | TYPE_IS_PACK))
2422 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2423 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2424 (int)datumtype, aptr, end, cur, (UV)fromlen);
2428 if (howlen == e_star) {
2430 if (datumtype == 'Z') len++;
2432 if (len <= (I32) fromlen) {
2434 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2436 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2438 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2439 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2441 while (fromlen > 0) {
2442 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2448 if (howlen == e_star) {
2450 if (datumtype == 'Z') len++;
2452 if (len <= (I32) fromlen) {
2454 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2456 GROWING(0, cat, start, cur, len);
2457 Copy(aptr, cur, fromlen, char);
2461 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2468 const char *str, *end;
2475 str = SvPV_const(fromstr, fromlen);
2476 end = str + fromlen;
2477 if (DO_UTF8(fromstr)) {
2479 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2481 utf8_source = FALSE;
2482 utf8_flags = 0; /* Unused, but keep compilers happy */
2484 if (howlen == e_star) len = fromlen;
2485 field_len = (len+7)/8;
2486 GROWING(utf8, cat, start, cur, field_len);
2487 if (len > (I32)fromlen) len = fromlen;
2490 if (datumtype == 'B')
2494 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2496 } else bits |= *str++ & 1;
2497 if (l & 7) bits <<= 1;
2499 PUSH_BYTE(utf8, cur, bits);
2504 /* datumtype == 'b' */
2508 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2509 if (val & 1) bits |= 0x80;
2510 } else if (*str++ & 1)
2512 if (l & 7) bits >>= 1;
2514 PUSH_BYTE(utf8, cur, bits);
2520 if (datumtype == 'B')
2521 bits <<= 7 - (l & 7);
2523 bits >>= 7 - (l & 7);
2524 PUSH_BYTE(utf8, cur, bits);
2527 /* Determine how many chars are left in the requested field */
2529 if (howlen == e_star) field_len = 0;
2530 else field_len -= l;
2531 Zero(cur, field_len, char);
2537 const char *str, *end;
2544 str = SvPV_const(fromstr, fromlen);
2545 end = str + fromlen;
2546 if (DO_UTF8(fromstr)) {
2548 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2550 utf8_source = FALSE;
2551 utf8_flags = 0; /* Unused, but keep compilers happy */
2553 if (howlen == e_star) len = fromlen;
2554 field_len = (len+1)/2;
2555 GROWING(utf8, cat, start, cur, field_len);
2556 if (!utf8 && len > (I32)fromlen) len = fromlen;
2559 if (datumtype == 'H')
2563 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2564 if (val < 256 && isALPHA(val))
2565 bits |= (val + 9) & 0xf;
2568 } else if (isALPHA(*str))
2569 bits |= (*str++ + 9) & 0xf;
2571 bits |= *str++ & 0xf;
2572 if (l & 1) bits <<= 4;
2574 PUSH_BYTE(utf8, cur, bits);
2582 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2583 if (val < 256 && isALPHA(val))
2584 bits |= ((val + 9) & 0xf) << 4;
2586 bits |= (val & 0xf) << 4;
2587 } else if (isALPHA(*str))
2588 bits |= ((*str++ + 9) & 0xf) << 4;
2590 bits |= (*str++ & 0xf) << 4;
2591 if (l & 1) bits >>= 4;
2593 PUSH_BYTE(utf8, cur, bits);
2599 PUSH_BYTE(utf8, cur, bits);
2602 /* Determine how many chars are left in the requested field */
2604 if (howlen == e_star) field_len = 0;
2605 else field_len -= l;
2606 Zero(cur, field_len, char);
2614 aiv = SvIV(fromstr);
2615 if ((-128 > aiv || aiv > 127))
2616 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2617 "Character in 'c' format wrapped in pack");
2618 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2623 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2629 aiv = SvIV(fromstr);
2630 if ((0 > aiv || aiv > 0xff))
2631 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2632 "Character in 'C' format wrapped in pack");
2633 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2638 U8 in_bytes = (U8)IN_BYTES;
2640 end = start+SvLEN(cat)-1;
2641 if (utf8) end -= UTF8_MAXLEN-1;
2645 auv = SvUV(fromstr);
2646 if (in_bytes) auv = auv % 0x100;
2651 SvCUR_set(cat, cur - start);
2653 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2654 end = start+SvLEN(cat)-UTF8_MAXLEN;
2656 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2659 0 : UNICODE_ALLOW_ANY);
2664 SvCUR_set(cat, cur - start);
2665 marked_upgrade(aTHX_ cat, symptr);
2666 lookahead.flags |= FLAG_DO_UTF8;
2667 lookahead.strbeg = symptr->strbeg;
2670 cur = start + SvCUR(cat);
2671 end = start+SvLEN(cat)-UTF8_MAXLEN;
2674 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2675 "Character in 'W' format wrapped in pack");
2680 SvCUR_set(cat, cur - start);
2681 GROWING(0, cat, start, cur, len+1);
2682 end = start+SvLEN(cat)-1;
2684 *(U8 *) cur++ = (U8)auv;
2693 if (!(symptr->flags & FLAG_DO_UTF8)) {
2694 marked_upgrade(aTHX_ cat, symptr);
2695 lookahead.flags |= FLAG_DO_UTF8;
2696 lookahead.strbeg = symptr->strbeg;
2702 end = start+SvLEN(cat);
2703 if (!utf8) end -= UTF8_MAXLEN;
2707 auv = SvUV(fromstr);
2709 U8 buffer[UTF8_MAXLEN], *endb;
2710 endb = uvuni_to_utf8_flags(buffer, auv,
2712 0 : UNICODE_ALLOW_ANY);
2713 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2715 SvCUR_set(cat, cur - start);
2716 GROWING(0, cat, start, cur,
2717 len+(endb-buffer)*UTF8_EXPAND);
2718 end = start+SvLEN(cat);
2720 cur = bytes_to_uni(buffer, endb-buffer, cur);
2724 SvCUR_set(cat, cur - start);
2725 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2726 end = start+SvLEN(cat)-UTF8_MAXLEN;
2728 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
2730 0 : UNICODE_ALLOW_ANY);
2735 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2741 anv = SvNV(fromstr);
2742 # if defined(VMS) && !defined(_IEEE_FP)
2743 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2744 * on Alpha; fake it if we don't have them.
2748 else if (anv < -FLT_MAX)
2750 else afloat = (float)anv;
2752 afloat = (float)anv;
2754 DO_BO_PACK(afloat, float);
2755 PUSH_VAR(utf8, cur, afloat);
2763 anv = SvNV(fromstr);
2764 # if defined(VMS) && !defined(_IEEE_FP)
2765 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2766 * on Alpha; fake it if we don't have them.
2770 else if (anv < -DBL_MAX)
2772 else adouble = (double)anv;
2774 adouble = (double)anv;
2776 DO_BO_PACK(adouble, double);
2777 PUSH_VAR(utf8, cur, adouble);
2782 Zero(&anv, 1, NV); /* can be long double with unused bits */
2786 /* to work round a gcc/x86 bug; don't use SvNV */
2787 anv.nv = sv_2nv(fromstr);
2789 anv.nv = SvNV(fromstr);
2791 DO_BO_PACK(anv, NV);
2792 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes));
2796 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2799 /* long doubles can have unused bits, which may be nonzero */
2800 Zero(&aldouble, 1, long double);
2804 /* to work round a gcc/x86 bug; don't use SvNV */
2805 aldouble.ld = (long double)sv_2nv(fromstr);
2807 aldouble.ld = (long double)SvNV(fromstr);
2809 DO_BO_PACK(aldouble, long double);
2810 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes));
2815 case 'n' | TYPE_IS_SHRIEKING:
2820 ai16 = (I16)SvIV(fromstr);
2821 ai16 = PerlSock_htons(ai16);
2822 PUSH16(utf8, cur, &ai16);
2825 case 'v' | TYPE_IS_SHRIEKING:
2830 ai16 = (I16)SvIV(fromstr);
2832 PUSH16(utf8, cur, &ai16);
2835 case 'S' | TYPE_IS_SHRIEKING:
2836 #if SHORTSIZE != SIZE16
2838 unsigned short aushort;
2840 aushort = SvUV(fromstr);
2841 DO_BO_PACK(aushort, s);
2842 PUSH_VAR(utf8, cur, aushort);
2852 au16 = (U16)SvUV(fromstr);
2853 DO_BO_PACK(au16, 16);
2854 PUSH16(utf8, cur, &au16);
2857 case 's' | TYPE_IS_SHRIEKING:
2858 #if SHORTSIZE != SIZE16
2862 ashort = SvIV(fromstr);
2863 DO_BO_PACK(ashort, s);
2864 PUSH_VAR(utf8, cur, ashort);
2874 ai16 = (I16)SvIV(fromstr);
2875 DO_BO_PACK(ai16, 16);
2876 PUSH16(utf8, cur, &ai16);
2880 case 'I' | TYPE_IS_SHRIEKING:
2884 auint = SvUV(fromstr);
2885 DO_BO_PACK(auint, i);
2886 PUSH_VAR(utf8, cur, auint);
2893 aiv = SvIV(fromstr);
2894 #if IVSIZE == INTSIZE
2896 #elif IVSIZE == LONGSIZE
2898 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
2899 DO_BO_PACK(aiv, 64);
2901 Perl_croak(aTHX_ "'j' not supported on this platform");
2903 PUSH_VAR(utf8, cur, aiv);
2910 auv = SvUV(fromstr);
2911 #if UVSIZE == INTSIZE
2913 #elif UVSIZE == LONGSIZE
2915 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
2916 DO_BO_PACK(auv, 64);
2918 Perl_croak(aTHX_ "'J' not supported on this platform");
2920 PUSH_VAR(utf8, cur, auv);
2927 anv = SvNV(fromstr);
2931 SvCUR_set(cat, cur - start);
2932 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
2935 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2936 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2937 any negative IVs will have already been got by the croak()
2938 above. IOK is untrue for fractions, so we test them
2939 against UV_MAX_P1. */
2940 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2941 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
2942 char *in = buf + sizeof(buf);
2943 UV auv = SvUV(fromstr);
2946 *--in = (char)((auv & 0x7f) | 0x80);
2949 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2950 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2951 in, (buf + sizeof(buf)) - in);
2952 } else if (SvPOKp(fromstr))
2954 else if (SvNOKp(fromstr)) {
2955 /* 10**NV_MAX_10_EXP is the largest power of 10
2956 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
2957 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2958 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2959 And with that many bytes only Inf can overflow.
2960 Some C compilers are strict about integral constant
2961 expressions so we conservatively divide by a slightly
2962 smaller integer instead of multiplying by the exact
2963 floating-point value.
2965 #ifdef NV_MAX_10_EXP
2966 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
2967 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
2969 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
2970 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
2972 char *in = buf + sizeof(buf);
2974 anv = Perl_floor(anv);
2976 const NV next = Perl_floor(anv / 128);
2977 if (in <= buf) /* this cannot happen ;-) */
2978 Perl_croak(aTHX_ "Cannot compress integer in pack");
2979 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
2982 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2983 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2984 in, (buf + sizeof(buf)) - in);
2993 /* Copy string and check for compliance */
2994 from = SvPV_const(fromstr, len);
2995 if ((norm = is_an_int(from, len)) == NULL)
2996 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
2998 Newx(result, len, char);
3001 while (!done) *--in = div128(norm, &done) | 0x80;
3002 result[len - 1] &= 0x7F; /* clear continue bit */
3003 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3004 in, (result + len) - in);
3006 SvREFCNT_dec(norm); /* free norm */
3011 case 'i' | TYPE_IS_SHRIEKING:
3015 aint = SvIV(fromstr);
3016 DO_BO_PACK(aint, i);
3017 PUSH_VAR(utf8, cur, aint);
3020 case 'N' | TYPE_IS_SHRIEKING:
3025 au32 = SvUV(fromstr);
3026 au32 = PerlSock_htonl(au32);
3027 PUSH32(utf8, cur, &au32);
3030 case 'V' | TYPE_IS_SHRIEKING:
3035 au32 = SvUV(fromstr);
3037 PUSH32(utf8, cur, &au32);
3040 case 'L' | TYPE_IS_SHRIEKING:
3041 #if LONGSIZE != SIZE32
3043 unsigned long aulong;
3045 aulong = SvUV(fromstr);
3046 DO_BO_PACK(aulong, l);
3047 PUSH_VAR(utf8, cur, aulong);
3057 au32 = SvUV(fromstr);
3058 DO_BO_PACK(au32, 32);
3059 PUSH32(utf8, cur, &au32);
3062 case 'l' | TYPE_IS_SHRIEKING:
3063 #if LONGSIZE != SIZE32
3067 along = SvIV(fromstr);
3068 DO_BO_PACK(along, l);
3069 PUSH_VAR(utf8, cur, along);
3079 ai32 = SvIV(fromstr);
3080 DO_BO_PACK(ai32, 32);
3081 PUSH32(utf8, cur, &ai32);
3089 auquad = (Uquad_t) SvUV(fromstr);
3090 DO_BO_PACK(auquad, 64);
3091 PUSH_VAR(utf8, cur, auquad);
3098 aquad = (Quad_t)SvIV(fromstr);
3099 DO_BO_PACK(aquad, 64);
3100 PUSH_VAR(utf8, cur, aquad);
3103 #endif /* HAS_QUAD */
3105 len = 1; /* assume SV is correct length */
3106 GROWING(utf8, cat, start, cur, sizeof(char *));
3113 SvGETMAGIC(fromstr);
3114 if (!SvOK(fromstr)) aptr = NULL;
3116 /* XXX better yet, could spirit away the string to
3117 * a safe spot and hang on to it until the result
3118 * of pack() (and all copies of the result) are
3121 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3122 !SvREADONLY(fromstr)))) {
3123 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3124 "Attempt to pack pointer to temporary value");
3126 if (SvPOK(fromstr) || SvNIOK(fromstr))
3127 aptr = SvPV_nomg_const_nolen(fromstr);
3129 aptr = SvPV_force_flags_nolen(fromstr, 0);
3131 DO_BO_PACK(aptr, pointer);
3132 PUSH_VAR(utf8, cur, aptr);
3136 const char *aptr, *aend;
3140 if (len <= 2) len = 45;
3141 else len = len / 3 * 3;
3143 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3144 "Field too wide in 'u' format in pack");
3147 aptr = SvPV_const(fromstr, fromlen);
3148 from_utf8 = DO_UTF8(fromstr);
3150 aend = aptr + fromlen;
3151 fromlen = sv_len_utf8_nomg(fromstr);
3152 } else aend = NULL; /* Unused, but keep compilers happy */
3153 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3154 while (fromlen > 0) {
3157 U8 hunk[1+63/3*4+1];
3159 if ((I32)fromlen > len)
3165 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3166 'u' | TYPE_IS_PACK)) {
3168 SvCUR_set(cat, cur - start);
3169 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3170 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3171 aptr, aend, buffer, (long) todo);
3173 end = doencodes(hunk, buffer, todo);
3175 end = doencodes(hunk, aptr, todo);
3178 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3185 SvCUR_set(cat, cur - start);
3187 *symptr = lookahead;
3196 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3199 SV *pat_sv = *++MARK;
3200 const char *pat = SvPV_const(pat_sv, fromlen);
3201 const char *patend = pat + fromlen;
3207 packlist(cat, pat, patend, MARK, SP + 1);
3217 * c-indentation-style: bsd
3219 * indent-tabs-mode: nil
3222 * ex: set ts=8 sts=4 sw=4 et: