3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
32 #define PERL_IN_PP_PACK_C
35 /* Types used by pack/unpack */
37 e_no_len, /* no length */
38 e_number, /* number, [] */
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
57 #define TEMPSYM_INIT(symptr, p, e, f) \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
81 U8 bytes[sizeof(long double)];
88 /* Maximum number of bytes to which a byte can grow due to upgrade */
92 * Offset for integer pack/unpack.
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
112 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
115 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
116 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
117 # define OFF16(p) ((char*)(p))
118 # define OFF32(p) ((char*)(p))
120 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
121 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
124 ++++ bad cray byte order
128 # define OFF16(p) ((char *) (p))
129 # define OFF32(p) ((char *) (p))
132 /* Only to be used inside a loop (see the break) */
133 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
135 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
137 Copy(s, OFF16(p), SIZE16, char); \
142 /* Only to be used inside a loop (see the break) */
143 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
145 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
147 Copy(s, OFF32(p), SIZE32, char); \
152 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
153 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
155 /* Only to be used inside a loop (see the break) */
156 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype) \
159 if (!uni_to_bytes(aTHX_ &s, strend, \
160 (char *) (buf), len, datumtype)) break; \
162 Copy(s, (char *) (buf), len, char); \
167 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
168 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype)
170 #define PUSH_VAR(utf8, aptr, var) \
171 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
173 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
174 #define MAX_SUB_TEMPLATE_LEVEL 100
176 /* flags (note that type modifiers can also be used as flags!) */
177 #define FLAG_WAS_UTF8 0x40
178 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
179 #define FLAG_UNPACK_ONLY_ONE 0x10
180 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
181 #define FLAG_SLASH 0x04
182 #define FLAG_COMMA 0x02
183 #define FLAG_PACK 0x01
186 S_mul128(pTHX_ SV *sv, U8 m)
189 char *s = SvPV(sv, len);
192 PERL_ARGS_ASSERT_MUL128;
194 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
195 SV * const tmpNew = newSVpvs("0000000000");
197 sv_catsv(tmpNew, sv);
198 SvREFCNT_dec(sv); /* free old sv */
203 while (!*t) /* trailing '\0'? */
206 const U32 i = ((*t - '0') << 7) + m;
207 *(t--) = '0' + (char)(i % 10);
213 /* Explosives and implosives. */
215 #if 'I' == 73 && 'J' == 74
216 /* On an ASCII/ISO kind of system */
217 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
220 Some other sort of character set - use memchr() so we don't match
223 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
227 #define TYPE_IS_SHRIEKING 0x100
228 #define TYPE_IS_BIG_ENDIAN 0x200
229 #define TYPE_IS_LITTLE_ENDIAN 0x400
230 #define TYPE_IS_PACK 0x800
231 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
232 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
233 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
235 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
236 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
238 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
240 #if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
242 # define DO_BO_UNPACK(var, type) \
244 if (TYPE_ENDIANNESS(datumtype) == TYPE_IS_LITTLE_ENDIAN) { \
245 my_letohn(&var, sizeof(var)); \
249 # define DO_BO_PACK(var, type) \
251 if (TYPE_ENDIANNESS(datumtype) == TYPE_IS_LITTLE_ENDIAN) { \
252 my_htolen(&var, sizeof(var)); \
256 # elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
258 # define DO_BO_UNPACK(var, type) \
260 if (TYPE_ENDIANNESS(datumtype) == TYPE_IS_BIG_ENDIAN) { \
261 my_betohn(&var, sizeof(var)); \
265 # define DO_BO_PACK(var, type) \
267 if (TYPE_ENDIANNESS(datumtype) == TYPE_IS_BIG_ENDIAN) { \
268 my_htoben(&var, sizeof(var)); \
273 # define DO_BO_UNPACK(var, type) BO_CANT_DOIT(unpack, type)
274 # define DO_BO_PACK(var, type) BO_CANT_DOIT(pack, type)
277 # define BO_CANT_DOIT(action, type) \
279 switch (TYPE_ENDIANNESS(datumtype)) { \
280 case TYPE_IS_BIG_ENDIAN: \
281 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
282 "platform", #action, #type); \
284 case TYPE_IS_LITTLE_ENDIAN: \
285 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
286 "platform", #action, #type); \
293 #define PACK_SIZE_CANNOT_CSUM 0x80
294 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
295 #define PACK_SIZE_MASK 0x3F
297 #include "packsizetables.c"
300 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
303 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
304 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
305 /* We try to process malformed UTF-8 as much as possible (preferably with
306 warnings), but these two mean we make no progress in the string and
307 might enter an infinite loop */
308 if (retlen == (STRLEN) -1 || retlen == 0)
309 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
310 (int) TYPE_NO_MODIFIERS(datumtype));
312 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
313 "Character in '%c' format wrapped in unpack",
314 (int) TYPE_NO_MODIFIERS(datumtype));
321 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
322 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
326 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
330 const char *from = *s;
332 const U32 flags = ckWARN(WARN_UTF8) ?
333 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
334 for (;buf_len > 0; buf_len--) {
335 if (from >= end) return FALSE;
336 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
337 if (retlen == (STRLEN) -1 || retlen == 0) {
338 from += UTF8SKIP(from);
340 } else from += retlen;
345 *(U8 *)buf++ = (U8)val;
347 /* We have enough characters for the buffer. Did we have problems ? */
350 /* Rewalk the string fragment while warning */
352 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
353 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
354 if (ptr >= end) break;
355 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
357 if (from > end) from = end;
360 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
361 WARN_PACK : WARN_UNPACK),
362 "Character(s) in '%c' format wrapped in %s",
363 (int) TYPE_NO_MODIFIERS(datumtype),
364 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
371 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
375 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
376 if (val >= 0x100 || !ISUUCHAR(val) ||
377 retlen == (STRLEN) -1 || retlen == 0) {
381 *out = PL_uudmap[val] & 077;
387 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
388 const U8 * const end = start + len;
390 PERL_ARGS_ASSERT_BYTES_TO_UNI;
392 while (start < end) {
393 const UV uv = NATIVE_TO_ASCII(*start);
394 if (UNI_IS_INVARIANT(uv))
395 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
397 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
398 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
405 #define PUSH_BYTES(utf8, cur, buf, len) \
408 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
410 Copy(buf, cur, len, char); \
415 #define GROWING(utf8, cat, start, cur, in_len) \
417 STRLEN glen = (in_len); \
418 if (utf8) glen *= UTF8_EXPAND; \
419 if ((cur) + glen >= (start) + SvLEN(cat)) { \
420 (start) = sv_exp_grow(cat, glen); \
421 (cur) = (start) + SvCUR(cat); \
425 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
427 const STRLEN glen = (in_len); \
429 if (utf8) gl *= UTF8_EXPAND; \
430 if ((cur) + gl >= (start) + SvLEN(cat)) { \
432 SvCUR_set((cat), (cur) - (start)); \
433 (start) = sv_exp_grow(cat, gl); \
434 (cur) = (start) + SvCUR(cat); \
436 PUSH_BYTES(utf8, cur, buf, glen); \
439 #define PUSH_BYTE(utf8, s, byte) \
442 const U8 au8 = (byte); \
443 (s) = bytes_to_uni(&au8, 1, (s)); \
444 } else *(U8 *)(s)++ = (byte); \
447 /* Only to be used inside a loop (see the break) */
448 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
451 if (str >= end) break; \
452 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
453 if (retlen == (STRLEN) -1 || retlen == 0) { \
455 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
460 static const char *_action( const tempsym_t* symptr )
462 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
465 /* Returns the sizeof() struct described by pat */
467 S_measure_struct(pTHX_ tempsym_t* symptr)
471 PERL_ARGS_ASSERT_MEASURE_STRUCT;
473 while (next_symbol(symptr)) {
477 switch (symptr->howlen) {
479 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
483 /* e_no_len and e_number */
484 len = symptr->length;
488 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
491 /* endianness doesn't influence the size of a type */
492 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
494 Perl_croak(aTHX_ "Invalid type '%c' in %s",
495 (int)TYPE_NO_MODIFIERS(symptr->code),
497 case '.' | TYPE_IS_SHRIEKING:
498 case '@' | TYPE_IS_SHRIEKING:
502 case 'U': /* XXXX Is it correct? */
505 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
506 (int) TYPE_NO_MODIFIERS(symptr->code),
513 tempsym_t savsym = *symptr;
514 symptr->patptr = savsym.grpbeg;
515 symptr->patend = savsym.grpend;
516 /* XXXX Theoretically, we need to measure many times at
517 different positions, since the subexpression may contain
518 alignment commands, but be not of aligned length.
519 Need to detect this and croak(). */
520 size = measure_struct(symptr);
524 case 'X' | TYPE_IS_SHRIEKING:
525 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
527 if (!len) /* Avoid division by 0 */
529 len = total % len; /* Assumed: the start is aligned. */
534 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
536 case 'x' | TYPE_IS_SHRIEKING:
537 if (!len) /* Avoid division by 0 */
539 star = total % len; /* Assumed: the start is aligned. */
540 if (star) /* Other portable ways? */
564 size = sizeof(char*);
574 /* locate matching closing parenthesis or bracket
575 * returns char pointer to char after match, or NULL
578 S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
580 PERL_ARGS_ASSERT_GROUP_END;
582 while (patptr < patend) {
583 const char c = *patptr++;
590 while (patptr < patend && *patptr != '\n')
594 patptr = group_end(patptr, patend, ')') + 1;
596 patptr = group_end(patptr, patend, ']') + 1;
598 Perl_croak(aTHX_ "No group ending character '%c' found in template",
604 /* Convert unsigned decimal number to binary.
605 * Expects a pointer to the first digit and address of length variable
606 * Advances char pointer to 1st non-digit char and returns number
609 S_get_num(pTHX_ const char *patptr, I32 *lenptr )
611 I32 len = *patptr++ - '0';
613 PERL_ARGS_ASSERT_GET_NUM;
615 while (isDIGIT(*patptr)) {
616 if (len >= 0x7FFFFFFF/10)
617 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
618 len = (len * 10) + (*patptr++ - '0');
624 /* The marvellous template parsing routine: Using state stored in *symptr,
625 * locates next template code and count
628 S_next_symbol(pTHX_ tempsym_t* symptr )
630 const char* patptr = symptr->patptr;
631 const char* const patend = symptr->patend;
633 PERL_ARGS_ASSERT_NEXT_SYMBOL;
635 symptr->flags &= ~FLAG_SLASH;
637 while (patptr < patend) {
638 if (isSPACE(*patptr))
640 else if (*patptr == '#') {
642 while (patptr < patend && *patptr != '\n')
647 /* We should have found a template code */
648 I32 code = *patptr++ & 0xFF;
649 U32 inherited_modifiers = 0;
651 if (code == ','){ /* grandfather in commas but with a warning */
652 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
653 symptr->flags |= FLAG_COMMA;
654 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
655 "Invalid type ',' in %s", _action( symptr ) );
660 /* for '(', skip to ')' */
662 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
663 Perl_croak(aTHX_ "()-group starts with a count in %s",
665 symptr->grpbeg = patptr;
666 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
667 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
668 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
672 /* look for group modifiers to inherit */
673 if (TYPE_ENDIANNESS(symptr->flags)) {
674 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
675 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
678 /* look for modifiers */
679 while (patptr < patend) {
684 modifier = TYPE_IS_SHRIEKING;
685 allowed = "sSiIlLxXnNvV@.";
688 modifier = TYPE_IS_BIG_ENDIAN;
689 allowed = ENDIANNESS_ALLOWED_TYPES;
692 modifier = TYPE_IS_LITTLE_ENDIAN;
693 allowed = ENDIANNESS_ALLOWED_TYPES;
704 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
705 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
706 allowed, _action( symptr ) );
708 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
709 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
710 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
711 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
712 TYPE_ENDIANNESS_MASK)
713 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
714 *patptr, _action( symptr ) );
716 if ((code & modifier)) {
717 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
718 "Duplicate modifier '%c' after '%c' in %s",
719 *patptr, (int) TYPE_NO_MODIFIERS(code),
727 /* inherit modifiers */
728 code |= inherited_modifiers;
730 /* look for count and/or / */
731 if (patptr < patend) {
732 if (isDIGIT(*patptr)) {
733 patptr = get_num( patptr, &symptr->length );
734 symptr->howlen = e_number;
736 } else if (*patptr == '*') {
738 symptr->howlen = e_star;
740 } else if (*patptr == '[') {
741 const char* lenptr = ++patptr;
742 symptr->howlen = e_number;
743 patptr = group_end( patptr, patend, ']' ) + 1;
744 /* what kind of [] is it? */
745 if (isDIGIT(*lenptr)) {
746 lenptr = get_num( lenptr, &symptr->length );
748 Perl_croak(aTHX_ "Malformed integer in [] in %s",
751 tempsym_t savsym = *symptr;
752 symptr->patend = patptr-1;
753 symptr->patptr = lenptr;
754 savsym.length = measure_struct(symptr);
758 symptr->howlen = e_no_len;
763 while (patptr < patend) {
764 if (isSPACE(*patptr))
766 else if (*patptr == '#') {
768 while (patptr < patend && *patptr != '\n')
773 if (*patptr == '/') {
774 symptr->flags |= FLAG_SLASH;
776 if (patptr < patend &&
777 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
778 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
785 /* at end - no count, no / */
786 symptr->howlen = e_no_len;
791 symptr->patptr = patptr;
795 symptr->patptr = patptr;
800 There is no way to cleanly handle the case where we should process the
801 string per byte in its upgraded form while it's really in downgraded form
802 (e.g. estimates like strend-s as an upper bound for the number of
803 characters left wouldn't work). So if we foresee the need of this
804 (pattern starts with U or contains U0), we want to work on the encoded
805 version of the string. Users are advised to upgrade their pack string
806 themselves if they need to do a lot of unpacks like this on it
809 need_utf8(const char *pat, const char *patend)
813 PERL_ARGS_ASSERT_NEED_UTF8;
815 while (pat < patend) {
818 pat = (const char *) memchr(pat, '\n', patend-pat);
819 if (!pat) return FALSE;
820 } else if (pat[0] == 'U') {
821 if (first || pat[1] == '0') return TRUE;
822 } else first = FALSE;
829 first_symbol(const char *pat, const char *patend) {
830 PERL_ARGS_ASSERT_FIRST_SYMBOL;
832 while (pat < patend) {
833 if (pat[0] != '#') return pat[0];
835 pat = (const char *) memchr(pat, '\n', patend-pat);
843 =for apidoc unpackstring
845 The engine implementing the unpack() Perl function.
847 Using the template pat..patend, this function unpacks the string
848 s..strend into a number of mortal SVs, which it pushes onto the perl
849 argument (@_) stack (so you will need to issue a C<PUTBACK> before and
850 C<SPAGAIN> after the call to this function). It returns the number of
853 The strend and patend pointers should point to the byte following the last
854 character of each string.
856 Although this function returns its values on the perl argument stack, it
857 doesn't take any parameters from that stack (and thus in particular
858 there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
864 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
868 PERL_ARGS_ASSERT_UNPACKSTRING;
870 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
871 else if (need_utf8(pat, patend)) {
872 /* We probably should try to avoid this in case a scalar context call
873 wouldn't get to the "U0" */
874 STRLEN len = strend - s;
875 s = (char *) bytes_to_utf8((U8 *) s, &len);
878 flags |= FLAG_DO_UTF8;
881 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
882 flags |= FLAG_PARSE_UTF8;
884 TEMPSYM_INIT(&sym, pat, patend, flags);
886 return unpack_rec(&sym, s, s, strend, NULL );
890 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
894 const I32 start_sp_offset = SP - PL_stack_base;
899 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
901 bool explicit_length;
902 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
903 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
905 PERL_ARGS_ASSERT_UNPACK_REC;
907 symptr->strbeg = s - strbeg;
909 while (next_symbol(symptr)) {
912 I32 datumtype = symptr->code;
913 /* do first one only unless in list context
914 / is implemented by unpacking the count, then popping it from the
915 stack, so must check that we're not in the middle of a / */
917 && (SP - PL_stack_base == start_sp_offset + 1)
918 && (datumtype != '/') ) /* XXX can this be omitted */
921 switch (howlen = symptr->howlen) {
923 len = strend - strbeg; /* long enough */
926 /* e_no_len and e_number */
927 len = symptr->length;
931 explicit_length = TRUE;
933 beyond = s >= strend;
935 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
937 /* props nonzero means we can process this letter. */
938 const long size = props & PACK_SIZE_MASK;
939 const long howmany = (strend - s) / size;
943 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
944 if (len && unpack_only_one) len = 1;
950 switch(TYPE_NO_ENDIANNESS(datumtype)) {
952 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
955 if (howlen == e_no_len)
956 len = 16; /* len is not specified */
964 tempsym_t savsym = *symptr;
965 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
966 symptr->flags |= group_modifiers;
967 symptr->patend = savsym.grpend;
968 symptr->previous = &savsym;
971 if (len && unpack_only_one) len = 1;
973 symptr->patptr = savsym.grpbeg;
974 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
975 else symptr->flags &= ~FLAG_PARSE_UTF8;
976 unpack_rec(symptr, s, strbeg, strend, &s);
977 if (s == strend && savsym.howlen == e_star)
978 break; /* No way to continue */
981 savsym.flags = symptr->flags & ~group_modifiers;
985 case '.' | TYPE_IS_SHRIEKING:
989 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
990 if (howlen == e_star) from = strbeg;
991 else if (len <= 0) from = s;
993 tempsym_t *group = symptr;
995 while (--len && group) group = group->previous;
996 from = group ? strbeg + group->strbeg : strbeg;
999 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1000 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1004 case '@' | TYPE_IS_SHRIEKING:
1006 s = strbeg + symptr->strbeg;
1007 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1011 Perl_croak(aTHX_ "'@' outside of string in unpack");
1016 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1019 Perl_croak(aTHX_ "'@' outside of string in unpack");
1023 case 'X' | TYPE_IS_SHRIEKING:
1024 if (!len) /* Avoid division by 0 */
1027 const char *hop, *last;
1029 hop = last = strbeg;
1031 hop += UTF8SKIP(hop);
1038 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1042 len = (s - strbeg) % len;
1048 Perl_croak(aTHX_ "'X' outside of string in unpack");
1049 while (--s, UTF8_IS_CONTINUATION(*s)) {
1051 Perl_croak(aTHX_ "'X' outside of string in unpack");
1056 if (len > s - strbeg)
1057 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1061 case 'x' | TYPE_IS_SHRIEKING: {
1063 if (!len) /* Avoid division by 0 */
1065 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1066 else ai32 = (s - strbeg) % len;
1067 if (ai32 == 0) break;
1075 Perl_croak(aTHX_ "'x' outside of string in unpack");
1080 if (len > strend - s)
1081 Perl_croak(aTHX_ "'x' outside of string in unpack");
1086 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1092 /* Preliminary length estimate is assumed done in 'W' */
1093 if (len > strend - s) len = strend - s;
1099 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1100 if (hop >= strend) {
1102 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1107 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1109 } else if (len > strend - s)
1112 if (datumtype == 'Z') {
1113 /* 'Z' strips stuff after first null */
1114 const char *ptr, *end;
1116 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1117 sv = newSVpvn(s, ptr-s);
1118 if (howlen == e_star) /* exact for 'Z*' */
1119 len = ptr-s + (ptr != strend ? 1 : 0);
1120 } else if (datumtype == 'A') {
1121 /* 'A' strips both nulls and spaces */
1123 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1124 for (ptr = s+len-1; ptr >= s; ptr--)
1125 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1126 !isSPACE_utf8(ptr)) break;
1127 if (ptr >= s) ptr += UTF8SKIP(ptr);
1130 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1132 for (ptr = s+len-1; ptr >= s; ptr--)
1133 if (*ptr != 0 && !isSPACE(*ptr)) break;
1136 sv = newSVpvn(s, ptr-s);
1137 } else sv = newSVpvn(s, len);
1141 /* Undo any upgrade done due to need_utf8() */
1142 if (!(symptr->flags & FLAG_WAS_UTF8))
1143 sv_utf8_downgrade(sv, 0);
1151 if (howlen == e_star || len > (strend - s) * 8)
1152 len = (strend - s) * 8;
1155 while (len >= 8 && s < strend) {
1156 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1161 cuv += PL_bitcount[*(U8 *)s++];
1164 if (len && s < strend) {
1166 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1167 if (datumtype == 'b')
1169 if (bits & 1) cuv++;
1174 if (bits & 0x80) cuv++;
1181 sv = sv_2mortal(newSV(len ? len : 1));
1184 if (datumtype == 'b') {
1186 const I32 ai32 = len;
1187 for (len = 0; len < ai32; len++) {
1188 if (len & 7) bits >>= 1;
1190 if (s >= strend) break;
1191 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1192 } else bits = *(U8 *) s++;
1193 *str++ = bits & 1 ? '1' : '0';
1197 const I32 ai32 = len;
1198 for (len = 0; len < ai32; len++) {
1199 if (len & 7) bits <<= 1;
1201 if (s >= strend) break;
1202 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1203 } else bits = *(U8 *) s++;
1204 *str++ = bits & 0x80 ? '1' : '0';
1208 SvCUR_set(sv, str - SvPVX_const(sv));
1215 /* Preliminary length estimate, acceptable for utf8 too */
1216 if (howlen == e_star || len > (strend - s) * 2)
1217 len = (strend - s) * 2;
1219 sv = sv_2mortal(newSV(len ? len : 1));
1223 if (datumtype == 'h') {
1226 for (len = 0; len < ai32; len++) {
1227 if (len & 1) bits >>= 4;
1229 if (s >= strend) break;
1230 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1231 } else bits = * (U8 *) s++;
1233 *str++ = PL_hexdigit[bits & 15];
1237 const I32 ai32 = len;
1238 for (len = 0; len < ai32; len++) {
1239 if (len & 1) bits <<= 4;
1241 if (s >= strend) break;
1242 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1243 } else bits = *(U8 *) s++;
1245 *str++ = PL_hexdigit[(bits >> 4) & 15];
1250 SvCUR_set(sv, str - SvPVX_const(sv));
1257 if (explicit_length)
1258 /* Switch to "character" mode */
1259 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1264 while (len-- > 0 && s < strend) {
1269 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1270 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1271 if (retlen == (STRLEN) -1 || retlen == 0)
1272 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1276 aint = *(U8 *)(s)++;
1277 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1281 else if (checksum > bits_in_uv)
1282 cdouble += (NV)aint;
1290 while (len-- > 0 && s < strend) {
1292 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1293 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1294 if (retlen == (STRLEN) -1 || retlen == 0)
1295 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1299 else if (checksum > bits_in_uv)
1300 cdouble += (NV) val;
1304 } else if (!checksum)
1306 const U8 ch = *(U8 *) s++;
1309 else if (checksum > bits_in_uv)
1310 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1312 while (len-- > 0) cuv += *(U8 *) s++;
1316 if (explicit_length && howlen != e_star) {
1317 /* Switch to "bytes in UTF-8" mode */
1318 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1320 /* Should be impossible due to the need_utf8() test */
1321 Perl_croak(aTHX_ "U0 mode on a byte string");
1325 if (len > strend - s) len = strend - s;
1327 if (len && unpack_only_one) len = 1;
1331 while (len-- > 0 && s < strend) {
1335 U8 result[UTF8_MAXLEN];
1336 const char *ptr = s;
1338 /* Bug: warns about bad utf8 even if we are short on bytes
1339 and will break out of the loop */
1340 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1343 len = UTF8SKIP(result);
1344 if (!uni_to_bytes(aTHX_ &ptr, strend,
1345 (char *) &result[1], len-1, 'U')) break;
1346 auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT);
1349 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
1350 if (retlen == (STRLEN) -1 || retlen == 0)
1351 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1356 else if (checksum > bits_in_uv)
1357 cdouble += (NV) auv;
1362 case 's' | TYPE_IS_SHRIEKING:
1363 #if SHORTSIZE != SIZE16
1366 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1367 DO_BO_UNPACK(ashort, s);
1370 else if (checksum > bits_in_uv)
1371 cdouble += (NV)ashort;
1383 #if U16SIZE > SIZE16
1386 SHIFT16(utf8, s, strend, &ai16, datumtype);
1387 DO_BO_UNPACK(ai16, 16);
1388 #if U16SIZE > SIZE16
1394 else if (checksum > bits_in_uv)
1395 cdouble += (NV)ai16;
1400 case 'S' | TYPE_IS_SHRIEKING:
1401 #if SHORTSIZE != SIZE16
1403 unsigned short aushort;
1404 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1405 DO_BO_UNPACK(aushort, s);
1408 else if (checksum > bits_in_uv)
1409 cdouble += (NV)aushort;
1422 #if U16SIZE > SIZE16
1425 SHIFT16(utf8, s, strend, &au16, datumtype);
1426 DO_BO_UNPACK(au16, 16);
1427 if (datumtype == 'n')
1428 au16 = PerlSock_ntohs(au16);
1429 if (datumtype == 'v')
1433 else if (checksum > bits_in_uv)
1434 cdouble += (NV) au16;
1439 case 'v' | TYPE_IS_SHRIEKING:
1440 case 'n' | TYPE_IS_SHRIEKING:
1443 # if U16SIZE > SIZE16
1446 SHIFT16(utf8, s, strend, &ai16, datumtype);
1447 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1448 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1449 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1450 ai16 = (I16) vtohs((U16) ai16);
1453 else if (checksum > bits_in_uv)
1454 cdouble += (NV) ai16;
1460 case 'i' | TYPE_IS_SHRIEKING:
1463 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1464 DO_BO_UNPACK(aint, i);
1467 else if (checksum > bits_in_uv)
1468 cdouble += (NV)aint;
1474 case 'I' | TYPE_IS_SHRIEKING:
1477 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1478 DO_BO_UNPACK(auint, i);
1481 else if (checksum > bits_in_uv)
1482 cdouble += (NV)auint;
1490 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1491 #if IVSIZE == INTSIZE
1492 DO_BO_UNPACK(aiv, i);
1493 #elif IVSIZE == LONGSIZE
1494 DO_BO_UNPACK(aiv, l);
1495 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1496 DO_BO_UNPACK(aiv, 64);
1498 Perl_croak(aTHX_ "'j' not supported on this platform");
1502 else if (checksum > bits_in_uv)
1511 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1512 #if IVSIZE == INTSIZE
1513 DO_BO_UNPACK(auv, i);
1514 #elif IVSIZE == LONGSIZE
1515 DO_BO_UNPACK(auv, l);
1516 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1517 DO_BO_UNPACK(auv, 64);
1519 Perl_croak(aTHX_ "'J' not supported on this platform");
1523 else if (checksum > bits_in_uv)
1529 case 'l' | TYPE_IS_SHRIEKING:
1530 #if LONGSIZE != SIZE32
1533 SHIFT_VAR(utf8, s, strend, along, datumtype);
1534 DO_BO_UNPACK(along, l);
1537 else if (checksum > bits_in_uv)
1538 cdouble += (NV)along;
1549 #if U32SIZE > SIZE32
1552 SHIFT32(utf8, s, strend, &ai32, datumtype);
1553 DO_BO_UNPACK(ai32, 32);
1554 #if U32SIZE > SIZE32
1555 if (ai32 > 2147483647) ai32 -= 4294967296;
1559 else if (checksum > bits_in_uv)
1560 cdouble += (NV)ai32;
1565 case 'L' | TYPE_IS_SHRIEKING:
1566 #if LONGSIZE != SIZE32
1568 unsigned long aulong;
1569 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1570 DO_BO_UNPACK(aulong, l);
1573 else if (checksum > bits_in_uv)
1574 cdouble += (NV)aulong;
1587 #if U32SIZE > SIZE32
1590 SHIFT32(utf8, s, strend, &au32, datumtype);
1591 DO_BO_UNPACK(au32, 32);
1592 if (datumtype == 'N')
1593 au32 = PerlSock_ntohl(au32);
1594 if (datumtype == 'V')
1598 else if (checksum > bits_in_uv)
1599 cdouble += (NV)au32;
1604 case 'V' | TYPE_IS_SHRIEKING:
1605 case 'N' | TYPE_IS_SHRIEKING:
1608 #if U32SIZE > SIZE32
1611 SHIFT32(utf8, s, strend, &ai32, datumtype);
1612 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1613 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1614 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1615 ai32 = (I32)vtohl((U32)ai32);
1618 else if (checksum > bits_in_uv)
1619 cdouble += (NV)ai32;
1627 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1628 DO_BO_UNPACK(aptr, pointer);
1629 /* newSVpv generates undef if aptr is NULL */
1630 mPUSHs(newSVpv(aptr, 0));
1638 while (len > 0 && s < strend) {
1640 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1641 auv = (auv << 7) | (ch & 0x7f);
1642 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1650 if (++bytes >= sizeof(UV)) { /* promote to string */
1653 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
1654 while (s < strend) {
1655 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1656 sv = mul128(sv, (U8)(ch & 0x7f));
1662 t = SvPV_nolen_const(sv);
1671 if ((s >= strend) && bytes)
1672 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1676 if (symptr->howlen == e_star)
1677 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1679 if (s + sizeof(char*) <= strend) {
1681 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1682 DO_BO_UNPACK(aptr, pointer);
1683 /* newSVpvn generates undef if aptr is NULL */
1684 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1691 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
1692 DO_BO_UNPACK(aquad, 64);
1694 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
1695 newSViv((IV)aquad) : newSVnv((NV)aquad));
1696 else if (checksum > bits_in_uv)
1697 cdouble += (NV)aquad;
1705 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
1706 DO_BO_UNPACK(auquad, 64);
1708 mPUSHs(auquad <= UV_MAX ?
1709 newSVuv((UV)auquad) : newSVnv((NV)auquad));
1710 else if (checksum > bits_in_uv)
1711 cdouble += (NV)auquad;
1716 #endif /* HAS_QUAD */
1717 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1721 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
1722 DO_BO_UNPACK(afloat, float);
1732 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
1733 DO_BO_UNPACK(adouble, double);
1743 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), datumtype);
1744 DO_BO_UNPACK(anv.nv, NV);
1751 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1755 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, sizeof(aldouble.bytes), datumtype);
1756 DO_BO_UNPACK(aldouble.ld, long double);
1758 mPUSHn(aldouble.ld);
1760 cdouble += aldouble.ld;
1766 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1767 sv = sv_2mortal(newSV(l));
1768 if (l) SvPOK_on(sv);
1771 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1776 next_uni_uu(aTHX_ &s, strend, &a);
1777 next_uni_uu(aTHX_ &s, strend, &b);
1778 next_uni_uu(aTHX_ &s, strend, &c);
1779 next_uni_uu(aTHX_ &s, strend, &d);
1780 hunk[0] = (char)((a << 2) | (b >> 4));
1781 hunk[1] = (char)((b << 4) | (c >> 2));
1782 hunk[2] = (char)((c << 6) | d);
1784 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1792 /* possible checksum byte */
1793 const char *skip = s+UTF8SKIP(s);
1794 if (skip < strend && *skip == '\n')
1800 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1804 len = PL_uudmap[*(U8*)s++] & 077;
1806 if (s < strend && ISUUCHAR(*s))
1807 a = PL_uudmap[*(U8*)s++] & 077;
1810 if (s < strend && ISUUCHAR(*s))
1811 b = PL_uudmap[*(U8*)s++] & 077;
1814 if (s < strend && ISUUCHAR(*s))
1815 c = PL_uudmap[*(U8*)s++] & 077;
1818 if (s < strend && ISUUCHAR(*s))
1819 d = PL_uudmap[*(U8*)s++] & 077;
1822 hunk[0] = (char)((a << 2) | (b >> 4));
1823 hunk[1] = (char)((b << 4) | (c >> 2));
1824 hunk[2] = (char)((c << 6) | d);
1826 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1831 else /* possible checksum byte */
1832 if (s + 1 < strend && s[1] == '\n')
1842 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1843 (checksum > bits_in_uv &&
1844 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1847 anv = (NV) (1 << (checksum & 15));
1848 while (checksum >= 16) {
1852 while (cdouble < 0.0)
1854 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
1855 sv = newSVnv(cdouble);
1858 if (checksum < bits_in_uv) {
1859 UV mask = ((UV)1 << checksum) - 1;
1868 if (symptr->flags & FLAG_SLASH){
1869 if (SP - PL_stack_base - start_sp_offset <= 0)
1871 if( next_symbol(symptr) ){
1872 if( symptr->howlen == e_number )
1873 Perl_croak(aTHX_ "Count after length/code in unpack" );
1875 /* ...end of char buffer then no decent length available */
1876 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1878 /* take top of stack (hope it's numeric) */
1881 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1884 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1886 datumtype = symptr->code;
1887 explicit_length = FALSE;
1895 return SP - PL_stack_base - start_sp_offset;
1903 I32 gimme = GIMME_V;
1906 const char *pat = SvPV_const(left, llen);
1907 const char *s = SvPV_const(right, rlen);
1908 const char *strend = s + rlen;
1909 const char *patend = pat + llen;
1913 cnt = unpackstring(pat, patend, s, strend,
1914 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
1915 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
1918 if ( !cnt && gimme == G_SCALAR )
1919 PUSHs(&PL_sv_undef);
1924 doencodes(U8 *h, const char *s, I32 len)
1926 *h++ = PL_uuemap[len];
1928 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1929 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1930 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1931 *h++ = PL_uuemap[(077 & (s[2] & 077))];
1936 const char r = (len > 1 ? s[1] : '\0');
1937 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1938 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1939 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1940 *h++ = PL_uuemap[0];
1947 S_is_an_int(pTHX_ const char *s, STRLEN l)
1949 SV *result = newSVpvn(s, l);
1950 char *const result_c = SvPV_nolen(result); /* convenience */
1951 char *out = result_c;
1955 PERL_ARGS_ASSERT_IS_AN_INT;
1963 SvREFCNT_dec(result);
1986 SvREFCNT_dec(result);
1992 SvCUR_set(result, out - result_c);
1996 /* pnum must be '\0' terminated */
1998 S_div128(pTHX_ SV *pnum, bool *done)
2001 char * const s = SvPV(pnum, len);
2005 PERL_ARGS_ASSERT_DIV128;
2009 const int i = m * 10 + (*t - '0');
2010 const int r = (i >> 7); /* r < 10 */
2018 SvCUR_set(pnum, (STRLEN) (t - s));
2023 =for apidoc packlist
2025 The engine implementing pack() Perl function.
2031 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
2036 PERL_ARGS_ASSERT_PACKLIST;
2038 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2040 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2041 Also make sure any UTF8 flag is loaded */
2042 SvPV_force_nolen(cat);
2044 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2046 (void)pack_rec( cat, &sym, beglist, endlist );
2049 /* like sv_utf8_upgrade, but also repoint the group start markers */
2051 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2054 const char *from_ptr, *from_start, *from_end, **marks, **m;
2055 char *to_start, *to_ptr;
2057 if (SvUTF8(sv)) return;
2059 from_start = SvPVX_const(sv);
2060 from_end = from_start + SvCUR(sv);
2061 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2062 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2063 if (from_ptr == from_end) {
2064 /* Simple case: no character needs to be changed */
2069 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2070 Newx(to_start, len, char);
2071 Copy(from_start, to_start, from_ptr-from_start, char);
2072 to_ptr = to_start + (from_ptr-from_start);
2074 Newx(marks, sym_ptr->level+2, const char *);
2075 for (group=sym_ptr; group; group = group->previous)
2076 marks[group->level] = from_start + group->strbeg;
2077 marks[sym_ptr->level+1] = from_end+1;
2078 for (m = marks; *m < from_ptr; m++)
2079 *m = to_start + (*m-from_start);
2081 for (;from_ptr < from_end; from_ptr++) {
2082 while (*m == from_ptr) *m++ = to_ptr;
2083 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2087 while (*m == from_ptr) *m++ = to_ptr;
2088 if (m != marks + sym_ptr->level+1) {
2091 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2092 "level=%d", m, marks, sym_ptr->level);
2094 for (group=sym_ptr; group; group = group->previous)
2095 group->strbeg = marks[group->level] - to_start;
2100 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2101 from_start -= SvIVX(sv);
2104 SvFLAGS(sv) &= ~SVf_OOK;
2107 Safefree(from_start);
2108 SvPV_set(sv, to_start);
2109 SvCUR_set(sv, to_ptr - to_start);
2114 /* Exponential string grower. Makes string extension effectively O(n)
2115 needed says how many extra bytes we need (not counting the final '\0')
2116 Only grows the string if there is an actual lack of space
2119 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2120 const STRLEN cur = SvCUR(sv);
2121 const STRLEN len = SvLEN(sv);
2124 PERL_ARGS_ASSERT_SV_EXP_GROW;
2126 if (len - cur > needed) return SvPVX(sv);
2127 extend = needed > len ? needed : len;
2128 return SvGROW(sv, len+extend+1);
2133 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2136 tempsym_t lookahead;
2137 I32 items = endlist - beglist;
2138 bool found = next_symbol(symptr);
2139 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2140 bool warn_utf8 = ckWARN(WARN_UTF8);
2142 PERL_ARGS_ASSERT_PACK_REC;
2144 if (symptr->level == 0 && found && symptr->code == 'U') {
2145 marked_upgrade(aTHX_ cat, symptr);
2146 symptr->flags |= FLAG_DO_UTF8;
2149 symptr->strbeg = SvCUR(cat);
2155 SV *lengthcode = NULL;
2156 I32 datumtype = symptr->code;
2157 howlen_t howlen = symptr->howlen;
2158 char *start = SvPVX(cat);
2159 char *cur = start + SvCUR(cat);
2161 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2165 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2169 /* e_no_len and e_number */
2170 len = symptr->length;
2175 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2177 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2178 /* We can process this letter. */
2179 STRLEN size = props & PACK_SIZE_MASK;
2180 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2184 /* Look ahead for next symbol. Do we have code/code? */
2185 lookahead = *symptr;
2186 found = next_symbol(&lookahead);
2187 if (symptr->flags & FLAG_SLASH) {
2189 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2190 if (strchr("aAZ", lookahead.code)) {
2191 if (lookahead.howlen == e_number) count = lookahead.length;
2194 count = sv_len_utf8(*beglist);
2197 if (lookahead.code == 'Z') count++;
2200 if (lookahead.howlen == e_number && lookahead.length < items)
2201 count = lookahead.length;
2204 lookahead.howlen = e_number;
2205 lookahead.length = count;
2206 lengthcode = sv_2mortal(newSViv(count));
2209 /* Code inside the switch must take care to properly update
2210 cat (CUR length and '\0' termination) if it updated *cur and
2211 doesn't simply leave using break */
2212 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2214 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2215 (int) TYPE_NO_MODIFIERS(datumtype));
2217 Perl_croak(aTHX_ "'%%' may not be used in pack");
2220 case '.' | TYPE_IS_SHRIEKING:
2222 if (howlen == e_star) from = start;
2223 else if (len == 0) from = cur;
2225 tempsym_t *group = symptr;
2227 while (--len && group) group = group->previous;
2228 from = group ? start + group->strbeg : start;
2231 len = SvIV(fromstr);
2233 case '@' | TYPE_IS_SHRIEKING:
2235 from = start + symptr->strbeg;
2237 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2239 while (len && from < cur) {
2240 from += UTF8SKIP(from);
2244 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2246 /* Here we know from == cur */
2248 GROWING(0, cat, start, cur, len);
2249 Zero(cur, len, char);
2251 } else if (from < cur) {
2254 } else goto no_change;
2262 if (len > 0) goto grow;
2263 if (len == 0) goto no_change;
2270 tempsym_t savsym = *symptr;
2271 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2272 symptr->flags |= group_modifiers;
2273 symptr->patend = savsym.grpend;
2275 symptr->previous = &lookahead;
2278 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2279 else symptr->flags &= ~FLAG_PARSE_UTF8;
2280 was_utf8 = SvUTF8(cat);
2281 symptr->patptr = savsym.grpbeg;
2282 beglist = pack_rec(cat, symptr, beglist, endlist);
2283 if (SvUTF8(cat) != was_utf8)
2284 /* This had better be an upgrade while in utf8==0 mode */
2287 if (savsym.howlen == e_star && beglist == endlist)
2288 break; /* No way to continue */
2290 items = endlist - beglist;
2291 lookahead.flags = symptr->flags & ~group_modifiers;
2294 case 'X' | TYPE_IS_SHRIEKING:
2295 if (!len) /* Avoid division by 0 */
2302 hop += UTF8SKIP(hop);
2309 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2313 len = (cur-start) % len;
2317 if (len < 1) goto no_change;
2321 Perl_croak(aTHX_ "'%c' outside of string in pack",
2322 (int) TYPE_NO_MODIFIERS(datumtype));
2323 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2325 Perl_croak(aTHX_ "'%c' outside of string in pack",
2326 (int) TYPE_NO_MODIFIERS(datumtype));
2332 if (cur - start < len)
2333 Perl_croak(aTHX_ "'%c' outside of string in pack",
2334 (int) TYPE_NO_MODIFIERS(datumtype));
2337 if (cur < start+symptr->strbeg) {
2338 /* Make sure group starts don't point into the void */
2340 const STRLEN length = cur-start;
2341 for (group = symptr;
2342 group && length < group->strbeg;
2343 group = group->previous) group->strbeg = length;
2344 lookahead.strbeg = length;
2347 case 'x' | TYPE_IS_SHRIEKING: {
2349 if (!len) /* Avoid division by 0 */
2351 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2352 else ai32 = (cur - start) % len;
2353 if (ai32 == 0) goto no_change;
2365 aptr = SvPV_const(fromstr, fromlen);
2366 if (DO_UTF8(fromstr)) {
2367 const char *end, *s;
2369 if (!utf8 && !SvUTF8(cat)) {
2370 marked_upgrade(aTHX_ cat, symptr);
2371 lookahead.flags |= FLAG_DO_UTF8;
2372 lookahead.strbeg = symptr->strbeg;
2375 cur = start + SvCUR(cat);
2377 if (howlen == e_star) {
2378 if (utf8) goto string_copy;
2382 end = aptr + fromlen;
2383 fromlen = datumtype == 'Z' ? len-1 : len;
2384 while ((I32) fromlen > 0 && s < end) {
2389 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2392 if (datumtype == 'Z') len++;
2398 fromlen = len - fromlen;
2399 if (datumtype == 'Z') fromlen--;
2400 if (howlen == e_star) {
2402 if (datumtype == 'Z') len++;
2404 GROWING(0, cat, start, cur, len);
2405 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2406 datumtype | TYPE_IS_PACK))
2407 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2408 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2409 (int)datumtype, aptr, end, cur, (UV)fromlen);
2413 if (howlen == e_star) {
2415 if (datumtype == 'Z') len++;
2417 if (len <= (I32) fromlen) {
2419 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2421 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2423 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2424 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2426 while (fromlen > 0) {
2427 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2433 if (howlen == e_star) {
2435 if (datumtype == 'Z') len++;
2437 if (len <= (I32) fromlen) {
2439 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2441 GROWING(0, cat, start, cur, len);
2442 Copy(aptr, cur, fromlen, char);
2446 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2453 const char *str, *end;
2460 str = SvPV_const(fromstr, fromlen);
2461 end = str + fromlen;
2462 if (DO_UTF8(fromstr)) {
2464 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2466 utf8_source = FALSE;
2467 utf8_flags = 0; /* Unused, but keep compilers happy */
2469 if (howlen == e_star) len = fromlen;
2470 field_len = (len+7)/8;
2471 GROWING(utf8, cat, start, cur, field_len);
2472 if (len > (I32)fromlen) len = fromlen;
2475 if (datumtype == 'B')
2479 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2481 } else bits |= *str++ & 1;
2482 if (l & 7) bits <<= 1;
2484 PUSH_BYTE(utf8, cur, bits);
2489 /* datumtype == 'b' */
2493 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2494 if (val & 1) bits |= 0x80;
2495 } else if (*str++ & 1)
2497 if (l & 7) bits >>= 1;
2499 PUSH_BYTE(utf8, cur, bits);
2505 if (datumtype == 'B')
2506 bits <<= 7 - (l & 7);
2508 bits >>= 7 - (l & 7);
2509 PUSH_BYTE(utf8, cur, bits);
2512 /* Determine how many chars are left in the requested field */
2514 if (howlen == e_star) field_len = 0;
2515 else field_len -= l;
2516 Zero(cur, field_len, char);
2522 const char *str, *end;
2529 str = SvPV_const(fromstr, fromlen);
2530 end = str + fromlen;
2531 if (DO_UTF8(fromstr)) {
2533 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2535 utf8_source = FALSE;
2536 utf8_flags = 0; /* Unused, but keep compilers happy */
2538 if (howlen == e_star) len = fromlen;
2539 field_len = (len+1)/2;
2540 GROWING(utf8, cat, start, cur, field_len);
2541 if (!utf8 && len > (I32)fromlen) len = fromlen;
2544 if (datumtype == 'H')
2548 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2549 if (val < 256 && isALPHA(val))
2550 bits |= (val + 9) & 0xf;
2553 } else if (isALPHA(*str))
2554 bits |= (*str++ + 9) & 0xf;
2556 bits |= *str++ & 0xf;
2557 if (l & 1) bits <<= 4;
2559 PUSH_BYTE(utf8, cur, bits);
2567 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2568 if (val < 256 && isALPHA(val))
2569 bits |= ((val + 9) & 0xf) << 4;
2571 bits |= (val & 0xf) << 4;
2572 } else if (isALPHA(*str))
2573 bits |= ((*str++ + 9) & 0xf) << 4;
2575 bits |= (*str++ & 0xf) << 4;
2576 if (l & 1) bits >>= 4;
2578 PUSH_BYTE(utf8, cur, bits);
2584 PUSH_BYTE(utf8, cur, bits);
2587 /* Determine how many chars are left in the requested field */
2589 if (howlen == e_star) field_len = 0;
2590 else field_len -= l;
2591 Zero(cur, field_len, char);
2599 aiv = SvIV(fromstr);
2600 if ((-128 > aiv || aiv > 127))
2601 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2602 "Character in 'c' format wrapped in pack");
2603 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2608 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2614 aiv = SvIV(fromstr);
2615 if ((0 > aiv || aiv > 0xff))
2616 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2617 "Character in 'C' format wrapped in pack");
2618 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2623 U8 in_bytes = (U8)IN_BYTES;
2625 end = start+SvLEN(cat)-1;
2626 if (utf8) end -= UTF8_MAXLEN-1;
2630 auv = SvUV(fromstr);
2631 if (in_bytes) auv = auv % 0x100;
2636 SvCUR_set(cat, cur - start);
2638 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2639 end = start+SvLEN(cat)-UTF8_MAXLEN;
2641 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2644 0 : UNICODE_ALLOW_ANY);
2649 SvCUR_set(cat, cur - start);
2650 marked_upgrade(aTHX_ cat, symptr);
2651 lookahead.flags |= FLAG_DO_UTF8;
2652 lookahead.strbeg = symptr->strbeg;
2655 cur = start + SvCUR(cat);
2656 end = start+SvLEN(cat)-UTF8_MAXLEN;
2659 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2660 "Character in 'W' format wrapped in pack");
2665 SvCUR_set(cat, cur - start);
2666 GROWING(0, cat, start, cur, len+1);
2667 end = start+SvLEN(cat)-1;
2669 *(U8 *) cur++ = (U8)auv;
2678 if (!(symptr->flags & FLAG_DO_UTF8)) {
2679 marked_upgrade(aTHX_ cat, symptr);
2680 lookahead.flags |= FLAG_DO_UTF8;
2681 lookahead.strbeg = symptr->strbeg;
2687 end = start+SvLEN(cat);
2688 if (!utf8) end -= UTF8_MAXLEN;
2692 auv = SvUV(fromstr);
2694 U8 buffer[UTF8_MAXLEN], *endb;
2695 endb = uvuni_to_utf8_flags(buffer, auv,
2697 0 : UNICODE_ALLOW_ANY);
2698 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2700 SvCUR_set(cat, cur - start);
2701 GROWING(0, cat, start, cur,
2702 len+(endb-buffer)*UTF8_EXPAND);
2703 end = start+SvLEN(cat);
2705 cur = bytes_to_uni(buffer, endb-buffer, cur);
2709 SvCUR_set(cat, cur - start);
2710 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2711 end = start+SvLEN(cat)-UTF8_MAXLEN;
2713 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
2715 0 : UNICODE_ALLOW_ANY);
2720 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2726 anv = SvNV(fromstr);
2727 # if defined(VMS) && !defined(_IEEE_FP)
2728 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2729 * on Alpha; fake it if we don't have them.
2733 else if (anv < -FLT_MAX)
2735 else afloat = (float)anv;
2737 afloat = (float)anv;
2739 DO_BO_PACK(afloat, float);
2740 PUSH_VAR(utf8, cur, afloat);
2748 anv = SvNV(fromstr);
2749 # if defined(VMS) && !defined(_IEEE_FP)
2750 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2751 * on Alpha; fake it if we don't have them.
2755 else if (anv < -DBL_MAX)
2757 else adouble = (double)anv;
2759 adouble = (double)anv;
2761 DO_BO_PACK(adouble, double);
2762 PUSH_VAR(utf8, cur, adouble);
2767 Zero(&anv, 1, NV); /* can be long double with unused bits */
2771 /* to work round a gcc/x86 bug; don't use SvNV */
2772 anv.nv = sv_2nv(fromstr);
2774 anv.nv = SvNV(fromstr);
2776 DO_BO_PACK(anv, NV);
2777 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes));
2781 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2784 /* long doubles can have unused bits, which may be nonzero */
2785 Zero(&aldouble, 1, long double);
2789 /* to work round a gcc/x86 bug; don't use SvNV */
2790 aldouble.ld = (long double)sv_2nv(fromstr);
2792 aldouble.ld = (long double)SvNV(fromstr);
2794 DO_BO_PACK(aldouble, long double);
2795 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes));
2800 case 'n' | TYPE_IS_SHRIEKING:
2805 ai16 = (I16)SvIV(fromstr);
2806 ai16 = PerlSock_htons(ai16);
2807 PUSH16(utf8, cur, &ai16);
2810 case 'v' | TYPE_IS_SHRIEKING:
2815 ai16 = (I16)SvIV(fromstr);
2817 PUSH16(utf8, cur, &ai16);
2820 case 'S' | TYPE_IS_SHRIEKING:
2821 #if SHORTSIZE != SIZE16
2823 unsigned short aushort;
2825 aushort = SvUV(fromstr);
2826 DO_BO_PACK(aushort, s);
2827 PUSH_VAR(utf8, cur, aushort);
2837 au16 = (U16)SvUV(fromstr);
2838 DO_BO_PACK(au16, 16);
2839 PUSH16(utf8, cur, &au16);
2842 case 's' | TYPE_IS_SHRIEKING:
2843 #if SHORTSIZE != SIZE16
2847 ashort = SvIV(fromstr);
2848 DO_BO_PACK(ashort, s);
2849 PUSH_VAR(utf8, cur, ashort);
2859 ai16 = (I16)SvIV(fromstr);
2860 DO_BO_PACK(ai16, 16);
2861 PUSH16(utf8, cur, &ai16);
2865 case 'I' | TYPE_IS_SHRIEKING:
2869 auint = SvUV(fromstr);
2870 DO_BO_PACK(auint, i);
2871 PUSH_VAR(utf8, cur, auint);
2878 aiv = SvIV(fromstr);
2879 #if IVSIZE == INTSIZE
2881 #elif IVSIZE == LONGSIZE
2883 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
2884 DO_BO_PACK(aiv, 64);
2886 Perl_croak(aTHX_ "'j' not supported on this platform");
2888 PUSH_VAR(utf8, cur, aiv);
2895 auv = SvUV(fromstr);
2896 #if UVSIZE == INTSIZE
2898 #elif UVSIZE == LONGSIZE
2900 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
2901 DO_BO_PACK(auv, 64);
2903 Perl_croak(aTHX_ "'J' not supported on this platform");
2905 PUSH_VAR(utf8, cur, auv);
2912 anv = SvNV(fromstr);
2916 SvCUR_set(cat, cur - start);
2917 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
2920 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2921 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2922 any negative IVs will have already been got by the croak()
2923 above. IOK is untrue for fractions, so we test them
2924 against UV_MAX_P1. */
2925 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2926 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
2927 char *in = buf + sizeof(buf);
2928 UV auv = SvUV(fromstr);
2931 *--in = (char)((auv & 0x7f) | 0x80);
2934 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2935 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2936 in, (buf + sizeof(buf)) - in);
2937 } else if (SvPOKp(fromstr))
2939 else if (SvNOKp(fromstr)) {
2940 /* 10**NV_MAX_10_EXP is the largest power of 10
2941 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
2942 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2943 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2944 And with that many bytes only Inf can overflow.
2945 Some C compilers are strict about integral constant
2946 expressions so we conservatively divide by a slightly
2947 smaller integer instead of multiplying by the exact
2948 floating-point value.
2950 #ifdef NV_MAX_10_EXP
2951 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
2952 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
2954 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
2955 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
2957 char *in = buf + sizeof(buf);
2959 anv = Perl_floor(anv);
2961 const NV next = Perl_floor(anv / 128);
2962 if (in <= buf) /* this cannot happen ;-) */
2963 Perl_croak(aTHX_ "Cannot compress integer in pack");
2964 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
2967 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2968 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2969 in, (buf + sizeof(buf)) - in);
2978 /* Copy string and check for compliance */
2979 from = SvPV_const(fromstr, len);
2980 if ((norm = is_an_int(from, len)) == NULL)
2981 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
2983 Newx(result, len, char);
2986 while (!done) *--in = div128(norm, &done) | 0x80;
2987 result[len - 1] &= 0x7F; /* clear continue bit */
2988 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2989 in, (result + len) - in);
2991 SvREFCNT_dec(norm); /* free norm */
2996 case 'i' | TYPE_IS_SHRIEKING:
3000 aint = SvIV(fromstr);
3001 DO_BO_PACK(aint, i);
3002 PUSH_VAR(utf8, cur, aint);
3005 case 'N' | TYPE_IS_SHRIEKING:
3010 au32 = SvUV(fromstr);
3011 au32 = PerlSock_htonl(au32);
3012 PUSH32(utf8, cur, &au32);
3015 case 'V' | TYPE_IS_SHRIEKING:
3020 au32 = SvUV(fromstr);
3022 PUSH32(utf8, cur, &au32);
3025 case 'L' | TYPE_IS_SHRIEKING:
3026 #if LONGSIZE != SIZE32
3028 unsigned long aulong;
3030 aulong = SvUV(fromstr);
3031 DO_BO_PACK(aulong, l);
3032 PUSH_VAR(utf8, cur, aulong);
3042 au32 = SvUV(fromstr);
3043 DO_BO_PACK(au32, 32);
3044 PUSH32(utf8, cur, &au32);
3047 case 'l' | TYPE_IS_SHRIEKING:
3048 #if LONGSIZE != SIZE32
3052 along = SvIV(fromstr);
3053 DO_BO_PACK(along, l);
3054 PUSH_VAR(utf8, cur, along);
3064 ai32 = SvIV(fromstr);
3065 DO_BO_PACK(ai32, 32);
3066 PUSH32(utf8, cur, &ai32);
3074 auquad = (Uquad_t) SvUV(fromstr);
3075 DO_BO_PACK(auquad, 64);
3076 PUSH_VAR(utf8, cur, auquad);
3083 aquad = (Quad_t)SvIV(fromstr);
3084 DO_BO_PACK(aquad, 64);
3085 PUSH_VAR(utf8, cur, aquad);
3088 #endif /* HAS_QUAD */
3090 len = 1; /* assume SV is correct length */
3091 GROWING(utf8, cat, start, cur, sizeof(char *));
3098 SvGETMAGIC(fromstr);
3099 if (!SvOK(fromstr)) aptr = NULL;
3101 /* XXX better yet, could spirit away the string to
3102 * a safe spot and hang on to it until the result
3103 * of pack() (and all copies of the result) are
3106 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3107 !SvREADONLY(fromstr)))) {
3108 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3109 "Attempt to pack pointer to temporary value");
3111 if (SvPOK(fromstr) || SvNIOK(fromstr))
3112 aptr = SvPV_nomg_const_nolen(fromstr);
3114 aptr = SvPV_force_flags_nolen(fromstr, 0);
3116 DO_BO_PACK(aptr, pointer);
3117 PUSH_VAR(utf8, cur, aptr);
3121 const char *aptr, *aend;
3125 if (len <= 2) len = 45;
3126 else len = len / 3 * 3;
3128 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3129 "Field too wide in 'u' format in pack");
3132 aptr = SvPV_const(fromstr, fromlen);
3133 from_utf8 = DO_UTF8(fromstr);
3135 aend = aptr + fromlen;
3136 fromlen = sv_len_utf8_nomg(fromstr);
3137 } else aend = NULL; /* Unused, but keep compilers happy */
3138 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3139 while (fromlen > 0) {
3142 U8 hunk[1+63/3*4+1];
3144 if ((I32)fromlen > len)
3150 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3151 'u' | TYPE_IS_PACK)) {
3153 SvCUR_set(cat, cur - start);
3154 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3155 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3156 aptr, aend, buffer, (long) todo);
3158 end = doencodes(hunk, buffer, todo);
3160 end = doencodes(hunk, aptr, todo);
3163 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3170 SvCUR_set(cat, cur - start);
3172 *symptr = lookahead;
3181 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3184 SV *pat_sv = *++MARK;
3185 const char *pat = SvPV_const(pat_sv, fromlen);
3186 const char *patend = pat + fromlen;
3192 packlist(cat, pat, patend, MARK, SP + 1);
3202 * c-indentation-style: bsd
3204 * indent-tabs-mode: nil
3207 * ex: set ts=8 sts=4 sw=4 et: