3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
32 #define PERL_IN_PP_PACK_C
35 /* Types used by pack/unpack */
37 e_no_len, /* no length */
38 e_number, /* number, [] */
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
57 #define TEMPSYM_INIT(symptr, p, e, f) \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
81 U8 bytes[sizeof(long double)];
86 # define PERL_PACK_CAN_BYTEORDER
87 # define PERL_PACK_CAN_SHRIEKSIGN
93 /* Maximum number of bytes to which a byte can grow due to upgrade */
97 * Offset for integer pack/unpack.
99 * On architectures where I16 and I32 aren't really 16 and 32 bits,
100 * which for now are all Crays, pack and unpack have to play games.
104 * These values are required for portability of pack() output.
105 * If they're not right on your machine, then pack() and unpack()
106 * wouldn't work right anyway; you'll need to apply the Cray hack.
107 * (I'd like to check them with #if, but you can't use sizeof() in
108 * the preprocessor.) --???
111 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
112 defines are now in config.h. --Andy Dougherty April 1998
117 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
120 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
121 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
122 # define OFF16(p) ((char*)(p))
123 # define OFF32(p) ((char*)(p))
125 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
126 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
127 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
129 ++++ bad cray byte order
133 # define OFF16(p) ((char *) (p))
134 # define OFF32(p) ((char *) (p))
137 /* Only to be used inside a loop (see the break) */
138 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
140 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
142 Copy(s, OFF16(p), SIZE16, char); \
147 /* Only to be used inside a loop (see the break) */
148 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
150 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
152 Copy(s, OFF32(p), SIZE32, char); \
157 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
158 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
160 /* Only to be used inside a loop (see the break) */
161 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype) \
164 if (!uni_to_bytes(aTHX_ &s, strend, \
165 (char *) (buf), len, datumtype)) break; \
167 Copy(s, (char *) (buf), len, char); \
172 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
173 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype)
175 #define PUSH_VAR(utf8, aptr, var) \
176 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
178 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
179 #define MAX_SUB_TEMPLATE_LEVEL 100
181 /* flags (note that type modifiers can also be used as flags!) */
182 #define FLAG_WAS_UTF8 0x40
183 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
184 #define FLAG_UNPACK_ONLY_ONE 0x10
185 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
186 #define FLAG_SLASH 0x04
187 #define FLAG_COMMA 0x02
188 #define FLAG_PACK 0x01
191 S_mul128(pTHX_ SV *sv, U8 m)
194 char *s = SvPV(sv, len);
197 PERL_ARGS_ASSERT_MUL128;
199 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
200 SV * const tmpNew = newSVpvs("0000000000");
202 sv_catsv(tmpNew, sv);
203 SvREFCNT_dec(sv); /* free old sv */
208 while (!*t) /* trailing '\0'? */
211 const U32 i = ((*t - '0') << 7) + m;
212 *(t--) = '0' + (char)(i % 10);
218 /* Explosives and implosives. */
220 #if 'I' == 73 && 'J' == 74
221 /* On an ASCII/ISO kind of system */
222 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
225 Some other sort of character set - use memchr() so we don't match
228 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
232 #define TYPE_IS_SHRIEKING 0x100
233 #define TYPE_IS_BIG_ENDIAN 0x200
234 #define TYPE_IS_LITTLE_ENDIAN 0x400
235 #define TYPE_IS_PACK 0x800
236 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
237 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
238 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
240 #ifdef PERL_PACK_CAN_SHRIEKSIGN
241 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
243 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
246 #ifndef PERL_PACK_CAN_BYTEORDER
247 /* Put "can't" first because it is shorter */
248 # define TYPE_ENDIANNESS(t) 0
249 # define TYPE_NO_ENDIANNESS(t) (t)
251 # define ENDIANNESS_ALLOWED_TYPES ""
253 # define DO_BO_UNPACK(var, type)
254 # define DO_BO_PACK(var, type)
255 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
256 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
257 # define DO_BO_UNPACK_N(var, type)
258 # define DO_BO_PACK_N(var, type)
259 # define DO_BO_UNPACK_P(var)
260 # define DO_BO_PACK_P(var)
261 # define DO_BO_UNPACK_PC(var)
262 # define DO_BO_PACK_PC(var)
264 #else /* PERL_PACK_CAN_BYTEORDER */
266 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
267 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
269 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
271 # define DO_BO_UNPACK(var, type) \
273 switch (TYPE_ENDIANNESS(datumtype)) { \
274 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
275 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
280 # define DO_BO_PACK(var, type) \
282 switch (TYPE_ENDIANNESS(datumtype)) { \
283 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
284 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
289 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
291 switch (TYPE_ENDIANNESS(datumtype)) { \
292 case TYPE_IS_BIG_ENDIAN: \
293 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
295 case TYPE_IS_LITTLE_ENDIAN: \
296 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
303 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
305 switch (TYPE_ENDIANNESS(datumtype)) { \
306 case TYPE_IS_BIG_ENDIAN: \
307 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
309 case TYPE_IS_LITTLE_ENDIAN: \
310 var = (post_cast *) my_htole ## type ((pre_cast) var); \
317 # define BO_CANT_DOIT(action, type) \
319 switch (TYPE_ENDIANNESS(datumtype)) { \
320 case TYPE_IS_BIG_ENDIAN: \
321 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
322 "platform", #action, #type); \
324 case TYPE_IS_LITTLE_ENDIAN: \
325 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
326 "platform", #action, #type); \
333 # if PTRSIZE == INTSIZE
334 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
335 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
336 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
337 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
338 # elif PTRSIZE == LONGSIZE
339 # if LONGSIZE < IVSIZE && IVSIZE == 8
340 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, 64, IV, void)
341 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, 64, IV, void)
342 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, 64, IV, char)
343 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, 64, IV, char)
345 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
346 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
347 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
348 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
350 # elif PTRSIZE == IVSIZE
351 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
352 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
353 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
354 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
356 # define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
357 # define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
358 # define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
359 # define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
362 # if defined(my_htolen) && defined(my_letohn) && \
363 defined(my_htoben) && defined(my_betohn)
364 # define DO_BO_UNPACK_N(var, type) \
366 switch (TYPE_ENDIANNESS(datumtype)) { \
367 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
368 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
373 # define DO_BO_PACK_N(var, type) \
375 switch (TYPE_ENDIANNESS(datumtype)) { \
376 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
377 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
382 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
383 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
386 #endif /* PERL_PACK_CAN_BYTEORDER */
388 #define PACK_SIZE_CANNOT_CSUM 0x80
389 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
390 #define PACK_SIZE_MASK 0x3F
392 #include "packsizetables.c"
395 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
398 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
399 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
400 /* We try to process malformed UTF-8 as much as possible (preferably with
401 warnings), but these two mean we make no progress in the string and
402 might enter an infinite loop */
403 if (retlen == (STRLEN) -1 || retlen == 0)
404 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
405 (int) TYPE_NO_MODIFIERS(datumtype));
407 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
408 "Character in '%c' format wrapped in unpack",
409 (int) TYPE_NO_MODIFIERS(datumtype));
416 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
417 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
421 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
425 const char *from = *s;
427 const U32 flags = ckWARN(WARN_UTF8) ?
428 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
429 for (;buf_len > 0; buf_len--) {
430 if (from >= end) return FALSE;
431 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
432 if (retlen == (STRLEN) -1 || retlen == 0) {
433 from += UTF8SKIP(from);
435 } else from += retlen;
440 *(U8 *)buf++ = (U8)val;
442 /* We have enough characters for the buffer. Did we have problems ? */
445 /* Rewalk the string fragment while warning */
447 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
448 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
449 if (ptr >= end) break;
450 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
452 if (from > end) from = end;
455 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
456 WARN_PACK : WARN_UNPACK),
457 "Character(s) in '%c' format wrapped in %s",
458 (int) TYPE_NO_MODIFIERS(datumtype),
459 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
466 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
470 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
471 if (val >= 0x100 || !ISUUCHAR(val) ||
472 retlen == (STRLEN) -1 || retlen == 0) {
476 *out = PL_uudmap[val] & 077;
482 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
483 const U8 * const end = start + len;
485 PERL_ARGS_ASSERT_BYTES_TO_UNI;
487 while (start < end) {
488 const UV uv = NATIVE_TO_ASCII(*start);
489 if (UNI_IS_INVARIANT(uv))
490 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
492 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
493 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
500 #define PUSH_BYTES(utf8, cur, buf, len) \
503 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
505 Copy(buf, cur, len, char); \
510 #define GROWING(utf8, cat, start, cur, in_len) \
512 STRLEN glen = (in_len); \
513 if (utf8) glen *= UTF8_EXPAND; \
514 if ((cur) + glen >= (start) + SvLEN(cat)) { \
515 (start) = sv_exp_grow(cat, glen); \
516 (cur) = (start) + SvCUR(cat); \
520 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
522 const STRLEN glen = (in_len); \
524 if (utf8) gl *= UTF8_EXPAND; \
525 if ((cur) + gl >= (start) + SvLEN(cat)) { \
527 SvCUR_set((cat), (cur) - (start)); \
528 (start) = sv_exp_grow(cat, gl); \
529 (cur) = (start) + SvCUR(cat); \
531 PUSH_BYTES(utf8, cur, buf, glen); \
534 #define PUSH_BYTE(utf8, s, byte) \
537 const U8 au8 = (byte); \
538 (s) = bytes_to_uni(&au8, 1, (s)); \
539 } else *(U8 *)(s)++ = (byte); \
542 /* Only to be used inside a loop (see the break) */
543 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
546 if (str >= end) break; \
547 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
548 if (retlen == (STRLEN) -1 || retlen == 0) { \
550 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
555 static const char *_action( const tempsym_t* symptr )
557 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
560 /* Returns the sizeof() struct described by pat */
562 S_measure_struct(pTHX_ tempsym_t* symptr)
566 PERL_ARGS_ASSERT_MEASURE_STRUCT;
568 while (next_symbol(symptr)) {
572 switch (symptr->howlen) {
574 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
578 /* e_no_len and e_number */
579 len = symptr->length;
583 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
586 /* endianness doesn't influence the size of a type */
587 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
589 Perl_croak(aTHX_ "Invalid type '%c' in %s",
590 (int)TYPE_NO_MODIFIERS(symptr->code),
592 #ifdef PERL_PACK_CAN_SHRIEKSIGN
593 case '.' | TYPE_IS_SHRIEKING:
594 case '@' | TYPE_IS_SHRIEKING:
599 case 'U': /* XXXX Is it correct? */
602 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
603 (int) TYPE_NO_MODIFIERS(symptr->code),
610 tempsym_t savsym = *symptr;
611 symptr->patptr = savsym.grpbeg;
612 symptr->patend = savsym.grpend;
613 /* XXXX Theoretically, we need to measure many times at
614 different positions, since the subexpression may contain
615 alignment commands, but be not of aligned length.
616 Need to detect this and croak(). */
617 size = measure_struct(symptr);
621 case 'X' | TYPE_IS_SHRIEKING:
622 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
624 if (!len) /* Avoid division by 0 */
626 len = total % len; /* Assumed: the start is aligned. */
631 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
633 case 'x' | TYPE_IS_SHRIEKING:
634 if (!len) /* Avoid division by 0 */
636 star = total % len; /* Assumed: the start is aligned. */
637 if (star) /* Other portable ways? */
661 size = sizeof(char*);
671 /* locate matching closing parenthesis or bracket
672 * returns char pointer to char after match, or NULL
675 S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
677 PERL_ARGS_ASSERT_GROUP_END;
679 while (patptr < patend) {
680 const char c = *patptr++;
687 while (patptr < patend && *patptr != '\n')
691 patptr = group_end(patptr, patend, ')') + 1;
693 patptr = group_end(patptr, patend, ']') + 1;
695 Perl_croak(aTHX_ "No group ending character '%c' found in template",
701 /* Convert unsigned decimal number to binary.
702 * Expects a pointer to the first digit and address of length variable
703 * Advances char pointer to 1st non-digit char and returns number
706 S_get_num(pTHX_ const char *patptr, I32 *lenptr )
708 I32 len = *patptr++ - '0';
710 PERL_ARGS_ASSERT_GET_NUM;
712 while (isDIGIT(*patptr)) {
713 if (len >= 0x7FFFFFFF/10)
714 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
715 len = (len * 10) + (*patptr++ - '0');
721 /* The marvellous template parsing routine: Using state stored in *symptr,
722 * locates next template code and count
725 S_next_symbol(pTHX_ tempsym_t* symptr )
727 const char* patptr = symptr->patptr;
728 const char* const patend = symptr->patend;
730 PERL_ARGS_ASSERT_NEXT_SYMBOL;
732 symptr->flags &= ~FLAG_SLASH;
734 while (patptr < patend) {
735 if (isSPACE(*patptr))
737 else if (*patptr == '#') {
739 while (patptr < patend && *patptr != '\n')
744 /* We should have found a template code */
745 I32 code = *patptr++ & 0xFF;
746 U32 inherited_modifiers = 0;
748 if (code == ','){ /* grandfather in commas but with a warning */
749 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
750 symptr->flags |= FLAG_COMMA;
751 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
752 "Invalid type ',' in %s", _action( symptr ) );
757 /* for '(', skip to ')' */
759 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
760 Perl_croak(aTHX_ "()-group starts with a count in %s",
762 symptr->grpbeg = patptr;
763 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
764 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
765 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
769 /* look for group modifiers to inherit */
770 if (TYPE_ENDIANNESS(symptr->flags)) {
771 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
772 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
775 /* look for modifiers */
776 while (patptr < patend) {
781 modifier = TYPE_IS_SHRIEKING;
782 allowed = SHRIEKING_ALLOWED_TYPES;
784 #ifdef PERL_PACK_CAN_BYTEORDER
786 modifier = TYPE_IS_BIG_ENDIAN;
787 allowed = ENDIANNESS_ALLOWED_TYPES;
790 modifier = TYPE_IS_LITTLE_ENDIAN;
791 allowed = ENDIANNESS_ALLOWED_TYPES;
793 #endif /* PERL_PACK_CAN_BYTEORDER */
803 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
804 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
805 allowed, _action( symptr ) );
807 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
808 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
809 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
810 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
811 TYPE_ENDIANNESS_MASK)
812 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
813 *patptr, _action( symptr ) );
815 if ((code & modifier)) {
816 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
817 "Duplicate modifier '%c' after '%c' in %s",
818 *patptr, (int) TYPE_NO_MODIFIERS(code),
826 /* inherit modifiers */
827 code |= inherited_modifiers;
829 /* look for count and/or / */
830 if (patptr < patend) {
831 if (isDIGIT(*patptr)) {
832 patptr = get_num( patptr, &symptr->length );
833 symptr->howlen = e_number;
835 } else if (*patptr == '*') {
837 symptr->howlen = e_star;
839 } else if (*patptr == '[') {
840 const char* lenptr = ++patptr;
841 symptr->howlen = e_number;
842 patptr = group_end( patptr, patend, ']' ) + 1;
843 /* what kind of [] is it? */
844 if (isDIGIT(*lenptr)) {
845 lenptr = get_num( lenptr, &symptr->length );
847 Perl_croak(aTHX_ "Malformed integer in [] in %s",
850 tempsym_t savsym = *symptr;
851 symptr->patend = patptr-1;
852 symptr->patptr = lenptr;
853 savsym.length = measure_struct(symptr);
857 symptr->howlen = e_no_len;
862 while (patptr < patend) {
863 if (isSPACE(*patptr))
865 else if (*patptr == '#') {
867 while (patptr < patend && *patptr != '\n')
872 if (*patptr == '/') {
873 symptr->flags |= FLAG_SLASH;
875 if (patptr < patend &&
876 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
877 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
884 /* at end - no count, no / */
885 symptr->howlen = e_no_len;
890 symptr->patptr = patptr;
894 symptr->patptr = patptr;
899 There is no way to cleanly handle the case where we should process the
900 string per byte in its upgraded form while it's really in downgraded form
901 (e.g. estimates like strend-s as an upper bound for the number of
902 characters left wouldn't work). So if we foresee the need of this
903 (pattern starts with U or contains U0), we want to work on the encoded
904 version of the string. Users are advised to upgrade their pack string
905 themselves if they need to do a lot of unpacks like this on it
908 need_utf8(const char *pat, const char *patend)
912 PERL_ARGS_ASSERT_NEED_UTF8;
914 while (pat < patend) {
917 pat = (const char *) memchr(pat, '\n', patend-pat);
918 if (!pat) return FALSE;
919 } else if (pat[0] == 'U') {
920 if (first || pat[1] == '0') return TRUE;
921 } else first = FALSE;
928 first_symbol(const char *pat, const char *patend) {
929 PERL_ARGS_ASSERT_FIRST_SYMBOL;
931 while (pat < patend) {
932 if (pat[0] != '#') return pat[0];
934 pat = (const char *) memchr(pat, '\n', patend-pat);
942 =for apidoc unpackstring
944 The engine implementing the unpack() Perl function.
946 Using the template pat..patend, this function unpacks the string
947 s..strend into a number of mortal SVs, which it pushes onto the perl
948 argument (@_) stack (so you will need to issue a C<PUTBACK> before and
949 C<SPAGAIN> after the call to this function). It returns the number of
952 The strend and patend pointers should point to the byte following the last
953 character of each string.
955 Although this function returns its values on the perl argument stack, it
956 doesn't take any parameters from that stack (and thus in particular
957 there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
963 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
967 PERL_ARGS_ASSERT_UNPACKSTRING;
969 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
970 else if (need_utf8(pat, patend)) {
971 /* We probably should try to avoid this in case a scalar context call
972 wouldn't get to the "U0" */
973 STRLEN len = strend - s;
974 s = (char *) bytes_to_utf8((U8 *) s, &len);
977 flags |= FLAG_DO_UTF8;
980 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
981 flags |= FLAG_PARSE_UTF8;
983 TEMPSYM_INIT(&sym, pat, patend, flags);
985 return unpack_rec(&sym, s, s, strend, NULL );
989 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
993 const I32 start_sp_offset = SP - PL_stack_base;
998 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1000 bool explicit_length;
1001 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1002 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1004 PERL_ARGS_ASSERT_UNPACK_REC;
1006 symptr->strbeg = s - strbeg;
1008 while (next_symbol(symptr)) {
1011 I32 datumtype = symptr->code;
1012 /* do first one only unless in list context
1013 / is implemented by unpacking the count, then popping it from the
1014 stack, so must check that we're not in the middle of a / */
1015 if ( unpack_only_one
1016 && (SP - PL_stack_base == start_sp_offset + 1)
1017 && (datumtype != '/') ) /* XXX can this be omitted */
1020 switch (howlen = symptr->howlen) {
1022 len = strend - strbeg; /* long enough */
1025 /* e_no_len and e_number */
1026 len = symptr->length;
1030 explicit_length = TRUE;
1032 beyond = s >= strend;
1034 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1036 /* props nonzero means we can process this letter. */
1037 const long size = props & PACK_SIZE_MASK;
1038 const long howmany = (strend - s) / size;
1042 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1043 if (len && unpack_only_one) len = 1;
1049 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1051 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1054 if (howlen == e_no_len)
1055 len = 16; /* len is not specified */
1063 tempsym_t savsym = *symptr;
1064 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1065 symptr->flags |= group_modifiers;
1066 symptr->patend = savsym.grpend;
1067 symptr->previous = &savsym;
1070 if (len && unpack_only_one) len = 1;
1072 symptr->patptr = savsym.grpbeg;
1073 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1074 else symptr->flags &= ~FLAG_PARSE_UTF8;
1075 unpack_rec(symptr, s, strbeg, strend, &s);
1076 if (s == strend && savsym.howlen == e_star)
1077 break; /* No way to continue */
1080 savsym.flags = symptr->flags & ~group_modifiers;
1084 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1085 case '.' | TYPE_IS_SHRIEKING:
1090 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1091 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1092 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1093 const bool u8 = utf8;
1095 if (howlen == e_star) from = strbeg;
1096 else if (len <= 0) from = s;
1098 tempsym_t *group = symptr;
1100 while (--len && group) group = group->previous;
1101 from = group ? strbeg + group->strbeg : strbeg;
1104 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1105 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1109 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1110 case '@' | TYPE_IS_SHRIEKING:
1113 s = strbeg + symptr->strbeg;
1114 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1115 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1116 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1122 Perl_croak(aTHX_ "'@' outside of string in unpack");
1127 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1130 Perl_croak(aTHX_ "'@' outside of string in unpack");
1134 case 'X' | TYPE_IS_SHRIEKING:
1135 if (!len) /* Avoid division by 0 */
1138 const char *hop, *last;
1140 hop = last = strbeg;
1142 hop += UTF8SKIP(hop);
1149 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1153 len = (s - strbeg) % len;
1159 Perl_croak(aTHX_ "'X' outside of string in unpack");
1160 while (--s, UTF8_IS_CONTINUATION(*s)) {
1162 Perl_croak(aTHX_ "'X' outside of string in unpack");
1167 if (len > s - strbeg)
1168 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1172 case 'x' | TYPE_IS_SHRIEKING: {
1174 if (!len) /* Avoid division by 0 */
1176 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1177 else ai32 = (s - strbeg) % len;
1178 if (ai32 == 0) break;
1186 Perl_croak(aTHX_ "'x' outside of string in unpack");
1191 if (len > strend - s)
1192 Perl_croak(aTHX_ "'x' outside of string in unpack");
1197 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1203 /* Preliminary length estimate is assumed done in 'W' */
1204 if (len > strend - s) len = strend - s;
1210 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1211 if (hop >= strend) {
1213 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1218 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1220 } else if (len > strend - s)
1223 if (datumtype == 'Z') {
1224 /* 'Z' strips stuff after first null */
1225 const char *ptr, *end;
1227 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1228 sv = newSVpvn(s, ptr-s);
1229 if (howlen == e_star) /* exact for 'Z*' */
1230 len = ptr-s + (ptr != strend ? 1 : 0);
1231 } else if (datumtype == 'A') {
1232 /* 'A' strips both nulls and spaces */
1234 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1235 for (ptr = s+len-1; ptr >= s; ptr--)
1236 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1237 !isSPACE_utf8(ptr)) break;
1238 if (ptr >= s) ptr += UTF8SKIP(ptr);
1241 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1243 for (ptr = s+len-1; ptr >= s; ptr--)
1244 if (*ptr != 0 && !isSPACE(*ptr)) break;
1247 sv = newSVpvn(s, ptr-s);
1248 } else sv = newSVpvn(s, len);
1252 /* Undo any upgrade done due to need_utf8() */
1253 if (!(symptr->flags & FLAG_WAS_UTF8))
1254 sv_utf8_downgrade(sv, 0);
1262 if (howlen == e_star || len > (strend - s) * 8)
1263 len = (strend - s) * 8;
1266 while (len >= 8 && s < strend) {
1267 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1272 cuv += PL_bitcount[*(U8 *)s++];
1275 if (len && s < strend) {
1277 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1278 if (datumtype == 'b')
1280 if (bits & 1) cuv++;
1285 if (bits & 0x80) cuv++;
1292 sv = sv_2mortal(newSV(len ? len : 1));
1295 if (datumtype == 'b') {
1297 const I32 ai32 = len;
1298 for (len = 0; len < ai32; len++) {
1299 if (len & 7) bits >>= 1;
1301 if (s >= strend) break;
1302 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1303 } else bits = *(U8 *) s++;
1304 *str++ = bits & 1 ? '1' : '0';
1308 const I32 ai32 = len;
1309 for (len = 0; len < ai32; len++) {
1310 if (len & 7) bits <<= 1;
1312 if (s >= strend) break;
1313 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1314 } else bits = *(U8 *) s++;
1315 *str++ = bits & 0x80 ? '1' : '0';
1319 SvCUR_set(sv, str - SvPVX_const(sv));
1326 /* Preliminary length estimate, acceptable for utf8 too */
1327 if (howlen == e_star || len > (strend - s) * 2)
1328 len = (strend - s) * 2;
1330 sv = sv_2mortal(newSV(len ? len : 1));
1334 if (datumtype == 'h') {
1337 for (len = 0; len < ai32; len++) {
1338 if (len & 1) bits >>= 4;
1340 if (s >= strend) break;
1341 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1342 } else bits = * (U8 *) s++;
1344 *str++ = PL_hexdigit[bits & 15];
1348 const I32 ai32 = len;
1349 for (len = 0; len < ai32; len++) {
1350 if (len & 1) bits <<= 4;
1352 if (s >= strend) break;
1353 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1354 } else bits = *(U8 *) s++;
1356 *str++ = PL_hexdigit[(bits >> 4) & 15];
1361 SvCUR_set(sv, str - SvPVX_const(sv));
1368 if (explicit_length)
1369 /* Switch to "character" mode */
1370 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1375 while (len-- > 0 && s < strend) {
1380 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1381 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1382 if (retlen == (STRLEN) -1 || retlen == 0)
1383 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1387 aint = *(U8 *)(s)++;
1388 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1392 else if (checksum > bits_in_uv)
1393 cdouble += (NV)aint;
1401 while (len-- > 0 && s < strend) {
1403 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1404 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1405 if (retlen == (STRLEN) -1 || retlen == 0)
1406 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1410 else if (checksum > bits_in_uv)
1411 cdouble += (NV) val;
1415 } else if (!checksum)
1417 const U8 ch = *(U8 *) s++;
1420 else if (checksum > bits_in_uv)
1421 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1423 while (len-- > 0) cuv += *(U8 *) s++;
1427 if (explicit_length && howlen != e_star) {
1428 /* Switch to "bytes in UTF-8" mode */
1429 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1431 /* Should be impossible due to the need_utf8() test */
1432 Perl_croak(aTHX_ "U0 mode on a byte string");
1436 if (len > strend - s) len = strend - s;
1438 if (len && unpack_only_one) len = 1;
1442 while (len-- > 0 && s < strend) {
1446 U8 result[UTF8_MAXLEN];
1447 const char *ptr = s;
1449 /* Bug: warns about bad utf8 even if we are short on bytes
1450 and will break out of the loop */
1451 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1454 len = UTF8SKIP(result);
1455 if (!uni_to_bytes(aTHX_ &ptr, strend,
1456 (char *) &result[1], len-1, 'U')) break;
1457 auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT);
1460 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
1461 if (retlen == (STRLEN) -1 || retlen == 0)
1462 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1467 else if (checksum > bits_in_uv)
1468 cdouble += (NV) auv;
1473 case 's' | TYPE_IS_SHRIEKING:
1474 #if SHORTSIZE != SIZE16
1477 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1478 DO_BO_UNPACK(ashort, s);
1481 else if (checksum > bits_in_uv)
1482 cdouble += (NV)ashort;
1494 #if U16SIZE > SIZE16
1497 SHIFT16(utf8, s, strend, &ai16, datumtype);
1498 DO_BO_UNPACK(ai16, 16);
1499 #if U16SIZE > SIZE16
1505 else if (checksum > bits_in_uv)
1506 cdouble += (NV)ai16;
1511 case 'S' | TYPE_IS_SHRIEKING:
1512 #if SHORTSIZE != SIZE16
1514 unsigned short aushort;
1515 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1516 DO_BO_UNPACK(aushort, s);
1519 else if (checksum > bits_in_uv)
1520 cdouble += (NV)aushort;
1533 #if U16SIZE > SIZE16
1536 SHIFT16(utf8, s, strend, &au16, datumtype);
1537 DO_BO_UNPACK(au16, 16);
1539 if (datumtype == 'n')
1540 au16 = PerlSock_ntohs(au16);
1543 if (datumtype == 'v')
1548 else if (checksum > bits_in_uv)
1549 cdouble += (NV) au16;
1554 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1555 case 'v' | TYPE_IS_SHRIEKING:
1556 case 'n' | TYPE_IS_SHRIEKING:
1559 # if U16SIZE > SIZE16
1562 SHIFT16(utf8, s, strend, &ai16, datumtype);
1564 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1565 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1566 # endif /* HAS_NTOHS */
1568 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1569 ai16 = (I16) vtohs((U16) ai16);
1570 # endif /* HAS_VTOHS */
1573 else if (checksum > bits_in_uv)
1574 cdouble += (NV) ai16;
1579 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1581 case 'i' | TYPE_IS_SHRIEKING:
1584 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1585 DO_BO_UNPACK(aint, i);
1588 else if (checksum > bits_in_uv)
1589 cdouble += (NV)aint;
1595 case 'I' | TYPE_IS_SHRIEKING:
1598 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1599 DO_BO_UNPACK(auint, i);
1602 else if (checksum > bits_in_uv)
1603 cdouble += (NV)auint;
1611 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1612 #if IVSIZE == INTSIZE
1613 DO_BO_UNPACK(aiv, i);
1614 #elif IVSIZE == LONGSIZE
1615 DO_BO_UNPACK(aiv, l);
1616 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1617 DO_BO_UNPACK(aiv, 64);
1619 Perl_croak(aTHX_ "'j' not supported on this platform");
1623 else if (checksum > bits_in_uv)
1632 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1633 #if IVSIZE == INTSIZE
1634 DO_BO_UNPACK(auv, i);
1635 #elif IVSIZE == LONGSIZE
1636 DO_BO_UNPACK(auv, l);
1637 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1638 DO_BO_UNPACK(auv, 64);
1640 Perl_croak(aTHX_ "'J' not supported on this platform");
1644 else if (checksum > bits_in_uv)
1650 case 'l' | TYPE_IS_SHRIEKING:
1651 #if LONGSIZE != SIZE32
1654 SHIFT_VAR(utf8, s, strend, along, datumtype);
1655 DO_BO_UNPACK(along, l);
1658 else if (checksum > bits_in_uv)
1659 cdouble += (NV)along;
1670 #if U32SIZE > SIZE32
1673 SHIFT32(utf8, s, strend, &ai32, datumtype);
1674 DO_BO_UNPACK(ai32, 32);
1675 #if U32SIZE > SIZE32
1676 if (ai32 > 2147483647) ai32 -= 4294967296;
1680 else if (checksum > bits_in_uv)
1681 cdouble += (NV)ai32;
1686 case 'L' | TYPE_IS_SHRIEKING:
1687 #if LONGSIZE != SIZE32
1689 unsigned long aulong;
1690 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1691 DO_BO_UNPACK(aulong, l);
1694 else if (checksum > bits_in_uv)
1695 cdouble += (NV)aulong;
1708 #if U32SIZE > SIZE32
1711 SHIFT32(utf8, s, strend, &au32, datumtype);
1712 DO_BO_UNPACK(au32, 32);
1714 if (datumtype == 'N')
1715 au32 = PerlSock_ntohl(au32);
1718 if (datumtype == 'V')
1723 else if (checksum > bits_in_uv)
1724 cdouble += (NV)au32;
1729 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1730 case 'V' | TYPE_IS_SHRIEKING:
1731 case 'N' | TYPE_IS_SHRIEKING:
1734 # if U32SIZE > SIZE32
1737 SHIFT32(utf8, s, strend, &ai32, datumtype);
1739 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1740 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1743 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1744 ai32 = (I32)vtohl((U32)ai32);
1748 else if (checksum > bits_in_uv)
1749 cdouble += (NV)ai32;
1754 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1758 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1759 DO_BO_UNPACK_PC(aptr);
1760 /* newSVpv generates undef if aptr is NULL */
1761 mPUSHs(newSVpv(aptr, 0));
1769 while (len > 0 && s < strend) {
1771 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1772 auv = (auv << 7) | (ch & 0x7f);
1773 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1781 if (++bytes >= sizeof(UV)) { /* promote to string */
1784 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
1785 while (s < strend) {
1786 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1787 sv = mul128(sv, (U8)(ch & 0x7f));
1793 t = SvPV_nolen_const(sv);
1802 if ((s >= strend) && bytes)
1803 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1807 if (symptr->howlen == e_star)
1808 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1810 if (s + sizeof(char*) <= strend) {
1812 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1813 DO_BO_UNPACK_PC(aptr);
1814 /* newSVpvn generates undef if aptr is NULL */
1815 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1822 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
1823 DO_BO_UNPACK(aquad, 64);
1825 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
1826 newSViv((IV)aquad) : newSVnv((NV)aquad));
1827 else if (checksum > bits_in_uv)
1828 cdouble += (NV)aquad;
1836 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
1837 DO_BO_UNPACK(auquad, 64);
1839 mPUSHs(auquad <= UV_MAX ?
1840 newSVuv((UV)auquad) : newSVnv((NV)auquad));
1841 else if (checksum > bits_in_uv)
1842 cdouble += (NV)auquad;
1847 #endif /* HAS_QUAD */
1848 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1852 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
1853 DO_BO_UNPACK_N(afloat, float);
1863 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
1864 DO_BO_UNPACK_N(adouble, double);
1874 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), datumtype);
1875 DO_BO_UNPACK_N(anv.nv, NV);
1882 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1886 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, sizeof(aldouble.bytes), datumtype);
1887 DO_BO_UNPACK_N(aldouble.ld, long double);
1889 mPUSHn(aldouble.ld);
1891 cdouble += aldouble.ld;
1897 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1898 sv = sv_2mortal(newSV(l));
1899 if (l) SvPOK_on(sv);
1902 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1907 next_uni_uu(aTHX_ &s, strend, &a);
1908 next_uni_uu(aTHX_ &s, strend, &b);
1909 next_uni_uu(aTHX_ &s, strend, &c);
1910 next_uni_uu(aTHX_ &s, strend, &d);
1911 hunk[0] = (char)((a << 2) | (b >> 4));
1912 hunk[1] = (char)((b << 4) | (c >> 2));
1913 hunk[2] = (char)((c << 6) | d);
1915 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1923 /* possible checksum byte */
1924 const char *skip = s+UTF8SKIP(s);
1925 if (skip < strend && *skip == '\n')
1931 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1935 len = PL_uudmap[*(U8*)s++] & 077;
1937 if (s < strend && ISUUCHAR(*s))
1938 a = PL_uudmap[*(U8*)s++] & 077;
1941 if (s < strend && ISUUCHAR(*s))
1942 b = PL_uudmap[*(U8*)s++] & 077;
1945 if (s < strend && ISUUCHAR(*s))
1946 c = PL_uudmap[*(U8*)s++] & 077;
1949 if (s < strend && ISUUCHAR(*s))
1950 d = PL_uudmap[*(U8*)s++] & 077;
1953 hunk[0] = (char)((a << 2) | (b >> 4));
1954 hunk[1] = (char)((b << 4) | (c >> 2));
1955 hunk[2] = (char)((c << 6) | d);
1957 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1962 else /* possible checksum byte */
1963 if (s + 1 < strend && s[1] == '\n')
1973 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1974 (checksum > bits_in_uv &&
1975 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1978 anv = (NV) (1 << (checksum & 15));
1979 while (checksum >= 16) {
1983 while (cdouble < 0.0)
1985 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
1986 sv = newSVnv(cdouble);
1989 if (checksum < bits_in_uv) {
1990 UV mask = ((UV)1 << checksum) - 1;
1999 if (symptr->flags & FLAG_SLASH){
2000 if (SP - PL_stack_base - start_sp_offset <= 0)
2002 if( next_symbol(symptr) ){
2003 if( symptr->howlen == e_number )
2004 Perl_croak(aTHX_ "Count after length/code in unpack" );
2006 /* ...end of char buffer then no decent length available */
2007 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2009 /* take top of stack (hope it's numeric) */
2012 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2015 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2017 datumtype = symptr->code;
2018 explicit_length = FALSE;
2026 return SP - PL_stack_base - start_sp_offset;
2034 I32 gimme = GIMME_V;
2037 const char *pat = SvPV_const(left, llen);
2038 const char *s = SvPV_const(right, rlen);
2039 const char *strend = s + rlen;
2040 const char *patend = pat + llen;
2044 cnt = unpackstring(pat, patend, s, strend,
2045 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2046 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2049 if ( !cnt && gimme == G_SCALAR )
2050 PUSHs(&PL_sv_undef);
2055 doencodes(U8 *h, const char *s, I32 len)
2057 *h++ = PL_uuemap[len];
2059 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2060 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2061 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2062 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2067 const char r = (len > 1 ? s[1] : '\0');
2068 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2069 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2070 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2071 *h++ = PL_uuemap[0];
2078 S_is_an_int(pTHX_ const char *s, STRLEN l)
2080 SV *result = newSVpvn(s, l);
2081 char *const result_c = SvPV_nolen(result); /* convenience */
2082 char *out = result_c;
2086 PERL_ARGS_ASSERT_IS_AN_INT;
2094 SvREFCNT_dec(result);
2117 SvREFCNT_dec(result);
2123 SvCUR_set(result, out - result_c);
2127 /* pnum must be '\0' terminated */
2129 S_div128(pTHX_ SV *pnum, bool *done)
2132 char * const s = SvPV(pnum, len);
2136 PERL_ARGS_ASSERT_DIV128;
2140 const int i = m * 10 + (*t - '0');
2141 const int r = (i >> 7); /* r < 10 */
2149 SvCUR_set(pnum, (STRLEN) (t - s));
2154 =for apidoc packlist
2156 The engine implementing pack() Perl function.
2162 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
2167 PERL_ARGS_ASSERT_PACKLIST;
2169 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2171 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2172 Also make sure any UTF8 flag is loaded */
2173 SvPV_force_nolen(cat);
2175 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2177 (void)pack_rec( cat, &sym, beglist, endlist );
2180 /* like sv_utf8_upgrade, but also repoint the group start markers */
2182 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2185 const char *from_ptr, *from_start, *from_end, **marks, **m;
2186 char *to_start, *to_ptr;
2188 if (SvUTF8(sv)) return;
2190 from_start = SvPVX_const(sv);
2191 from_end = from_start + SvCUR(sv);
2192 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2193 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2194 if (from_ptr == from_end) {
2195 /* Simple case: no character needs to be changed */
2200 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2201 Newx(to_start, len, char);
2202 Copy(from_start, to_start, from_ptr-from_start, char);
2203 to_ptr = to_start + (from_ptr-from_start);
2205 Newx(marks, sym_ptr->level+2, const char *);
2206 for (group=sym_ptr; group; group = group->previous)
2207 marks[group->level] = from_start + group->strbeg;
2208 marks[sym_ptr->level+1] = from_end+1;
2209 for (m = marks; *m < from_ptr; m++)
2210 *m = to_start + (*m-from_start);
2212 for (;from_ptr < from_end; from_ptr++) {
2213 while (*m == from_ptr) *m++ = to_ptr;
2214 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2218 while (*m == from_ptr) *m++ = to_ptr;
2219 if (m != marks + sym_ptr->level+1) {
2222 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2223 "level=%d", m, marks, sym_ptr->level);
2225 for (group=sym_ptr; group; group = group->previous)
2226 group->strbeg = marks[group->level] - to_start;
2231 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2232 from_start -= SvIVX(sv);
2235 SvFLAGS(sv) &= ~SVf_OOK;
2238 Safefree(from_start);
2239 SvPV_set(sv, to_start);
2240 SvCUR_set(sv, to_ptr - to_start);
2245 /* Exponential string grower. Makes string extension effectively O(n)
2246 needed says how many extra bytes we need (not counting the final '\0')
2247 Only grows the string if there is an actual lack of space
2250 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2251 const STRLEN cur = SvCUR(sv);
2252 const STRLEN len = SvLEN(sv);
2255 PERL_ARGS_ASSERT_SV_EXP_GROW;
2257 if (len - cur > needed) return SvPVX(sv);
2258 extend = needed > len ? needed : len;
2259 return SvGROW(sv, len+extend+1);
2264 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2267 tempsym_t lookahead;
2268 I32 items = endlist - beglist;
2269 bool found = next_symbol(symptr);
2270 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2271 bool warn_utf8 = ckWARN(WARN_UTF8);
2273 PERL_ARGS_ASSERT_PACK_REC;
2275 if (symptr->level == 0 && found && symptr->code == 'U') {
2276 marked_upgrade(aTHX_ cat, symptr);
2277 symptr->flags |= FLAG_DO_UTF8;
2280 symptr->strbeg = SvCUR(cat);
2286 SV *lengthcode = NULL;
2287 I32 datumtype = symptr->code;
2288 howlen_t howlen = symptr->howlen;
2289 char *start = SvPVX(cat);
2290 char *cur = start + SvCUR(cat);
2292 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2296 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2300 /* e_no_len and e_number */
2301 len = symptr->length;
2306 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2308 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2309 /* We can process this letter. */
2310 STRLEN size = props & PACK_SIZE_MASK;
2311 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2315 /* Look ahead for next symbol. Do we have code/code? */
2316 lookahead = *symptr;
2317 found = next_symbol(&lookahead);
2318 if (symptr->flags & FLAG_SLASH) {
2320 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2321 if (strchr("aAZ", lookahead.code)) {
2322 if (lookahead.howlen == e_number) count = lookahead.length;
2325 count = sv_len_utf8(*beglist);
2328 if (lookahead.code == 'Z') count++;
2331 if (lookahead.howlen == e_number && lookahead.length < items)
2332 count = lookahead.length;
2335 lookahead.howlen = e_number;
2336 lookahead.length = count;
2337 lengthcode = sv_2mortal(newSViv(count));
2340 /* Code inside the switch must take care to properly update
2341 cat (CUR length and '\0' termination) if it updated *cur and
2342 doesn't simply leave using break */
2343 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2345 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2346 (int) TYPE_NO_MODIFIERS(datumtype));
2348 Perl_croak(aTHX_ "'%%' may not be used in pack");
2351 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2352 case '.' | TYPE_IS_SHRIEKING:
2355 if (howlen == e_star) from = start;
2356 else if (len == 0) from = cur;
2358 tempsym_t *group = symptr;
2360 while (--len && group) group = group->previous;
2361 from = group ? start + group->strbeg : start;
2364 len = SvIV(fromstr);
2366 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2367 case '@' | TYPE_IS_SHRIEKING:
2370 from = start + symptr->strbeg;
2372 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2373 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2374 #else /* PERL_PACK_CAN_SHRIEKSIGN */
2378 while (len && from < cur) {
2379 from += UTF8SKIP(from);
2383 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2385 /* Here we know from == cur */
2387 GROWING(0, cat, start, cur, len);
2388 Zero(cur, len, char);
2390 } else if (from < cur) {
2393 } else goto no_change;
2401 if (len > 0) goto grow;
2402 if (len == 0) goto no_change;
2409 tempsym_t savsym = *symptr;
2410 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2411 symptr->flags |= group_modifiers;
2412 symptr->patend = savsym.grpend;
2414 symptr->previous = &lookahead;
2417 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2418 else symptr->flags &= ~FLAG_PARSE_UTF8;
2419 was_utf8 = SvUTF8(cat);
2420 symptr->patptr = savsym.grpbeg;
2421 beglist = pack_rec(cat, symptr, beglist, endlist);
2422 if (SvUTF8(cat) != was_utf8)
2423 /* This had better be an upgrade while in utf8==0 mode */
2426 if (savsym.howlen == e_star && beglist == endlist)
2427 break; /* No way to continue */
2429 items = endlist - beglist;
2430 lookahead.flags = symptr->flags & ~group_modifiers;
2433 case 'X' | TYPE_IS_SHRIEKING:
2434 if (!len) /* Avoid division by 0 */
2441 hop += UTF8SKIP(hop);
2448 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2452 len = (cur-start) % len;
2456 if (len < 1) goto no_change;
2460 Perl_croak(aTHX_ "'%c' outside of string in pack",
2461 (int) TYPE_NO_MODIFIERS(datumtype));
2462 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2464 Perl_croak(aTHX_ "'%c' outside of string in pack",
2465 (int) TYPE_NO_MODIFIERS(datumtype));
2471 if (cur - start < len)
2472 Perl_croak(aTHX_ "'%c' outside of string in pack",
2473 (int) TYPE_NO_MODIFIERS(datumtype));
2476 if (cur < start+symptr->strbeg) {
2477 /* Make sure group starts don't point into the void */
2479 const STRLEN length = cur-start;
2480 for (group = symptr;
2481 group && length < group->strbeg;
2482 group = group->previous) group->strbeg = length;
2483 lookahead.strbeg = length;
2486 case 'x' | TYPE_IS_SHRIEKING: {
2488 if (!len) /* Avoid division by 0 */
2490 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2491 else ai32 = (cur - start) % len;
2492 if (ai32 == 0) goto no_change;
2504 aptr = SvPV_const(fromstr, fromlen);
2505 if (DO_UTF8(fromstr)) {
2506 const char *end, *s;
2508 if (!utf8 && !SvUTF8(cat)) {
2509 marked_upgrade(aTHX_ cat, symptr);
2510 lookahead.flags |= FLAG_DO_UTF8;
2511 lookahead.strbeg = symptr->strbeg;
2514 cur = start + SvCUR(cat);
2516 if (howlen == e_star) {
2517 if (utf8) goto string_copy;
2521 end = aptr + fromlen;
2522 fromlen = datumtype == 'Z' ? len-1 : len;
2523 while ((I32) fromlen > 0 && s < end) {
2528 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2531 if (datumtype == 'Z') len++;
2537 fromlen = len - fromlen;
2538 if (datumtype == 'Z') fromlen--;
2539 if (howlen == e_star) {
2541 if (datumtype == 'Z') len++;
2543 GROWING(0, cat, start, cur, len);
2544 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2545 datumtype | TYPE_IS_PACK))
2546 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2547 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2548 (int)datumtype, aptr, end, cur, (UV)fromlen);
2552 if (howlen == e_star) {
2554 if (datumtype == 'Z') len++;
2556 if (len <= (I32) fromlen) {
2558 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2560 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2562 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2563 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2565 while (fromlen > 0) {
2566 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2572 if (howlen == e_star) {
2574 if (datumtype == 'Z') len++;
2576 if (len <= (I32) fromlen) {
2578 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2580 GROWING(0, cat, start, cur, len);
2581 Copy(aptr, cur, fromlen, char);
2585 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2592 const char *str, *end;
2599 str = SvPV_const(fromstr, fromlen);
2600 end = str + fromlen;
2601 if (DO_UTF8(fromstr)) {
2603 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2605 utf8_source = FALSE;
2606 utf8_flags = 0; /* Unused, but keep compilers happy */
2608 if (howlen == e_star) len = fromlen;
2609 field_len = (len+7)/8;
2610 GROWING(utf8, cat, start, cur, field_len);
2611 if (len > (I32)fromlen) len = fromlen;
2614 if (datumtype == 'B')
2618 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2620 } else bits |= *str++ & 1;
2621 if (l & 7) bits <<= 1;
2623 PUSH_BYTE(utf8, cur, bits);
2628 /* datumtype == 'b' */
2632 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2633 if (val & 1) bits |= 0x80;
2634 } else if (*str++ & 1)
2636 if (l & 7) bits >>= 1;
2638 PUSH_BYTE(utf8, cur, bits);
2644 if (datumtype == 'B')
2645 bits <<= 7 - (l & 7);
2647 bits >>= 7 - (l & 7);
2648 PUSH_BYTE(utf8, cur, bits);
2651 /* Determine how many chars are left in the requested field */
2653 if (howlen == e_star) field_len = 0;
2654 else field_len -= l;
2655 Zero(cur, field_len, char);
2661 const char *str, *end;
2668 str = SvPV_const(fromstr, fromlen);
2669 end = str + fromlen;
2670 if (DO_UTF8(fromstr)) {
2672 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2674 utf8_source = FALSE;
2675 utf8_flags = 0; /* Unused, but keep compilers happy */
2677 if (howlen == e_star) len = fromlen;
2678 field_len = (len+1)/2;
2679 GROWING(utf8, cat, start, cur, field_len);
2680 if (!utf8 && len > (I32)fromlen) len = fromlen;
2683 if (datumtype == 'H')
2687 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2688 if (val < 256 && isALPHA(val))
2689 bits |= (val + 9) & 0xf;
2692 } else if (isALPHA(*str))
2693 bits |= (*str++ + 9) & 0xf;
2695 bits |= *str++ & 0xf;
2696 if (l & 1) bits <<= 4;
2698 PUSH_BYTE(utf8, cur, bits);
2706 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2707 if (val < 256 && isALPHA(val))
2708 bits |= ((val + 9) & 0xf) << 4;
2710 bits |= (val & 0xf) << 4;
2711 } else if (isALPHA(*str))
2712 bits |= ((*str++ + 9) & 0xf) << 4;
2714 bits |= (*str++ & 0xf) << 4;
2715 if (l & 1) bits >>= 4;
2717 PUSH_BYTE(utf8, cur, bits);
2723 PUSH_BYTE(utf8, cur, bits);
2726 /* Determine how many chars are left in the requested field */
2728 if (howlen == e_star) field_len = 0;
2729 else field_len -= l;
2730 Zero(cur, field_len, char);
2738 aiv = SvIV(fromstr);
2739 if ((-128 > aiv || aiv > 127))
2740 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2741 "Character in 'c' format wrapped in pack");
2742 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2747 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2753 aiv = SvIV(fromstr);
2754 if ((0 > aiv || aiv > 0xff))
2755 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2756 "Character in 'C' format wrapped in pack");
2757 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2762 U8 in_bytes = (U8)IN_BYTES;
2764 end = start+SvLEN(cat)-1;
2765 if (utf8) end -= UTF8_MAXLEN-1;
2769 auv = SvUV(fromstr);
2770 if (in_bytes) auv = auv % 0x100;
2775 SvCUR_set(cat, cur - start);
2777 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2778 end = start+SvLEN(cat)-UTF8_MAXLEN;
2780 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2783 0 : UNICODE_ALLOW_ANY);
2788 SvCUR_set(cat, cur - start);
2789 marked_upgrade(aTHX_ cat, symptr);
2790 lookahead.flags |= FLAG_DO_UTF8;
2791 lookahead.strbeg = symptr->strbeg;
2794 cur = start + SvCUR(cat);
2795 end = start+SvLEN(cat)-UTF8_MAXLEN;
2798 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2799 "Character in 'W' format wrapped in pack");
2804 SvCUR_set(cat, cur - start);
2805 GROWING(0, cat, start, cur, len+1);
2806 end = start+SvLEN(cat)-1;
2808 *(U8 *) cur++ = (U8)auv;
2817 if (!(symptr->flags & FLAG_DO_UTF8)) {
2818 marked_upgrade(aTHX_ cat, symptr);
2819 lookahead.flags |= FLAG_DO_UTF8;
2820 lookahead.strbeg = symptr->strbeg;
2826 end = start+SvLEN(cat);
2827 if (!utf8) end -= UTF8_MAXLEN;
2831 auv = SvUV(fromstr);
2833 U8 buffer[UTF8_MAXLEN], *endb;
2834 endb = uvuni_to_utf8_flags(buffer, auv,
2836 0 : UNICODE_ALLOW_ANY);
2837 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2839 SvCUR_set(cat, cur - start);
2840 GROWING(0, cat, start, cur,
2841 len+(endb-buffer)*UTF8_EXPAND);
2842 end = start+SvLEN(cat);
2844 cur = bytes_to_uni(buffer, endb-buffer, cur);
2848 SvCUR_set(cat, cur - start);
2849 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2850 end = start+SvLEN(cat)-UTF8_MAXLEN;
2852 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
2854 0 : UNICODE_ALLOW_ANY);
2859 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2865 anv = SvNV(fromstr);
2866 # if defined(VMS) && !defined(_IEEE_FP)
2867 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2868 * on Alpha; fake it if we don't have them.
2872 else if (anv < -FLT_MAX)
2874 else afloat = (float)anv;
2876 afloat = (float)anv;
2878 DO_BO_PACK_N(afloat, float);
2879 PUSH_VAR(utf8, cur, afloat);
2887 anv = SvNV(fromstr);
2888 # if defined(VMS) && !defined(_IEEE_FP)
2889 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2890 * on Alpha; fake it if we don't have them.
2894 else if (anv < -DBL_MAX)
2896 else adouble = (double)anv;
2898 adouble = (double)anv;
2900 DO_BO_PACK_N(adouble, double);
2901 PUSH_VAR(utf8, cur, adouble);
2906 Zero(&anv, 1, NV); /* can be long double with unused bits */
2910 /* to work round a gcc/x86 bug; don't use SvNV */
2911 anv.nv = sv_2nv(fromstr);
2913 anv.nv = SvNV(fromstr);
2915 DO_BO_PACK_N(anv, NV);
2916 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes));
2920 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2923 /* long doubles can have unused bits, which may be nonzero */
2924 Zero(&aldouble, 1, long double);
2928 /* to work round a gcc/x86 bug; don't use SvNV */
2929 aldouble.ld = (long double)sv_2nv(fromstr);
2931 aldouble.ld = (long double)SvNV(fromstr);
2933 DO_BO_PACK_N(aldouble, long double);
2934 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes));
2939 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2940 case 'n' | TYPE_IS_SHRIEKING:
2946 ai16 = (I16)SvIV(fromstr);
2948 ai16 = PerlSock_htons(ai16);
2950 PUSH16(utf8, cur, &ai16);
2953 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2954 case 'v' | TYPE_IS_SHRIEKING:
2960 ai16 = (I16)SvIV(fromstr);
2964 PUSH16(utf8, cur, &ai16);
2967 case 'S' | TYPE_IS_SHRIEKING:
2968 #if SHORTSIZE != SIZE16
2970 unsigned short aushort;
2972 aushort = SvUV(fromstr);
2973 DO_BO_PACK(aushort, s);
2974 PUSH_VAR(utf8, cur, aushort);
2984 au16 = (U16)SvUV(fromstr);
2985 DO_BO_PACK(au16, 16);
2986 PUSH16(utf8, cur, &au16);
2989 case 's' | TYPE_IS_SHRIEKING:
2990 #if SHORTSIZE != SIZE16
2994 ashort = SvIV(fromstr);
2995 DO_BO_PACK(ashort, s);
2996 PUSH_VAR(utf8, cur, ashort);
3006 ai16 = (I16)SvIV(fromstr);
3007 DO_BO_PACK(ai16, 16);
3008 PUSH16(utf8, cur, &ai16);
3012 case 'I' | TYPE_IS_SHRIEKING:
3016 auint = SvUV(fromstr);
3017 DO_BO_PACK(auint, i);
3018 PUSH_VAR(utf8, cur, auint);
3025 aiv = SvIV(fromstr);
3026 #if IVSIZE == INTSIZE
3028 #elif IVSIZE == LONGSIZE
3030 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3031 DO_BO_PACK(aiv, 64);
3033 Perl_croak(aTHX_ "'j' not supported on this platform");
3035 PUSH_VAR(utf8, cur, aiv);
3042 auv = SvUV(fromstr);
3043 #if UVSIZE == INTSIZE
3045 #elif UVSIZE == LONGSIZE
3047 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3048 DO_BO_PACK(auv, 64);
3050 Perl_croak(aTHX_ "'J' not supported on this platform");
3052 PUSH_VAR(utf8, cur, auv);
3059 anv = SvNV(fromstr);
3063 SvCUR_set(cat, cur - start);
3064 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3067 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3068 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3069 any negative IVs will have already been got by the croak()
3070 above. IOK is untrue for fractions, so we test them
3071 against UV_MAX_P1. */
3072 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3073 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3074 char *in = buf + sizeof(buf);
3075 UV auv = SvUV(fromstr);
3078 *--in = (char)((auv & 0x7f) | 0x80);
3081 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3082 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3083 in, (buf + sizeof(buf)) - in);
3084 } else if (SvPOKp(fromstr))
3086 else if (SvNOKp(fromstr)) {
3087 /* 10**NV_MAX_10_EXP is the largest power of 10
3088 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
3089 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3090 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3091 And with that many bytes only Inf can overflow.
3092 Some C compilers are strict about integral constant
3093 expressions so we conservatively divide by a slightly
3094 smaller integer instead of multiplying by the exact
3095 floating-point value.
3097 #ifdef NV_MAX_10_EXP
3098 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3099 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3101 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3102 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3104 char *in = buf + sizeof(buf);
3106 anv = Perl_floor(anv);
3108 const NV next = Perl_floor(anv / 128);
3109 if (in <= buf) /* this cannot happen ;-) */
3110 Perl_croak(aTHX_ "Cannot compress integer in pack");
3111 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3114 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3115 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3116 in, (buf + sizeof(buf)) - in);
3125 /* Copy string and check for compliance */
3126 from = SvPV_const(fromstr, len);
3127 if ((norm = is_an_int(from, len)) == NULL)
3128 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3130 Newx(result, len, char);
3133 while (!done) *--in = div128(norm, &done) | 0x80;
3134 result[len - 1] &= 0x7F; /* clear continue bit */
3135 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3136 in, (result + len) - in);
3138 SvREFCNT_dec(norm); /* free norm */
3143 case 'i' | TYPE_IS_SHRIEKING:
3147 aint = SvIV(fromstr);
3148 DO_BO_PACK(aint, i);
3149 PUSH_VAR(utf8, cur, aint);
3152 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3153 case 'N' | TYPE_IS_SHRIEKING:
3159 au32 = SvUV(fromstr);
3161 au32 = PerlSock_htonl(au32);
3163 PUSH32(utf8, cur, &au32);
3166 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3167 case 'V' | TYPE_IS_SHRIEKING:
3173 au32 = SvUV(fromstr);
3177 PUSH32(utf8, cur, &au32);
3180 case 'L' | TYPE_IS_SHRIEKING:
3181 #if LONGSIZE != SIZE32
3183 unsigned long aulong;
3185 aulong = SvUV(fromstr);
3186 DO_BO_PACK(aulong, l);
3187 PUSH_VAR(utf8, cur, aulong);
3197 au32 = SvUV(fromstr);
3198 DO_BO_PACK(au32, 32);
3199 PUSH32(utf8, cur, &au32);
3202 case 'l' | TYPE_IS_SHRIEKING:
3203 #if LONGSIZE != SIZE32
3207 along = SvIV(fromstr);
3208 DO_BO_PACK(along, l);
3209 PUSH_VAR(utf8, cur, along);
3219 ai32 = SvIV(fromstr);
3220 DO_BO_PACK(ai32, 32);
3221 PUSH32(utf8, cur, &ai32);
3229 auquad = (Uquad_t) SvUV(fromstr);
3230 DO_BO_PACK(auquad, 64);
3231 PUSH_VAR(utf8, cur, auquad);
3238 aquad = (Quad_t)SvIV(fromstr);
3239 DO_BO_PACK(aquad, 64);
3240 PUSH_VAR(utf8, cur, aquad);
3243 #endif /* HAS_QUAD */
3245 len = 1; /* assume SV is correct length */
3246 GROWING(utf8, cat, start, cur, sizeof(char *));
3253 SvGETMAGIC(fromstr);
3254 if (!SvOK(fromstr)) aptr = NULL;
3256 /* XXX better yet, could spirit away the string to
3257 * a safe spot and hang on to it until the result
3258 * of pack() (and all copies of the result) are
3261 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3262 !SvREADONLY(fromstr)))) {
3263 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3264 "Attempt to pack pointer to temporary value");
3266 if (SvPOK(fromstr) || SvNIOK(fromstr))
3267 aptr = SvPV_nomg_const_nolen(fromstr);
3269 aptr = SvPV_force_flags_nolen(fromstr, 0);
3271 DO_BO_PACK_PC(aptr);
3272 PUSH_VAR(utf8, cur, aptr);
3276 const char *aptr, *aend;
3280 if (len <= 2) len = 45;
3281 else len = len / 3 * 3;
3283 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3284 "Field too wide in 'u' format in pack");
3287 aptr = SvPV_const(fromstr, fromlen);
3288 from_utf8 = DO_UTF8(fromstr);
3290 aend = aptr + fromlen;
3291 fromlen = sv_len_utf8_nomg(fromstr);
3292 } else aend = NULL; /* Unused, but keep compilers happy */
3293 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3294 while (fromlen > 0) {
3297 U8 hunk[1+63/3*4+1];
3299 if ((I32)fromlen > len)
3305 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3306 'u' | TYPE_IS_PACK)) {
3308 SvCUR_set(cat, cur - start);
3309 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3310 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3311 aptr, aend, buffer, (long) todo);
3313 end = doencodes(hunk, buffer, todo);
3315 end = doencodes(hunk, aptr, todo);
3318 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3325 SvCUR_set(cat, cur - start);
3327 *symptr = lookahead;
3336 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3339 SV *pat_sv = *++MARK;
3340 const char *pat = SvPV_const(pat_sv, fromlen);
3341 const char *patend = pat + fromlen;
3347 packlist(cat, pat, patend, MARK, SP + 1);
3357 * c-indentation-style: bsd
3359 * indent-tabs-mode: nil
3362 * ex: set ts=8 sts=4 sw=4 et: