3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
32 #define PERL_IN_PP_PACK_C
35 /* Types used by pack/unpack */
37 e_no_len, /* no length */
38 e_number, /* number, [] */
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
57 #define TEMPSYM_INIT(symptr, p, e, f) \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
81 U8 bytes[sizeof(long double)];
86 # define PERL_PACK_CAN_SHRIEKSIGN
92 /* Maximum number of bytes to which a byte can grow due to upgrade */
96 * Offset for integer pack/unpack.
98 * On architectures where I16 and I32 aren't really 16 and 32 bits,
99 * which for now are all Crays, pack and unpack have to play games.
103 * These values are required for portability of pack() output.
104 * If they're not right on your machine, then pack() and unpack()
105 * wouldn't work right anyway; you'll need to apply the Cray hack.
106 * (I'd like to check them with #if, but you can't use sizeof() in
107 * the preprocessor.) --???
110 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
111 defines are now in config.h. --Andy Dougherty April 1998
116 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
119 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
120 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
121 # define OFF16(p) ((char*)(p))
122 # define OFF32(p) ((char*)(p))
124 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
125 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
126 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
128 ++++ bad cray byte order
132 # define OFF16(p) ((char *) (p))
133 # define OFF32(p) ((char *) (p))
136 /* Only to be used inside a loop (see the break) */
137 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
139 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
141 Copy(s, OFF16(p), SIZE16, char); \
146 /* Only to be used inside a loop (see the break) */
147 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
149 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
151 Copy(s, OFF32(p), SIZE32, char); \
156 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
157 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
159 /* Only to be used inside a loop (see the break) */
160 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype) \
163 if (!uni_to_bytes(aTHX_ &s, strend, \
164 (char *) (buf), len, datumtype)) break; \
166 Copy(s, (char *) (buf), len, char); \
171 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
172 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype)
174 #define PUSH_VAR(utf8, aptr, var) \
175 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
177 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
178 #define MAX_SUB_TEMPLATE_LEVEL 100
180 /* flags (note that type modifiers can also be used as flags!) */
181 #define FLAG_WAS_UTF8 0x40
182 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
183 #define FLAG_UNPACK_ONLY_ONE 0x10
184 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
185 #define FLAG_SLASH 0x04
186 #define FLAG_COMMA 0x02
187 #define FLAG_PACK 0x01
190 S_mul128(pTHX_ SV *sv, U8 m)
193 char *s = SvPV(sv, len);
196 PERL_ARGS_ASSERT_MUL128;
198 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
199 SV * const tmpNew = newSVpvs("0000000000");
201 sv_catsv(tmpNew, sv);
202 SvREFCNT_dec(sv); /* free old sv */
207 while (!*t) /* trailing '\0'? */
210 const U32 i = ((*t - '0') << 7) + m;
211 *(t--) = '0' + (char)(i % 10);
217 /* Explosives and implosives. */
219 #if 'I' == 73 && 'J' == 74
220 /* On an ASCII/ISO kind of system */
221 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
224 Some other sort of character set - use memchr() so we don't match
227 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
231 #define TYPE_IS_SHRIEKING 0x100
232 #define TYPE_IS_BIG_ENDIAN 0x200
233 #define TYPE_IS_LITTLE_ENDIAN 0x400
234 #define TYPE_IS_PACK 0x800
235 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
236 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
237 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
239 #ifdef PERL_PACK_CAN_SHRIEKSIGN
240 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
242 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
245 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
246 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
248 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
250 # define DO_BO_UNPACK(var, type) \
252 switch (TYPE_ENDIANNESS(datumtype)) { \
253 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
254 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
259 # define DO_BO_PACK(var, type) \
261 switch (TYPE_ENDIANNESS(datumtype)) { \
262 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
263 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
268 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
270 switch (TYPE_ENDIANNESS(datumtype)) { \
271 case TYPE_IS_BIG_ENDIAN: \
272 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
274 case TYPE_IS_LITTLE_ENDIAN: \
275 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
282 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
284 switch (TYPE_ENDIANNESS(datumtype)) { \
285 case TYPE_IS_BIG_ENDIAN: \
286 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
288 case TYPE_IS_LITTLE_ENDIAN: \
289 var = (post_cast *) my_htole ## type ((pre_cast) var); \
296 # define BO_CANT_DOIT(action, type) \
298 switch (TYPE_ENDIANNESS(datumtype)) { \
299 case TYPE_IS_BIG_ENDIAN: \
300 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
301 "platform", #action, #type); \
303 case TYPE_IS_LITTLE_ENDIAN: \
304 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
305 "platform", #action, #type); \
312 # if PTRSIZE == INTSIZE
313 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
314 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
315 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
316 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
317 # elif PTRSIZE == LONGSIZE
318 # if LONGSIZE < IVSIZE && IVSIZE == 8
319 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, 64, IV, void)
320 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, 64, IV, void)
321 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, 64, IV, char)
322 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, 64, IV, char)
324 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
325 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
326 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
327 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
329 # elif PTRSIZE == IVSIZE
330 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
331 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
332 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
333 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
335 # define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
336 # define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
337 # define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
338 # define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
341 # if defined(my_htolen) && defined(my_letohn) && \
342 defined(my_htoben) && defined(my_betohn)
343 # define DO_BO_UNPACK_N(var, type) \
345 switch (TYPE_ENDIANNESS(datumtype)) { \
346 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
347 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
352 # define DO_BO_PACK_N(var, type) \
354 switch (TYPE_ENDIANNESS(datumtype)) { \
355 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
356 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
361 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
362 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
365 #define PACK_SIZE_CANNOT_CSUM 0x80
366 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
367 #define PACK_SIZE_MASK 0x3F
369 #include "packsizetables.c"
372 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
375 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
376 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
377 /* We try to process malformed UTF-8 as much as possible (preferably with
378 warnings), but these two mean we make no progress in the string and
379 might enter an infinite loop */
380 if (retlen == (STRLEN) -1 || retlen == 0)
381 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
382 (int) TYPE_NO_MODIFIERS(datumtype));
384 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
385 "Character in '%c' format wrapped in unpack",
386 (int) TYPE_NO_MODIFIERS(datumtype));
393 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
394 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
398 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
402 const char *from = *s;
404 const U32 flags = ckWARN(WARN_UTF8) ?
405 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
406 for (;buf_len > 0; buf_len--) {
407 if (from >= end) return FALSE;
408 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
409 if (retlen == (STRLEN) -1 || retlen == 0) {
410 from += UTF8SKIP(from);
412 } else from += retlen;
417 *(U8 *)buf++ = (U8)val;
419 /* We have enough characters for the buffer. Did we have problems ? */
422 /* Rewalk the string fragment while warning */
424 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
425 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
426 if (ptr >= end) break;
427 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
429 if (from > end) from = end;
432 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
433 WARN_PACK : WARN_UNPACK),
434 "Character(s) in '%c' format wrapped in %s",
435 (int) TYPE_NO_MODIFIERS(datumtype),
436 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
443 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
447 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
448 if (val >= 0x100 || !ISUUCHAR(val) ||
449 retlen == (STRLEN) -1 || retlen == 0) {
453 *out = PL_uudmap[val] & 077;
459 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
460 const U8 * const end = start + len;
462 PERL_ARGS_ASSERT_BYTES_TO_UNI;
464 while (start < end) {
465 const UV uv = NATIVE_TO_ASCII(*start);
466 if (UNI_IS_INVARIANT(uv))
467 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
469 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
470 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
477 #define PUSH_BYTES(utf8, cur, buf, len) \
480 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
482 Copy(buf, cur, len, char); \
487 #define GROWING(utf8, cat, start, cur, in_len) \
489 STRLEN glen = (in_len); \
490 if (utf8) glen *= UTF8_EXPAND; \
491 if ((cur) + glen >= (start) + SvLEN(cat)) { \
492 (start) = sv_exp_grow(cat, glen); \
493 (cur) = (start) + SvCUR(cat); \
497 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
499 const STRLEN glen = (in_len); \
501 if (utf8) gl *= UTF8_EXPAND; \
502 if ((cur) + gl >= (start) + SvLEN(cat)) { \
504 SvCUR_set((cat), (cur) - (start)); \
505 (start) = sv_exp_grow(cat, gl); \
506 (cur) = (start) + SvCUR(cat); \
508 PUSH_BYTES(utf8, cur, buf, glen); \
511 #define PUSH_BYTE(utf8, s, byte) \
514 const U8 au8 = (byte); \
515 (s) = bytes_to_uni(&au8, 1, (s)); \
516 } else *(U8 *)(s)++ = (byte); \
519 /* Only to be used inside a loop (see the break) */
520 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
523 if (str >= end) break; \
524 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
525 if (retlen == (STRLEN) -1 || retlen == 0) { \
527 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
532 static const char *_action( const tempsym_t* symptr )
534 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
537 /* Returns the sizeof() struct described by pat */
539 S_measure_struct(pTHX_ tempsym_t* symptr)
543 PERL_ARGS_ASSERT_MEASURE_STRUCT;
545 while (next_symbol(symptr)) {
549 switch (symptr->howlen) {
551 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
555 /* e_no_len and e_number */
556 len = symptr->length;
560 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
563 /* endianness doesn't influence the size of a type */
564 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
566 Perl_croak(aTHX_ "Invalid type '%c' in %s",
567 (int)TYPE_NO_MODIFIERS(symptr->code),
569 #ifdef PERL_PACK_CAN_SHRIEKSIGN
570 case '.' | TYPE_IS_SHRIEKING:
571 case '@' | TYPE_IS_SHRIEKING:
576 case 'U': /* XXXX Is it correct? */
579 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
580 (int) TYPE_NO_MODIFIERS(symptr->code),
587 tempsym_t savsym = *symptr;
588 symptr->patptr = savsym.grpbeg;
589 symptr->patend = savsym.grpend;
590 /* XXXX Theoretically, we need to measure many times at
591 different positions, since the subexpression may contain
592 alignment commands, but be not of aligned length.
593 Need to detect this and croak(). */
594 size = measure_struct(symptr);
598 case 'X' | TYPE_IS_SHRIEKING:
599 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
601 if (!len) /* Avoid division by 0 */
603 len = total % len; /* Assumed: the start is aligned. */
608 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
610 case 'x' | TYPE_IS_SHRIEKING:
611 if (!len) /* Avoid division by 0 */
613 star = total % len; /* Assumed: the start is aligned. */
614 if (star) /* Other portable ways? */
638 size = sizeof(char*);
648 /* locate matching closing parenthesis or bracket
649 * returns char pointer to char after match, or NULL
652 S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
654 PERL_ARGS_ASSERT_GROUP_END;
656 while (patptr < patend) {
657 const char c = *patptr++;
664 while (patptr < patend && *patptr != '\n')
668 patptr = group_end(patptr, patend, ')') + 1;
670 patptr = group_end(patptr, patend, ']') + 1;
672 Perl_croak(aTHX_ "No group ending character '%c' found in template",
678 /* Convert unsigned decimal number to binary.
679 * Expects a pointer to the first digit and address of length variable
680 * Advances char pointer to 1st non-digit char and returns number
683 S_get_num(pTHX_ const char *patptr, I32 *lenptr )
685 I32 len = *patptr++ - '0';
687 PERL_ARGS_ASSERT_GET_NUM;
689 while (isDIGIT(*patptr)) {
690 if (len >= 0x7FFFFFFF/10)
691 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
692 len = (len * 10) + (*patptr++ - '0');
698 /* The marvellous template parsing routine: Using state stored in *symptr,
699 * locates next template code and count
702 S_next_symbol(pTHX_ tempsym_t* symptr )
704 const char* patptr = symptr->patptr;
705 const char* const patend = symptr->patend;
707 PERL_ARGS_ASSERT_NEXT_SYMBOL;
709 symptr->flags &= ~FLAG_SLASH;
711 while (patptr < patend) {
712 if (isSPACE(*patptr))
714 else if (*patptr == '#') {
716 while (patptr < patend && *patptr != '\n')
721 /* We should have found a template code */
722 I32 code = *patptr++ & 0xFF;
723 U32 inherited_modifiers = 0;
725 if (code == ','){ /* grandfather in commas but with a warning */
726 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
727 symptr->flags |= FLAG_COMMA;
728 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
729 "Invalid type ',' in %s", _action( symptr ) );
734 /* for '(', skip to ')' */
736 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
737 Perl_croak(aTHX_ "()-group starts with a count in %s",
739 symptr->grpbeg = patptr;
740 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
741 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
742 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
746 /* look for group modifiers to inherit */
747 if (TYPE_ENDIANNESS(symptr->flags)) {
748 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
749 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
752 /* look for modifiers */
753 while (patptr < patend) {
758 modifier = TYPE_IS_SHRIEKING;
759 allowed = SHRIEKING_ALLOWED_TYPES;
762 modifier = TYPE_IS_BIG_ENDIAN;
763 allowed = ENDIANNESS_ALLOWED_TYPES;
766 modifier = TYPE_IS_LITTLE_ENDIAN;
767 allowed = ENDIANNESS_ALLOWED_TYPES;
778 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
779 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
780 allowed, _action( symptr ) );
782 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
783 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
784 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
785 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
786 TYPE_ENDIANNESS_MASK)
787 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
788 *patptr, _action( symptr ) );
790 if ((code & modifier)) {
791 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
792 "Duplicate modifier '%c' after '%c' in %s",
793 *patptr, (int) TYPE_NO_MODIFIERS(code),
801 /* inherit modifiers */
802 code |= inherited_modifiers;
804 /* look for count and/or / */
805 if (patptr < patend) {
806 if (isDIGIT(*patptr)) {
807 patptr = get_num( patptr, &symptr->length );
808 symptr->howlen = e_number;
810 } else if (*patptr == '*') {
812 symptr->howlen = e_star;
814 } else if (*patptr == '[') {
815 const char* lenptr = ++patptr;
816 symptr->howlen = e_number;
817 patptr = group_end( patptr, patend, ']' ) + 1;
818 /* what kind of [] is it? */
819 if (isDIGIT(*lenptr)) {
820 lenptr = get_num( lenptr, &symptr->length );
822 Perl_croak(aTHX_ "Malformed integer in [] in %s",
825 tempsym_t savsym = *symptr;
826 symptr->patend = patptr-1;
827 symptr->patptr = lenptr;
828 savsym.length = measure_struct(symptr);
832 symptr->howlen = e_no_len;
837 while (patptr < patend) {
838 if (isSPACE(*patptr))
840 else if (*patptr == '#') {
842 while (patptr < patend && *patptr != '\n')
847 if (*patptr == '/') {
848 symptr->flags |= FLAG_SLASH;
850 if (patptr < patend &&
851 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
852 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
859 /* at end - no count, no / */
860 symptr->howlen = e_no_len;
865 symptr->patptr = patptr;
869 symptr->patptr = patptr;
874 There is no way to cleanly handle the case where we should process the
875 string per byte in its upgraded form while it's really in downgraded form
876 (e.g. estimates like strend-s as an upper bound for the number of
877 characters left wouldn't work). So if we foresee the need of this
878 (pattern starts with U or contains U0), we want to work on the encoded
879 version of the string. Users are advised to upgrade their pack string
880 themselves if they need to do a lot of unpacks like this on it
883 need_utf8(const char *pat, const char *patend)
887 PERL_ARGS_ASSERT_NEED_UTF8;
889 while (pat < patend) {
892 pat = (const char *) memchr(pat, '\n', patend-pat);
893 if (!pat) return FALSE;
894 } else if (pat[0] == 'U') {
895 if (first || pat[1] == '0') return TRUE;
896 } else first = FALSE;
903 first_symbol(const char *pat, const char *patend) {
904 PERL_ARGS_ASSERT_FIRST_SYMBOL;
906 while (pat < patend) {
907 if (pat[0] != '#') return pat[0];
909 pat = (const char *) memchr(pat, '\n', patend-pat);
917 =for apidoc unpackstring
919 The engine implementing the unpack() Perl function.
921 Using the template pat..patend, this function unpacks the string
922 s..strend into a number of mortal SVs, which it pushes onto the perl
923 argument (@_) stack (so you will need to issue a C<PUTBACK> before and
924 C<SPAGAIN> after the call to this function). It returns the number of
927 The strend and patend pointers should point to the byte following the last
928 character of each string.
930 Although this function returns its values on the perl argument stack, it
931 doesn't take any parameters from that stack (and thus in particular
932 there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
938 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
942 PERL_ARGS_ASSERT_UNPACKSTRING;
944 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
945 else if (need_utf8(pat, patend)) {
946 /* We probably should try to avoid this in case a scalar context call
947 wouldn't get to the "U0" */
948 STRLEN len = strend - s;
949 s = (char *) bytes_to_utf8((U8 *) s, &len);
952 flags |= FLAG_DO_UTF8;
955 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
956 flags |= FLAG_PARSE_UTF8;
958 TEMPSYM_INIT(&sym, pat, patend, flags);
960 return unpack_rec(&sym, s, s, strend, NULL );
964 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
968 const I32 start_sp_offset = SP - PL_stack_base;
973 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
975 bool explicit_length;
976 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
977 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
979 PERL_ARGS_ASSERT_UNPACK_REC;
981 symptr->strbeg = s - strbeg;
983 while (next_symbol(symptr)) {
986 I32 datumtype = symptr->code;
987 /* do first one only unless in list context
988 / is implemented by unpacking the count, then popping it from the
989 stack, so must check that we're not in the middle of a / */
991 && (SP - PL_stack_base == start_sp_offset + 1)
992 && (datumtype != '/') ) /* XXX can this be omitted */
995 switch (howlen = symptr->howlen) {
997 len = strend - strbeg; /* long enough */
1000 /* e_no_len and e_number */
1001 len = symptr->length;
1005 explicit_length = TRUE;
1007 beyond = s >= strend;
1009 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1011 /* props nonzero means we can process this letter. */
1012 const long size = props & PACK_SIZE_MASK;
1013 const long howmany = (strend - s) / size;
1017 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1018 if (len && unpack_only_one) len = 1;
1024 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1026 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1029 if (howlen == e_no_len)
1030 len = 16; /* len is not specified */
1038 tempsym_t savsym = *symptr;
1039 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1040 symptr->flags |= group_modifiers;
1041 symptr->patend = savsym.grpend;
1042 symptr->previous = &savsym;
1045 if (len && unpack_only_one) len = 1;
1047 symptr->patptr = savsym.grpbeg;
1048 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1049 else symptr->flags &= ~FLAG_PARSE_UTF8;
1050 unpack_rec(symptr, s, strbeg, strend, &s);
1051 if (s == strend && savsym.howlen == e_star)
1052 break; /* No way to continue */
1055 savsym.flags = symptr->flags & ~group_modifiers;
1059 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1060 case '.' | TYPE_IS_SHRIEKING:
1065 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1066 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1067 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1068 const bool u8 = utf8;
1070 if (howlen == e_star) from = strbeg;
1071 else if (len <= 0) from = s;
1073 tempsym_t *group = symptr;
1075 while (--len && group) group = group->previous;
1076 from = group ? strbeg + group->strbeg : strbeg;
1079 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1080 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1084 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1085 case '@' | TYPE_IS_SHRIEKING:
1088 s = strbeg + symptr->strbeg;
1089 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1090 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1091 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1097 Perl_croak(aTHX_ "'@' outside of string in unpack");
1102 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1105 Perl_croak(aTHX_ "'@' outside of string in unpack");
1109 case 'X' | TYPE_IS_SHRIEKING:
1110 if (!len) /* Avoid division by 0 */
1113 const char *hop, *last;
1115 hop = last = strbeg;
1117 hop += UTF8SKIP(hop);
1124 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1128 len = (s - strbeg) % len;
1134 Perl_croak(aTHX_ "'X' outside of string in unpack");
1135 while (--s, UTF8_IS_CONTINUATION(*s)) {
1137 Perl_croak(aTHX_ "'X' outside of string in unpack");
1142 if (len > s - strbeg)
1143 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1147 case 'x' | TYPE_IS_SHRIEKING: {
1149 if (!len) /* Avoid division by 0 */
1151 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1152 else ai32 = (s - strbeg) % len;
1153 if (ai32 == 0) break;
1161 Perl_croak(aTHX_ "'x' outside of string in unpack");
1166 if (len > strend - s)
1167 Perl_croak(aTHX_ "'x' outside of string in unpack");
1172 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1178 /* Preliminary length estimate is assumed done in 'W' */
1179 if (len > strend - s) len = strend - s;
1185 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1186 if (hop >= strend) {
1188 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1193 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1195 } else if (len > strend - s)
1198 if (datumtype == 'Z') {
1199 /* 'Z' strips stuff after first null */
1200 const char *ptr, *end;
1202 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1203 sv = newSVpvn(s, ptr-s);
1204 if (howlen == e_star) /* exact for 'Z*' */
1205 len = ptr-s + (ptr != strend ? 1 : 0);
1206 } else if (datumtype == 'A') {
1207 /* 'A' strips both nulls and spaces */
1209 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1210 for (ptr = s+len-1; ptr >= s; ptr--)
1211 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1212 !isSPACE_utf8(ptr)) break;
1213 if (ptr >= s) ptr += UTF8SKIP(ptr);
1216 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1218 for (ptr = s+len-1; ptr >= s; ptr--)
1219 if (*ptr != 0 && !isSPACE(*ptr)) break;
1222 sv = newSVpvn(s, ptr-s);
1223 } else sv = newSVpvn(s, len);
1227 /* Undo any upgrade done due to need_utf8() */
1228 if (!(symptr->flags & FLAG_WAS_UTF8))
1229 sv_utf8_downgrade(sv, 0);
1237 if (howlen == e_star || len > (strend - s) * 8)
1238 len = (strend - s) * 8;
1241 while (len >= 8 && s < strend) {
1242 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1247 cuv += PL_bitcount[*(U8 *)s++];
1250 if (len && s < strend) {
1252 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1253 if (datumtype == 'b')
1255 if (bits & 1) cuv++;
1260 if (bits & 0x80) cuv++;
1267 sv = sv_2mortal(newSV(len ? len : 1));
1270 if (datumtype == 'b') {
1272 const I32 ai32 = len;
1273 for (len = 0; len < ai32; len++) {
1274 if (len & 7) bits >>= 1;
1276 if (s >= strend) break;
1277 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1278 } else bits = *(U8 *) s++;
1279 *str++ = bits & 1 ? '1' : '0';
1283 const I32 ai32 = len;
1284 for (len = 0; len < ai32; len++) {
1285 if (len & 7) bits <<= 1;
1287 if (s >= strend) break;
1288 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1289 } else bits = *(U8 *) s++;
1290 *str++ = bits & 0x80 ? '1' : '0';
1294 SvCUR_set(sv, str - SvPVX_const(sv));
1301 /* Preliminary length estimate, acceptable for utf8 too */
1302 if (howlen == e_star || len > (strend - s) * 2)
1303 len = (strend - s) * 2;
1305 sv = sv_2mortal(newSV(len ? len : 1));
1309 if (datumtype == 'h') {
1312 for (len = 0; len < ai32; len++) {
1313 if (len & 1) bits >>= 4;
1315 if (s >= strend) break;
1316 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1317 } else bits = * (U8 *) s++;
1319 *str++ = PL_hexdigit[bits & 15];
1323 const I32 ai32 = len;
1324 for (len = 0; len < ai32; len++) {
1325 if (len & 1) bits <<= 4;
1327 if (s >= strend) break;
1328 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1329 } else bits = *(U8 *) s++;
1331 *str++ = PL_hexdigit[(bits >> 4) & 15];
1336 SvCUR_set(sv, str - SvPVX_const(sv));
1343 if (explicit_length)
1344 /* Switch to "character" mode */
1345 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1350 while (len-- > 0 && s < strend) {
1355 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1356 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1357 if (retlen == (STRLEN) -1 || retlen == 0)
1358 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1362 aint = *(U8 *)(s)++;
1363 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1367 else if (checksum > bits_in_uv)
1368 cdouble += (NV)aint;
1376 while (len-- > 0 && s < strend) {
1378 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1379 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1380 if (retlen == (STRLEN) -1 || retlen == 0)
1381 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1385 else if (checksum > bits_in_uv)
1386 cdouble += (NV) val;
1390 } else if (!checksum)
1392 const U8 ch = *(U8 *) s++;
1395 else if (checksum > bits_in_uv)
1396 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1398 while (len-- > 0) cuv += *(U8 *) s++;
1402 if (explicit_length && howlen != e_star) {
1403 /* Switch to "bytes in UTF-8" mode */
1404 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1406 /* Should be impossible due to the need_utf8() test */
1407 Perl_croak(aTHX_ "U0 mode on a byte string");
1411 if (len > strend - s) len = strend - s;
1413 if (len && unpack_only_one) len = 1;
1417 while (len-- > 0 && s < strend) {
1421 U8 result[UTF8_MAXLEN];
1422 const char *ptr = s;
1424 /* Bug: warns about bad utf8 even if we are short on bytes
1425 and will break out of the loop */
1426 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1429 len = UTF8SKIP(result);
1430 if (!uni_to_bytes(aTHX_ &ptr, strend,
1431 (char *) &result[1], len-1, 'U')) break;
1432 auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT);
1435 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
1436 if (retlen == (STRLEN) -1 || retlen == 0)
1437 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1442 else if (checksum > bits_in_uv)
1443 cdouble += (NV) auv;
1448 case 's' | TYPE_IS_SHRIEKING:
1449 #if SHORTSIZE != SIZE16
1452 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1453 DO_BO_UNPACK(ashort, s);
1456 else if (checksum > bits_in_uv)
1457 cdouble += (NV)ashort;
1469 #if U16SIZE > SIZE16
1472 SHIFT16(utf8, s, strend, &ai16, datumtype);
1473 DO_BO_UNPACK(ai16, 16);
1474 #if U16SIZE > SIZE16
1480 else if (checksum > bits_in_uv)
1481 cdouble += (NV)ai16;
1486 case 'S' | TYPE_IS_SHRIEKING:
1487 #if SHORTSIZE != SIZE16
1489 unsigned short aushort;
1490 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1491 DO_BO_UNPACK(aushort, s);
1494 else if (checksum > bits_in_uv)
1495 cdouble += (NV)aushort;
1508 #if U16SIZE > SIZE16
1511 SHIFT16(utf8, s, strend, &au16, datumtype);
1512 DO_BO_UNPACK(au16, 16);
1514 if (datumtype == 'n')
1515 au16 = PerlSock_ntohs(au16);
1518 if (datumtype == 'v')
1523 else if (checksum > bits_in_uv)
1524 cdouble += (NV) au16;
1529 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1530 case 'v' | TYPE_IS_SHRIEKING:
1531 case 'n' | TYPE_IS_SHRIEKING:
1534 # if U16SIZE > SIZE16
1537 SHIFT16(utf8, s, strend, &ai16, datumtype);
1539 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1540 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1541 # endif /* HAS_NTOHS */
1543 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1544 ai16 = (I16) vtohs((U16) ai16);
1545 # endif /* HAS_VTOHS */
1548 else if (checksum > bits_in_uv)
1549 cdouble += (NV) ai16;
1554 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1556 case 'i' | TYPE_IS_SHRIEKING:
1559 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1560 DO_BO_UNPACK(aint, i);
1563 else if (checksum > bits_in_uv)
1564 cdouble += (NV)aint;
1570 case 'I' | TYPE_IS_SHRIEKING:
1573 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1574 DO_BO_UNPACK(auint, i);
1577 else if (checksum > bits_in_uv)
1578 cdouble += (NV)auint;
1586 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1587 #if IVSIZE == INTSIZE
1588 DO_BO_UNPACK(aiv, i);
1589 #elif IVSIZE == LONGSIZE
1590 DO_BO_UNPACK(aiv, l);
1591 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1592 DO_BO_UNPACK(aiv, 64);
1594 Perl_croak(aTHX_ "'j' not supported on this platform");
1598 else if (checksum > bits_in_uv)
1607 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1608 #if IVSIZE == INTSIZE
1609 DO_BO_UNPACK(auv, i);
1610 #elif IVSIZE == LONGSIZE
1611 DO_BO_UNPACK(auv, l);
1612 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1613 DO_BO_UNPACK(auv, 64);
1615 Perl_croak(aTHX_ "'J' not supported on this platform");
1619 else if (checksum > bits_in_uv)
1625 case 'l' | TYPE_IS_SHRIEKING:
1626 #if LONGSIZE != SIZE32
1629 SHIFT_VAR(utf8, s, strend, along, datumtype);
1630 DO_BO_UNPACK(along, l);
1633 else if (checksum > bits_in_uv)
1634 cdouble += (NV)along;
1645 #if U32SIZE > SIZE32
1648 SHIFT32(utf8, s, strend, &ai32, datumtype);
1649 DO_BO_UNPACK(ai32, 32);
1650 #if U32SIZE > SIZE32
1651 if (ai32 > 2147483647) ai32 -= 4294967296;
1655 else if (checksum > bits_in_uv)
1656 cdouble += (NV)ai32;
1661 case 'L' | TYPE_IS_SHRIEKING:
1662 #if LONGSIZE != SIZE32
1664 unsigned long aulong;
1665 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1666 DO_BO_UNPACK(aulong, l);
1669 else if (checksum > bits_in_uv)
1670 cdouble += (NV)aulong;
1683 #if U32SIZE > SIZE32
1686 SHIFT32(utf8, s, strend, &au32, datumtype);
1687 DO_BO_UNPACK(au32, 32);
1689 if (datumtype == 'N')
1690 au32 = PerlSock_ntohl(au32);
1693 if (datumtype == 'V')
1698 else if (checksum > bits_in_uv)
1699 cdouble += (NV)au32;
1704 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1705 case 'V' | TYPE_IS_SHRIEKING:
1706 case 'N' | TYPE_IS_SHRIEKING:
1709 # if U32SIZE > SIZE32
1712 SHIFT32(utf8, s, strend, &ai32, datumtype);
1714 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1715 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1718 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1719 ai32 = (I32)vtohl((U32)ai32);
1723 else if (checksum > bits_in_uv)
1724 cdouble += (NV)ai32;
1729 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1733 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1734 DO_BO_UNPACK_PC(aptr);
1735 /* newSVpv generates undef if aptr is NULL */
1736 mPUSHs(newSVpv(aptr, 0));
1744 while (len > 0 && s < strend) {
1746 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1747 auv = (auv << 7) | (ch & 0x7f);
1748 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1756 if (++bytes >= sizeof(UV)) { /* promote to string */
1759 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
1760 while (s < strend) {
1761 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1762 sv = mul128(sv, (U8)(ch & 0x7f));
1768 t = SvPV_nolen_const(sv);
1777 if ((s >= strend) && bytes)
1778 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1782 if (symptr->howlen == e_star)
1783 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1785 if (s + sizeof(char*) <= strend) {
1787 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1788 DO_BO_UNPACK_PC(aptr);
1789 /* newSVpvn generates undef if aptr is NULL */
1790 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1797 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
1798 DO_BO_UNPACK(aquad, 64);
1800 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
1801 newSViv((IV)aquad) : newSVnv((NV)aquad));
1802 else if (checksum > bits_in_uv)
1803 cdouble += (NV)aquad;
1811 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
1812 DO_BO_UNPACK(auquad, 64);
1814 mPUSHs(auquad <= UV_MAX ?
1815 newSVuv((UV)auquad) : newSVnv((NV)auquad));
1816 else if (checksum > bits_in_uv)
1817 cdouble += (NV)auquad;
1822 #endif /* HAS_QUAD */
1823 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1827 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
1828 DO_BO_UNPACK_N(afloat, float);
1838 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
1839 DO_BO_UNPACK_N(adouble, double);
1849 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), datumtype);
1850 DO_BO_UNPACK_N(anv.nv, NV);
1857 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1861 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, sizeof(aldouble.bytes), datumtype);
1862 DO_BO_UNPACK_N(aldouble.ld, long double);
1864 mPUSHn(aldouble.ld);
1866 cdouble += aldouble.ld;
1872 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1873 sv = sv_2mortal(newSV(l));
1874 if (l) SvPOK_on(sv);
1877 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1882 next_uni_uu(aTHX_ &s, strend, &a);
1883 next_uni_uu(aTHX_ &s, strend, &b);
1884 next_uni_uu(aTHX_ &s, strend, &c);
1885 next_uni_uu(aTHX_ &s, strend, &d);
1886 hunk[0] = (char)((a << 2) | (b >> 4));
1887 hunk[1] = (char)((b << 4) | (c >> 2));
1888 hunk[2] = (char)((c << 6) | d);
1890 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1898 /* possible checksum byte */
1899 const char *skip = s+UTF8SKIP(s);
1900 if (skip < strend && *skip == '\n')
1906 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1910 len = PL_uudmap[*(U8*)s++] & 077;
1912 if (s < strend && ISUUCHAR(*s))
1913 a = PL_uudmap[*(U8*)s++] & 077;
1916 if (s < strend && ISUUCHAR(*s))
1917 b = PL_uudmap[*(U8*)s++] & 077;
1920 if (s < strend && ISUUCHAR(*s))
1921 c = PL_uudmap[*(U8*)s++] & 077;
1924 if (s < strend && ISUUCHAR(*s))
1925 d = PL_uudmap[*(U8*)s++] & 077;
1928 hunk[0] = (char)((a << 2) | (b >> 4));
1929 hunk[1] = (char)((b << 4) | (c >> 2));
1930 hunk[2] = (char)((c << 6) | d);
1932 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1937 else /* possible checksum byte */
1938 if (s + 1 < strend && s[1] == '\n')
1948 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1949 (checksum > bits_in_uv &&
1950 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1953 anv = (NV) (1 << (checksum & 15));
1954 while (checksum >= 16) {
1958 while (cdouble < 0.0)
1960 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
1961 sv = newSVnv(cdouble);
1964 if (checksum < bits_in_uv) {
1965 UV mask = ((UV)1 << checksum) - 1;
1974 if (symptr->flags & FLAG_SLASH){
1975 if (SP - PL_stack_base - start_sp_offset <= 0)
1977 if( next_symbol(symptr) ){
1978 if( symptr->howlen == e_number )
1979 Perl_croak(aTHX_ "Count after length/code in unpack" );
1981 /* ...end of char buffer then no decent length available */
1982 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1984 /* take top of stack (hope it's numeric) */
1987 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1990 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1992 datumtype = symptr->code;
1993 explicit_length = FALSE;
2001 return SP - PL_stack_base - start_sp_offset;
2009 I32 gimme = GIMME_V;
2012 const char *pat = SvPV_const(left, llen);
2013 const char *s = SvPV_const(right, rlen);
2014 const char *strend = s + rlen;
2015 const char *patend = pat + llen;
2019 cnt = unpackstring(pat, patend, s, strend,
2020 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2021 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2024 if ( !cnt && gimme == G_SCALAR )
2025 PUSHs(&PL_sv_undef);
2030 doencodes(U8 *h, const char *s, I32 len)
2032 *h++ = PL_uuemap[len];
2034 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2035 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2036 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2037 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2042 const char r = (len > 1 ? s[1] : '\0');
2043 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2044 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2045 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2046 *h++ = PL_uuemap[0];
2053 S_is_an_int(pTHX_ const char *s, STRLEN l)
2055 SV *result = newSVpvn(s, l);
2056 char *const result_c = SvPV_nolen(result); /* convenience */
2057 char *out = result_c;
2061 PERL_ARGS_ASSERT_IS_AN_INT;
2069 SvREFCNT_dec(result);
2092 SvREFCNT_dec(result);
2098 SvCUR_set(result, out - result_c);
2102 /* pnum must be '\0' terminated */
2104 S_div128(pTHX_ SV *pnum, bool *done)
2107 char * const s = SvPV(pnum, len);
2111 PERL_ARGS_ASSERT_DIV128;
2115 const int i = m * 10 + (*t - '0');
2116 const int r = (i >> 7); /* r < 10 */
2124 SvCUR_set(pnum, (STRLEN) (t - s));
2129 =for apidoc packlist
2131 The engine implementing pack() Perl function.
2137 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
2142 PERL_ARGS_ASSERT_PACKLIST;
2144 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2146 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2147 Also make sure any UTF8 flag is loaded */
2148 SvPV_force_nolen(cat);
2150 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2152 (void)pack_rec( cat, &sym, beglist, endlist );
2155 /* like sv_utf8_upgrade, but also repoint the group start markers */
2157 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2160 const char *from_ptr, *from_start, *from_end, **marks, **m;
2161 char *to_start, *to_ptr;
2163 if (SvUTF8(sv)) return;
2165 from_start = SvPVX_const(sv);
2166 from_end = from_start + SvCUR(sv);
2167 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2168 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2169 if (from_ptr == from_end) {
2170 /* Simple case: no character needs to be changed */
2175 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2176 Newx(to_start, len, char);
2177 Copy(from_start, to_start, from_ptr-from_start, char);
2178 to_ptr = to_start + (from_ptr-from_start);
2180 Newx(marks, sym_ptr->level+2, const char *);
2181 for (group=sym_ptr; group; group = group->previous)
2182 marks[group->level] = from_start + group->strbeg;
2183 marks[sym_ptr->level+1] = from_end+1;
2184 for (m = marks; *m < from_ptr; m++)
2185 *m = to_start + (*m-from_start);
2187 for (;from_ptr < from_end; from_ptr++) {
2188 while (*m == from_ptr) *m++ = to_ptr;
2189 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2193 while (*m == from_ptr) *m++ = to_ptr;
2194 if (m != marks + sym_ptr->level+1) {
2197 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2198 "level=%d", m, marks, sym_ptr->level);
2200 for (group=sym_ptr; group; group = group->previous)
2201 group->strbeg = marks[group->level] - to_start;
2206 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2207 from_start -= SvIVX(sv);
2210 SvFLAGS(sv) &= ~SVf_OOK;
2213 Safefree(from_start);
2214 SvPV_set(sv, to_start);
2215 SvCUR_set(sv, to_ptr - to_start);
2220 /* Exponential string grower. Makes string extension effectively O(n)
2221 needed says how many extra bytes we need (not counting the final '\0')
2222 Only grows the string if there is an actual lack of space
2225 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2226 const STRLEN cur = SvCUR(sv);
2227 const STRLEN len = SvLEN(sv);
2230 PERL_ARGS_ASSERT_SV_EXP_GROW;
2232 if (len - cur > needed) return SvPVX(sv);
2233 extend = needed > len ? needed : len;
2234 return SvGROW(sv, len+extend+1);
2239 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2242 tempsym_t lookahead;
2243 I32 items = endlist - beglist;
2244 bool found = next_symbol(symptr);
2245 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2246 bool warn_utf8 = ckWARN(WARN_UTF8);
2248 PERL_ARGS_ASSERT_PACK_REC;
2250 if (symptr->level == 0 && found && symptr->code == 'U') {
2251 marked_upgrade(aTHX_ cat, symptr);
2252 symptr->flags |= FLAG_DO_UTF8;
2255 symptr->strbeg = SvCUR(cat);
2261 SV *lengthcode = NULL;
2262 I32 datumtype = symptr->code;
2263 howlen_t howlen = symptr->howlen;
2264 char *start = SvPVX(cat);
2265 char *cur = start + SvCUR(cat);
2267 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2271 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2275 /* e_no_len and e_number */
2276 len = symptr->length;
2281 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2283 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2284 /* We can process this letter. */
2285 STRLEN size = props & PACK_SIZE_MASK;
2286 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2290 /* Look ahead for next symbol. Do we have code/code? */
2291 lookahead = *symptr;
2292 found = next_symbol(&lookahead);
2293 if (symptr->flags & FLAG_SLASH) {
2295 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2296 if (strchr("aAZ", lookahead.code)) {
2297 if (lookahead.howlen == e_number) count = lookahead.length;
2300 count = sv_len_utf8(*beglist);
2303 if (lookahead.code == 'Z') count++;
2306 if (lookahead.howlen == e_number && lookahead.length < items)
2307 count = lookahead.length;
2310 lookahead.howlen = e_number;
2311 lookahead.length = count;
2312 lengthcode = sv_2mortal(newSViv(count));
2315 /* Code inside the switch must take care to properly update
2316 cat (CUR length and '\0' termination) if it updated *cur and
2317 doesn't simply leave using break */
2318 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2320 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2321 (int) TYPE_NO_MODIFIERS(datumtype));
2323 Perl_croak(aTHX_ "'%%' may not be used in pack");
2326 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2327 case '.' | TYPE_IS_SHRIEKING:
2330 if (howlen == e_star) from = start;
2331 else if (len == 0) from = cur;
2333 tempsym_t *group = symptr;
2335 while (--len && group) group = group->previous;
2336 from = group ? start + group->strbeg : start;
2339 len = SvIV(fromstr);
2341 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2342 case '@' | TYPE_IS_SHRIEKING:
2345 from = start + symptr->strbeg;
2347 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2348 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2349 #else /* PERL_PACK_CAN_SHRIEKSIGN */
2353 while (len && from < cur) {
2354 from += UTF8SKIP(from);
2358 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2360 /* Here we know from == cur */
2362 GROWING(0, cat, start, cur, len);
2363 Zero(cur, len, char);
2365 } else if (from < cur) {
2368 } else goto no_change;
2376 if (len > 0) goto grow;
2377 if (len == 0) goto no_change;
2384 tempsym_t savsym = *symptr;
2385 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2386 symptr->flags |= group_modifiers;
2387 symptr->patend = savsym.grpend;
2389 symptr->previous = &lookahead;
2392 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2393 else symptr->flags &= ~FLAG_PARSE_UTF8;
2394 was_utf8 = SvUTF8(cat);
2395 symptr->patptr = savsym.grpbeg;
2396 beglist = pack_rec(cat, symptr, beglist, endlist);
2397 if (SvUTF8(cat) != was_utf8)
2398 /* This had better be an upgrade while in utf8==0 mode */
2401 if (savsym.howlen == e_star && beglist == endlist)
2402 break; /* No way to continue */
2404 items = endlist - beglist;
2405 lookahead.flags = symptr->flags & ~group_modifiers;
2408 case 'X' | TYPE_IS_SHRIEKING:
2409 if (!len) /* Avoid division by 0 */
2416 hop += UTF8SKIP(hop);
2423 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2427 len = (cur-start) % len;
2431 if (len < 1) goto no_change;
2435 Perl_croak(aTHX_ "'%c' outside of string in pack",
2436 (int) TYPE_NO_MODIFIERS(datumtype));
2437 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2439 Perl_croak(aTHX_ "'%c' outside of string in pack",
2440 (int) TYPE_NO_MODIFIERS(datumtype));
2446 if (cur - start < len)
2447 Perl_croak(aTHX_ "'%c' outside of string in pack",
2448 (int) TYPE_NO_MODIFIERS(datumtype));
2451 if (cur < start+symptr->strbeg) {
2452 /* Make sure group starts don't point into the void */
2454 const STRLEN length = cur-start;
2455 for (group = symptr;
2456 group && length < group->strbeg;
2457 group = group->previous) group->strbeg = length;
2458 lookahead.strbeg = length;
2461 case 'x' | TYPE_IS_SHRIEKING: {
2463 if (!len) /* Avoid division by 0 */
2465 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2466 else ai32 = (cur - start) % len;
2467 if (ai32 == 0) goto no_change;
2479 aptr = SvPV_const(fromstr, fromlen);
2480 if (DO_UTF8(fromstr)) {
2481 const char *end, *s;
2483 if (!utf8 && !SvUTF8(cat)) {
2484 marked_upgrade(aTHX_ cat, symptr);
2485 lookahead.flags |= FLAG_DO_UTF8;
2486 lookahead.strbeg = symptr->strbeg;
2489 cur = start + SvCUR(cat);
2491 if (howlen == e_star) {
2492 if (utf8) goto string_copy;
2496 end = aptr + fromlen;
2497 fromlen = datumtype == 'Z' ? len-1 : len;
2498 while ((I32) fromlen > 0 && s < end) {
2503 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2506 if (datumtype == 'Z') len++;
2512 fromlen = len - fromlen;
2513 if (datumtype == 'Z') fromlen--;
2514 if (howlen == e_star) {
2516 if (datumtype == 'Z') len++;
2518 GROWING(0, cat, start, cur, len);
2519 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2520 datumtype | TYPE_IS_PACK))
2521 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2522 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2523 (int)datumtype, aptr, end, cur, (UV)fromlen);
2527 if (howlen == e_star) {
2529 if (datumtype == 'Z') len++;
2531 if (len <= (I32) fromlen) {
2533 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2535 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2537 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2538 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2540 while (fromlen > 0) {
2541 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2547 if (howlen == e_star) {
2549 if (datumtype == 'Z') len++;
2551 if (len <= (I32) fromlen) {
2553 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2555 GROWING(0, cat, start, cur, len);
2556 Copy(aptr, cur, fromlen, char);
2560 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2567 const char *str, *end;
2574 str = SvPV_const(fromstr, fromlen);
2575 end = str + fromlen;
2576 if (DO_UTF8(fromstr)) {
2578 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2580 utf8_source = FALSE;
2581 utf8_flags = 0; /* Unused, but keep compilers happy */
2583 if (howlen == e_star) len = fromlen;
2584 field_len = (len+7)/8;
2585 GROWING(utf8, cat, start, cur, field_len);
2586 if (len > (I32)fromlen) len = fromlen;
2589 if (datumtype == 'B')
2593 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2595 } else bits |= *str++ & 1;
2596 if (l & 7) bits <<= 1;
2598 PUSH_BYTE(utf8, cur, bits);
2603 /* datumtype == 'b' */
2607 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2608 if (val & 1) bits |= 0x80;
2609 } else if (*str++ & 1)
2611 if (l & 7) bits >>= 1;
2613 PUSH_BYTE(utf8, cur, bits);
2619 if (datumtype == 'B')
2620 bits <<= 7 - (l & 7);
2622 bits >>= 7 - (l & 7);
2623 PUSH_BYTE(utf8, cur, bits);
2626 /* Determine how many chars are left in the requested field */
2628 if (howlen == e_star) field_len = 0;
2629 else field_len -= l;
2630 Zero(cur, field_len, char);
2636 const char *str, *end;
2643 str = SvPV_const(fromstr, fromlen);
2644 end = str + fromlen;
2645 if (DO_UTF8(fromstr)) {
2647 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2649 utf8_source = FALSE;
2650 utf8_flags = 0; /* Unused, but keep compilers happy */
2652 if (howlen == e_star) len = fromlen;
2653 field_len = (len+1)/2;
2654 GROWING(utf8, cat, start, cur, field_len);
2655 if (!utf8 && len > (I32)fromlen) len = fromlen;
2658 if (datumtype == 'H')
2662 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2663 if (val < 256 && isALPHA(val))
2664 bits |= (val + 9) & 0xf;
2667 } else if (isALPHA(*str))
2668 bits |= (*str++ + 9) & 0xf;
2670 bits |= *str++ & 0xf;
2671 if (l & 1) bits <<= 4;
2673 PUSH_BYTE(utf8, cur, bits);
2681 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2682 if (val < 256 && isALPHA(val))
2683 bits |= ((val + 9) & 0xf) << 4;
2685 bits |= (val & 0xf) << 4;
2686 } else if (isALPHA(*str))
2687 bits |= ((*str++ + 9) & 0xf) << 4;
2689 bits |= (*str++ & 0xf) << 4;
2690 if (l & 1) bits >>= 4;
2692 PUSH_BYTE(utf8, cur, bits);
2698 PUSH_BYTE(utf8, cur, bits);
2701 /* Determine how many chars are left in the requested field */
2703 if (howlen == e_star) field_len = 0;
2704 else field_len -= l;
2705 Zero(cur, field_len, char);
2713 aiv = SvIV(fromstr);
2714 if ((-128 > aiv || aiv > 127))
2715 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2716 "Character in 'c' format wrapped in pack");
2717 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2722 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2728 aiv = SvIV(fromstr);
2729 if ((0 > aiv || aiv > 0xff))
2730 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2731 "Character in 'C' format wrapped in pack");
2732 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2737 U8 in_bytes = (U8)IN_BYTES;
2739 end = start+SvLEN(cat)-1;
2740 if (utf8) end -= UTF8_MAXLEN-1;
2744 auv = SvUV(fromstr);
2745 if (in_bytes) auv = auv % 0x100;
2750 SvCUR_set(cat, cur - start);
2752 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2753 end = start+SvLEN(cat)-UTF8_MAXLEN;
2755 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2758 0 : UNICODE_ALLOW_ANY);
2763 SvCUR_set(cat, cur - start);
2764 marked_upgrade(aTHX_ cat, symptr);
2765 lookahead.flags |= FLAG_DO_UTF8;
2766 lookahead.strbeg = symptr->strbeg;
2769 cur = start + SvCUR(cat);
2770 end = start+SvLEN(cat)-UTF8_MAXLEN;
2773 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2774 "Character in 'W' format wrapped in pack");
2779 SvCUR_set(cat, cur - start);
2780 GROWING(0, cat, start, cur, len+1);
2781 end = start+SvLEN(cat)-1;
2783 *(U8 *) cur++ = (U8)auv;
2792 if (!(symptr->flags & FLAG_DO_UTF8)) {
2793 marked_upgrade(aTHX_ cat, symptr);
2794 lookahead.flags |= FLAG_DO_UTF8;
2795 lookahead.strbeg = symptr->strbeg;
2801 end = start+SvLEN(cat);
2802 if (!utf8) end -= UTF8_MAXLEN;
2806 auv = SvUV(fromstr);
2808 U8 buffer[UTF8_MAXLEN], *endb;
2809 endb = uvuni_to_utf8_flags(buffer, auv,
2811 0 : UNICODE_ALLOW_ANY);
2812 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2814 SvCUR_set(cat, cur - start);
2815 GROWING(0, cat, start, cur,
2816 len+(endb-buffer)*UTF8_EXPAND);
2817 end = start+SvLEN(cat);
2819 cur = bytes_to_uni(buffer, endb-buffer, cur);
2823 SvCUR_set(cat, cur - start);
2824 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2825 end = start+SvLEN(cat)-UTF8_MAXLEN;
2827 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
2829 0 : UNICODE_ALLOW_ANY);
2834 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2840 anv = SvNV(fromstr);
2841 # if defined(VMS) && !defined(_IEEE_FP)
2842 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2843 * on Alpha; fake it if we don't have them.
2847 else if (anv < -FLT_MAX)
2849 else afloat = (float)anv;
2851 afloat = (float)anv;
2853 DO_BO_PACK_N(afloat, float);
2854 PUSH_VAR(utf8, cur, afloat);
2862 anv = SvNV(fromstr);
2863 # if defined(VMS) && !defined(_IEEE_FP)
2864 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2865 * on Alpha; fake it if we don't have them.
2869 else if (anv < -DBL_MAX)
2871 else adouble = (double)anv;
2873 adouble = (double)anv;
2875 DO_BO_PACK_N(adouble, double);
2876 PUSH_VAR(utf8, cur, adouble);
2881 Zero(&anv, 1, NV); /* can be long double with unused bits */
2885 /* to work round a gcc/x86 bug; don't use SvNV */
2886 anv.nv = sv_2nv(fromstr);
2888 anv.nv = SvNV(fromstr);
2890 DO_BO_PACK_N(anv, NV);
2891 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes));
2895 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2898 /* long doubles can have unused bits, which may be nonzero */
2899 Zero(&aldouble, 1, long double);
2903 /* to work round a gcc/x86 bug; don't use SvNV */
2904 aldouble.ld = (long double)sv_2nv(fromstr);
2906 aldouble.ld = (long double)SvNV(fromstr);
2908 DO_BO_PACK_N(aldouble, long double);
2909 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes));
2914 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2915 case 'n' | TYPE_IS_SHRIEKING:
2921 ai16 = (I16)SvIV(fromstr);
2923 ai16 = PerlSock_htons(ai16);
2925 PUSH16(utf8, cur, &ai16);
2928 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2929 case 'v' | TYPE_IS_SHRIEKING:
2935 ai16 = (I16)SvIV(fromstr);
2939 PUSH16(utf8, cur, &ai16);
2942 case 'S' | TYPE_IS_SHRIEKING:
2943 #if SHORTSIZE != SIZE16
2945 unsigned short aushort;
2947 aushort = SvUV(fromstr);
2948 DO_BO_PACK(aushort, s);
2949 PUSH_VAR(utf8, cur, aushort);
2959 au16 = (U16)SvUV(fromstr);
2960 DO_BO_PACK(au16, 16);
2961 PUSH16(utf8, cur, &au16);
2964 case 's' | TYPE_IS_SHRIEKING:
2965 #if SHORTSIZE != SIZE16
2969 ashort = SvIV(fromstr);
2970 DO_BO_PACK(ashort, s);
2971 PUSH_VAR(utf8, cur, ashort);
2981 ai16 = (I16)SvIV(fromstr);
2982 DO_BO_PACK(ai16, 16);
2983 PUSH16(utf8, cur, &ai16);
2987 case 'I' | TYPE_IS_SHRIEKING:
2991 auint = SvUV(fromstr);
2992 DO_BO_PACK(auint, i);
2993 PUSH_VAR(utf8, cur, auint);
3000 aiv = SvIV(fromstr);
3001 #if IVSIZE == INTSIZE
3003 #elif IVSIZE == LONGSIZE
3005 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3006 DO_BO_PACK(aiv, 64);
3008 Perl_croak(aTHX_ "'j' not supported on this platform");
3010 PUSH_VAR(utf8, cur, aiv);
3017 auv = SvUV(fromstr);
3018 #if UVSIZE == INTSIZE
3020 #elif UVSIZE == LONGSIZE
3022 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3023 DO_BO_PACK(auv, 64);
3025 Perl_croak(aTHX_ "'J' not supported on this platform");
3027 PUSH_VAR(utf8, cur, auv);
3034 anv = SvNV(fromstr);
3038 SvCUR_set(cat, cur - start);
3039 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3042 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3043 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3044 any negative IVs will have already been got by the croak()
3045 above. IOK is untrue for fractions, so we test them
3046 against UV_MAX_P1. */
3047 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3048 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3049 char *in = buf + sizeof(buf);
3050 UV auv = SvUV(fromstr);
3053 *--in = (char)((auv & 0x7f) | 0x80);
3056 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3057 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3058 in, (buf + sizeof(buf)) - in);
3059 } else if (SvPOKp(fromstr))
3061 else if (SvNOKp(fromstr)) {
3062 /* 10**NV_MAX_10_EXP is the largest power of 10
3063 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
3064 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3065 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3066 And with that many bytes only Inf can overflow.
3067 Some C compilers are strict about integral constant
3068 expressions so we conservatively divide by a slightly
3069 smaller integer instead of multiplying by the exact
3070 floating-point value.
3072 #ifdef NV_MAX_10_EXP
3073 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3074 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3076 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3077 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3079 char *in = buf + sizeof(buf);
3081 anv = Perl_floor(anv);
3083 const NV next = Perl_floor(anv / 128);
3084 if (in <= buf) /* this cannot happen ;-) */
3085 Perl_croak(aTHX_ "Cannot compress integer in pack");
3086 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3089 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3090 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3091 in, (buf + sizeof(buf)) - in);
3100 /* Copy string and check for compliance */
3101 from = SvPV_const(fromstr, len);
3102 if ((norm = is_an_int(from, len)) == NULL)
3103 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3105 Newx(result, len, char);
3108 while (!done) *--in = div128(norm, &done) | 0x80;
3109 result[len - 1] &= 0x7F; /* clear continue bit */
3110 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3111 in, (result + len) - in);
3113 SvREFCNT_dec(norm); /* free norm */
3118 case 'i' | TYPE_IS_SHRIEKING:
3122 aint = SvIV(fromstr);
3123 DO_BO_PACK(aint, i);
3124 PUSH_VAR(utf8, cur, aint);
3127 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3128 case 'N' | TYPE_IS_SHRIEKING:
3134 au32 = SvUV(fromstr);
3136 au32 = PerlSock_htonl(au32);
3138 PUSH32(utf8, cur, &au32);
3141 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3142 case 'V' | TYPE_IS_SHRIEKING:
3148 au32 = SvUV(fromstr);
3152 PUSH32(utf8, cur, &au32);
3155 case 'L' | TYPE_IS_SHRIEKING:
3156 #if LONGSIZE != SIZE32
3158 unsigned long aulong;
3160 aulong = SvUV(fromstr);
3161 DO_BO_PACK(aulong, l);
3162 PUSH_VAR(utf8, cur, aulong);
3172 au32 = SvUV(fromstr);
3173 DO_BO_PACK(au32, 32);
3174 PUSH32(utf8, cur, &au32);
3177 case 'l' | TYPE_IS_SHRIEKING:
3178 #if LONGSIZE != SIZE32
3182 along = SvIV(fromstr);
3183 DO_BO_PACK(along, l);
3184 PUSH_VAR(utf8, cur, along);
3194 ai32 = SvIV(fromstr);
3195 DO_BO_PACK(ai32, 32);
3196 PUSH32(utf8, cur, &ai32);
3204 auquad = (Uquad_t) SvUV(fromstr);
3205 DO_BO_PACK(auquad, 64);
3206 PUSH_VAR(utf8, cur, auquad);
3213 aquad = (Quad_t)SvIV(fromstr);
3214 DO_BO_PACK(aquad, 64);
3215 PUSH_VAR(utf8, cur, aquad);
3218 #endif /* HAS_QUAD */
3220 len = 1; /* assume SV is correct length */
3221 GROWING(utf8, cat, start, cur, sizeof(char *));
3228 SvGETMAGIC(fromstr);
3229 if (!SvOK(fromstr)) aptr = NULL;
3231 /* XXX better yet, could spirit away the string to
3232 * a safe spot and hang on to it until the result
3233 * of pack() (and all copies of the result) are
3236 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3237 !SvREADONLY(fromstr)))) {
3238 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3239 "Attempt to pack pointer to temporary value");
3241 if (SvPOK(fromstr) || SvNIOK(fromstr))
3242 aptr = SvPV_nomg_const_nolen(fromstr);
3244 aptr = SvPV_force_flags_nolen(fromstr, 0);
3246 DO_BO_PACK_PC(aptr);
3247 PUSH_VAR(utf8, cur, aptr);
3251 const char *aptr, *aend;
3255 if (len <= 2) len = 45;
3256 else len = len / 3 * 3;
3258 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3259 "Field too wide in 'u' format in pack");
3262 aptr = SvPV_const(fromstr, fromlen);
3263 from_utf8 = DO_UTF8(fromstr);
3265 aend = aptr + fromlen;
3266 fromlen = sv_len_utf8_nomg(fromstr);
3267 } else aend = NULL; /* Unused, but keep compilers happy */
3268 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3269 while (fromlen > 0) {
3272 U8 hunk[1+63/3*4+1];
3274 if ((I32)fromlen > len)
3280 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3281 'u' | TYPE_IS_PACK)) {
3283 SvCUR_set(cat, cur - start);
3284 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3285 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3286 aptr, aend, buffer, (long) todo);
3288 end = doencodes(hunk, buffer, todo);
3290 end = doencodes(hunk, aptr, todo);
3293 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3300 SvCUR_set(cat, cur - start);
3302 *symptr = lookahead;
3311 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3314 SV *pat_sv = *++MARK;
3315 const char *pat = SvPV_const(pat_sv, fromlen);
3316 const char *patend = pat + fromlen;
3322 packlist(cat, pat, patend, MARK, SP + 1);
3332 * c-indentation-style: bsd
3334 * indent-tabs-mode: nil
3337 * ex: set ts=8 sts=4 sw=4 et: