3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
32 #define PERL_IN_PP_PACK_C
35 /* Types used by pack/unpack */
37 e_no_len, /* no length */
38 e_number, /* number, [] */
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
57 #define TEMPSYM_INIT(symptr, p, e, f) \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
81 U8 bytes[sizeof(long double)];
88 /* Maximum number of bytes to which a byte can grow due to upgrade */
92 * Offset for integer pack/unpack.
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
112 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
115 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
116 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
117 # define OFF16(p) ((char*)(p))
118 # define OFF32(p) ((char*)(p))
120 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
121 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
124 ++++ bad cray byte order
128 # define OFF16(p) ((char *) (p))
129 # define OFF32(p) ((char *) (p))
132 /* Only to be used inside a loop (see the break) */
133 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
135 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
137 Copy(s, OFF16(p), SIZE16, char); \
142 /* Only to be used inside a loop (see the break) */
143 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
145 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
147 Copy(s, OFF32(p), SIZE32, char); \
152 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
153 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
155 /* Only to be used inside a loop (see the break) */
156 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype) \
159 if (!uni_to_bytes(aTHX_ &s, strend, \
160 (char *) (buf), len, datumtype)) break; \
162 Copy(s, (char *) (buf), len, char); \
167 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
168 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype)
170 #define PUSH_VAR(utf8, aptr, var) \
171 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
173 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
174 #define MAX_SUB_TEMPLATE_LEVEL 100
176 /* flags (note that type modifiers can also be used as flags!) */
177 #define FLAG_WAS_UTF8 0x40
178 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
179 #define FLAG_UNPACK_ONLY_ONE 0x10
180 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
181 #define FLAG_SLASH 0x04
182 #define FLAG_COMMA 0x02
183 #define FLAG_PACK 0x01
186 S_mul128(pTHX_ SV *sv, U8 m)
189 char *s = SvPV(sv, len);
192 PERL_ARGS_ASSERT_MUL128;
194 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
195 SV * const tmpNew = newSVpvs("0000000000");
197 sv_catsv(tmpNew, sv);
198 SvREFCNT_dec(sv); /* free old sv */
203 while (!*t) /* trailing '\0'? */
206 const U32 i = ((*t - '0') << 7) + m;
207 *(t--) = '0' + (char)(i % 10);
213 /* Explosives and implosives. */
215 #if 'I' == 73 && 'J' == 74
216 /* On an ASCII/ISO kind of system */
217 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
220 Some other sort of character set - use memchr() so we don't match
223 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
227 #define TYPE_IS_SHRIEKING 0x100
228 #define TYPE_IS_BIG_ENDIAN 0x200
229 #define TYPE_IS_LITTLE_ENDIAN 0x400
230 #define TYPE_IS_PACK 0x800
231 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
232 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
233 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
235 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
236 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
238 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
240 #if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
242 # define DO_BO_UNPACK(var, type) \
244 if (TYPE_ENDIANNESS(datumtype) == TYPE_IS_LITTLE_ENDIAN) { \
245 var = my_letoh ## type (var); \
249 # define DO_BO_PACK(var, type) \
251 if (TYPE_ENDIANNESS(datumtype) == TYPE_IS_LITTLE_ENDIAN) { \
252 var = my_htole ## type (var); \
256 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
258 if (TYPE_ENDIANNESS(datumtype) == TYPE_IS_LITTLE_ENDIAN) { \
259 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
263 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
265 if (TYPE_ENDIANNESS(datumtype) == TYPE_IS_LITTLE_ENDIAN) { \
266 var = (post_cast *) my_htole ## type ((pre_cast) var); \
270 # define DO_BO_UNPACK_N(var, type) \
272 if (TYPE_ENDIANNESS(datumtype) == TYPE_IS_LITTLE_ENDIAN) { \
273 my_letohn(&var, sizeof(type)); \
277 # define DO_BO_PACK_N(var, type) \
279 if (TYPE_ENDIANNESS(datumtype) == TYPE_IS_LITTLE_ENDIAN) { \
280 my_htolen(&var, sizeof(type)); \
284 # elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
286 # define DO_BO_UNPACK(var, type) \
288 if (TYPE_ENDIANNESS(datumtype) == TYPE_IS_BIG_ENDIAN) { \
289 var = my_betoh ## type (var); \
293 # define DO_BO_PACK(var, type) \
295 if (TYPE_ENDIANNESS(datumtype) == TYPE_IS_BIG_ENDIAN) { \
296 var = my_htobe ## type (var); \
300 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
302 if (TYPE_ENDIANNESS(datumtype) == TYPE_IS_BIG_ENDIAN) { \
303 var = (post_cast *) my_betoh ## type ((pre_cast) var); \
307 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
309 if (TYPE_ENDIANNESS(datumtype) == TYPE_IS_BIG_ENDIAN) { \
310 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
314 # define DO_BO_UNPACK_N(var, type) \
316 if (TYPE_ENDIANNESS(datumtype) == TYPE_IS_BIG_ENDIAN) { \
317 my_betohn(&var, sizeof(type)); \
321 # define DO_BO_PACK_N(var, type) \
323 if (TYPE_ENDIANNESS(datumtype) == TYPE_IS_BIG_ENDIAN) { \
324 my_htoben(&var, sizeof(type)); \
329 # define DO_BO_UNPACK(var, type) BO_CANT_DOIT(unpack, type)
330 # define DO_BO_PACK(var, type) BO_CANT_DOIT(pack, type)
331 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
332 BO_CANT_DOIT(unpack, type)
333 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
334 BO_CANT_DOIT(pack, type)
335 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
336 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
339 # define BO_CANT_DOIT(action, type) \
341 switch (TYPE_ENDIANNESS(datumtype)) { \
342 case TYPE_IS_BIG_ENDIAN: \
343 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
344 "platform", #action, #type); \
346 case TYPE_IS_LITTLE_ENDIAN: \
347 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
348 "platform", #action, #type); \
355 # if PTRSIZE == INTSIZE
356 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
357 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
358 # elif PTRSIZE == LONGSIZE
359 # if LONGSIZE < IVSIZE && IVSIZE == 8
360 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, 64, IV, char)
361 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, 64, IV, char)
363 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
364 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
366 # elif PTRSIZE == IVSIZE
367 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
368 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
370 # define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
371 # define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
374 #define PACK_SIZE_CANNOT_CSUM 0x80
375 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
376 #define PACK_SIZE_MASK 0x3F
378 #include "packsizetables.c"
381 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
384 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
385 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
386 /* We try to process malformed UTF-8 as much as possible (preferably with
387 warnings), but these two mean we make no progress in the string and
388 might enter an infinite loop */
389 if (retlen == (STRLEN) -1 || retlen == 0)
390 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
391 (int) TYPE_NO_MODIFIERS(datumtype));
393 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
394 "Character in '%c' format wrapped in unpack",
395 (int) TYPE_NO_MODIFIERS(datumtype));
402 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
403 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
407 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
411 const char *from = *s;
413 const U32 flags = ckWARN(WARN_UTF8) ?
414 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
415 for (;buf_len > 0; buf_len--) {
416 if (from >= end) return FALSE;
417 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
418 if (retlen == (STRLEN) -1 || retlen == 0) {
419 from += UTF8SKIP(from);
421 } else from += retlen;
426 *(U8 *)buf++ = (U8)val;
428 /* We have enough characters for the buffer. Did we have problems ? */
431 /* Rewalk the string fragment while warning */
433 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
434 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
435 if (ptr >= end) break;
436 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
438 if (from > end) from = end;
441 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
442 WARN_PACK : WARN_UNPACK),
443 "Character(s) in '%c' format wrapped in %s",
444 (int) TYPE_NO_MODIFIERS(datumtype),
445 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
452 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
456 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
457 if (val >= 0x100 || !ISUUCHAR(val) ||
458 retlen == (STRLEN) -1 || retlen == 0) {
462 *out = PL_uudmap[val] & 077;
468 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
469 const U8 * const end = start + len;
471 PERL_ARGS_ASSERT_BYTES_TO_UNI;
473 while (start < end) {
474 const UV uv = NATIVE_TO_ASCII(*start);
475 if (UNI_IS_INVARIANT(uv))
476 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
478 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
479 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
486 #define PUSH_BYTES(utf8, cur, buf, len) \
489 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
491 Copy(buf, cur, len, char); \
496 #define GROWING(utf8, cat, start, cur, in_len) \
498 STRLEN glen = (in_len); \
499 if (utf8) glen *= UTF8_EXPAND; \
500 if ((cur) + glen >= (start) + SvLEN(cat)) { \
501 (start) = sv_exp_grow(cat, glen); \
502 (cur) = (start) + SvCUR(cat); \
506 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
508 const STRLEN glen = (in_len); \
510 if (utf8) gl *= UTF8_EXPAND; \
511 if ((cur) + gl >= (start) + SvLEN(cat)) { \
513 SvCUR_set((cat), (cur) - (start)); \
514 (start) = sv_exp_grow(cat, gl); \
515 (cur) = (start) + SvCUR(cat); \
517 PUSH_BYTES(utf8, cur, buf, glen); \
520 #define PUSH_BYTE(utf8, s, byte) \
523 const U8 au8 = (byte); \
524 (s) = bytes_to_uni(&au8, 1, (s)); \
525 } else *(U8 *)(s)++ = (byte); \
528 /* Only to be used inside a loop (see the break) */
529 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
532 if (str >= end) break; \
533 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
534 if (retlen == (STRLEN) -1 || retlen == 0) { \
536 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
541 static const char *_action( const tempsym_t* symptr )
543 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
546 /* Returns the sizeof() struct described by pat */
548 S_measure_struct(pTHX_ tempsym_t* symptr)
552 PERL_ARGS_ASSERT_MEASURE_STRUCT;
554 while (next_symbol(symptr)) {
558 switch (symptr->howlen) {
560 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
564 /* e_no_len and e_number */
565 len = symptr->length;
569 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
572 /* endianness doesn't influence the size of a type */
573 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
575 Perl_croak(aTHX_ "Invalid type '%c' in %s",
576 (int)TYPE_NO_MODIFIERS(symptr->code),
578 case '.' | TYPE_IS_SHRIEKING:
579 case '@' | TYPE_IS_SHRIEKING:
583 case 'U': /* XXXX Is it correct? */
586 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
587 (int) TYPE_NO_MODIFIERS(symptr->code),
594 tempsym_t savsym = *symptr;
595 symptr->patptr = savsym.grpbeg;
596 symptr->patend = savsym.grpend;
597 /* XXXX Theoretically, we need to measure many times at
598 different positions, since the subexpression may contain
599 alignment commands, but be not of aligned length.
600 Need to detect this and croak(). */
601 size = measure_struct(symptr);
605 case 'X' | TYPE_IS_SHRIEKING:
606 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
608 if (!len) /* Avoid division by 0 */
610 len = total % len; /* Assumed: the start is aligned. */
615 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
617 case 'x' | TYPE_IS_SHRIEKING:
618 if (!len) /* Avoid division by 0 */
620 star = total % len; /* Assumed: the start is aligned. */
621 if (star) /* Other portable ways? */
645 size = sizeof(char*);
655 /* locate matching closing parenthesis or bracket
656 * returns char pointer to char after match, or NULL
659 S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
661 PERL_ARGS_ASSERT_GROUP_END;
663 while (patptr < patend) {
664 const char c = *patptr++;
671 while (patptr < patend && *patptr != '\n')
675 patptr = group_end(patptr, patend, ')') + 1;
677 patptr = group_end(patptr, patend, ']') + 1;
679 Perl_croak(aTHX_ "No group ending character '%c' found in template",
685 /* Convert unsigned decimal number to binary.
686 * Expects a pointer to the first digit and address of length variable
687 * Advances char pointer to 1st non-digit char and returns number
690 S_get_num(pTHX_ const char *patptr, I32 *lenptr )
692 I32 len = *patptr++ - '0';
694 PERL_ARGS_ASSERT_GET_NUM;
696 while (isDIGIT(*patptr)) {
697 if (len >= 0x7FFFFFFF/10)
698 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
699 len = (len * 10) + (*patptr++ - '0');
705 /* The marvellous template parsing routine: Using state stored in *symptr,
706 * locates next template code and count
709 S_next_symbol(pTHX_ tempsym_t* symptr )
711 const char* patptr = symptr->patptr;
712 const char* const patend = symptr->patend;
714 PERL_ARGS_ASSERT_NEXT_SYMBOL;
716 symptr->flags &= ~FLAG_SLASH;
718 while (patptr < patend) {
719 if (isSPACE(*patptr))
721 else if (*patptr == '#') {
723 while (patptr < patend && *patptr != '\n')
728 /* We should have found a template code */
729 I32 code = *patptr++ & 0xFF;
730 U32 inherited_modifiers = 0;
732 if (code == ','){ /* grandfather in commas but with a warning */
733 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
734 symptr->flags |= FLAG_COMMA;
735 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
736 "Invalid type ',' in %s", _action( symptr ) );
741 /* for '(', skip to ')' */
743 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
744 Perl_croak(aTHX_ "()-group starts with a count in %s",
746 symptr->grpbeg = patptr;
747 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
748 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
749 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
753 /* look for group modifiers to inherit */
754 if (TYPE_ENDIANNESS(symptr->flags)) {
755 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
756 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
759 /* look for modifiers */
760 while (patptr < patend) {
765 modifier = TYPE_IS_SHRIEKING;
766 allowed = "sSiIlLxXnNvV@.";
769 modifier = TYPE_IS_BIG_ENDIAN;
770 allowed = ENDIANNESS_ALLOWED_TYPES;
773 modifier = TYPE_IS_LITTLE_ENDIAN;
774 allowed = ENDIANNESS_ALLOWED_TYPES;
785 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
786 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
787 allowed, _action( symptr ) );
789 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
790 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
791 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
792 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
793 TYPE_ENDIANNESS_MASK)
794 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
795 *patptr, _action( symptr ) );
797 if ((code & modifier)) {
798 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
799 "Duplicate modifier '%c' after '%c' in %s",
800 *patptr, (int) TYPE_NO_MODIFIERS(code),
808 /* inherit modifiers */
809 code |= inherited_modifiers;
811 /* look for count and/or / */
812 if (patptr < patend) {
813 if (isDIGIT(*patptr)) {
814 patptr = get_num( patptr, &symptr->length );
815 symptr->howlen = e_number;
817 } else if (*patptr == '*') {
819 symptr->howlen = e_star;
821 } else if (*patptr == '[') {
822 const char* lenptr = ++patptr;
823 symptr->howlen = e_number;
824 patptr = group_end( patptr, patend, ']' ) + 1;
825 /* what kind of [] is it? */
826 if (isDIGIT(*lenptr)) {
827 lenptr = get_num( lenptr, &symptr->length );
829 Perl_croak(aTHX_ "Malformed integer in [] in %s",
832 tempsym_t savsym = *symptr;
833 symptr->patend = patptr-1;
834 symptr->patptr = lenptr;
835 savsym.length = measure_struct(symptr);
839 symptr->howlen = e_no_len;
844 while (patptr < patend) {
845 if (isSPACE(*patptr))
847 else if (*patptr == '#') {
849 while (patptr < patend && *patptr != '\n')
854 if (*patptr == '/') {
855 symptr->flags |= FLAG_SLASH;
857 if (patptr < patend &&
858 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
859 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
866 /* at end - no count, no / */
867 symptr->howlen = e_no_len;
872 symptr->patptr = patptr;
876 symptr->patptr = patptr;
881 There is no way to cleanly handle the case where we should process the
882 string per byte in its upgraded form while it's really in downgraded form
883 (e.g. estimates like strend-s as an upper bound for the number of
884 characters left wouldn't work). So if we foresee the need of this
885 (pattern starts with U or contains U0), we want to work on the encoded
886 version of the string. Users are advised to upgrade their pack string
887 themselves if they need to do a lot of unpacks like this on it
890 need_utf8(const char *pat, const char *patend)
894 PERL_ARGS_ASSERT_NEED_UTF8;
896 while (pat < patend) {
899 pat = (const char *) memchr(pat, '\n', patend-pat);
900 if (!pat) return FALSE;
901 } else if (pat[0] == 'U') {
902 if (first || pat[1] == '0') return TRUE;
903 } else first = FALSE;
910 first_symbol(const char *pat, const char *patend) {
911 PERL_ARGS_ASSERT_FIRST_SYMBOL;
913 while (pat < patend) {
914 if (pat[0] != '#') return pat[0];
916 pat = (const char *) memchr(pat, '\n', patend-pat);
924 =for apidoc unpackstring
926 The engine implementing the unpack() Perl function.
928 Using the template pat..patend, this function unpacks the string
929 s..strend into a number of mortal SVs, which it pushes onto the perl
930 argument (@_) stack (so you will need to issue a C<PUTBACK> before and
931 C<SPAGAIN> after the call to this function). It returns the number of
934 The strend and patend pointers should point to the byte following the last
935 character of each string.
937 Although this function returns its values on the perl argument stack, it
938 doesn't take any parameters from that stack (and thus in particular
939 there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
945 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
949 PERL_ARGS_ASSERT_UNPACKSTRING;
951 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
952 else if (need_utf8(pat, patend)) {
953 /* We probably should try to avoid this in case a scalar context call
954 wouldn't get to the "U0" */
955 STRLEN len = strend - s;
956 s = (char *) bytes_to_utf8((U8 *) s, &len);
959 flags |= FLAG_DO_UTF8;
962 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
963 flags |= FLAG_PARSE_UTF8;
965 TEMPSYM_INIT(&sym, pat, patend, flags);
967 return unpack_rec(&sym, s, s, strend, NULL );
971 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
975 const I32 start_sp_offset = SP - PL_stack_base;
980 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
982 bool explicit_length;
983 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
984 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
986 PERL_ARGS_ASSERT_UNPACK_REC;
988 symptr->strbeg = s - strbeg;
990 while (next_symbol(symptr)) {
993 I32 datumtype = symptr->code;
994 /* do first one only unless in list context
995 / is implemented by unpacking the count, then popping it from the
996 stack, so must check that we're not in the middle of a / */
998 && (SP - PL_stack_base == start_sp_offset + 1)
999 && (datumtype != '/') ) /* XXX can this be omitted */
1002 switch (howlen = symptr->howlen) {
1004 len = strend - strbeg; /* long enough */
1007 /* e_no_len and e_number */
1008 len = symptr->length;
1012 explicit_length = TRUE;
1014 beyond = s >= strend;
1016 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1018 /* props nonzero means we can process this letter. */
1019 const long size = props & PACK_SIZE_MASK;
1020 const long howmany = (strend - s) / size;
1024 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1025 if (len && unpack_only_one) len = 1;
1031 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1033 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1036 if (howlen == e_no_len)
1037 len = 16; /* len is not specified */
1045 tempsym_t savsym = *symptr;
1046 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1047 symptr->flags |= group_modifiers;
1048 symptr->patend = savsym.grpend;
1049 symptr->previous = &savsym;
1052 if (len && unpack_only_one) len = 1;
1054 symptr->patptr = savsym.grpbeg;
1055 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1056 else symptr->flags &= ~FLAG_PARSE_UTF8;
1057 unpack_rec(symptr, s, strbeg, strend, &s);
1058 if (s == strend && savsym.howlen == e_star)
1059 break; /* No way to continue */
1062 savsym.flags = symptr->flags & ~group_modifiers;
1066 case '.' | TYPE_IS_SHRIEKING:
1070 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1071 if (howlen == e_star) from = strbeg;
1072 else if (len <= 0) from = s;
1074 tempsym_t *group = symptr;
1076 while (--len && group) group = group->previous;
1077 from = group ? strbeg + group->strbeg : strbeg;
1080 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1081 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1085 case '@' | TYPE_IS_SHRIEKING:
1087 s = strbeg + symptr->strbeg;
1088 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1092 Perl_croak(aTHX_ "'@' outside of string in unpack");
1097 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1100 Perl_croak(aTHX_ "'@' outside of string in unpack");
1104 case 'X' | TYPE_IS_SHRIEKING:
1105 if (!len) /* Avoid division by 0 */
1108 const char *hop, *last;
1110 hop = last = strbeg;
1112 hop += UTF8SKIP(hop);
1119 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1123 len = (s - strbeg) % len;
1129 Perl_croak(aTHX_ "'X' outside of string in unpack");
1130 while (--s, UTF8_IS_CONTINUATION(*s)) {
1132 Perl_croak(aTHX_ "'X' outside of string in unpack");
1137 if (len > s - strbeg)
1138 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1142 case 'x' | TYPE_IS_SHRIEKING: {
1144 if (!len) /* Avoid division by 0 */
1146 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1147 else ai32 = (s - strbeg) % len;
1148 if (ai32 == 0) break;
1156 Perl_croak(aTHX_ "'x' outside of string in unpack");
1161 if (len > strend - s)
1162 Perl_croak(aTHX_ "'x' outside of string in unpack");
1167 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1173 /* Preliminary length estimate is assumed done in 'W' */
1174 if (len > strend - s) len = strend - s;
1180 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1181 if (hop >= strend) {
1183 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1188 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1190 } else if (len > strend - s)
1193 if (datumtype == 'Z') {
1194 /* 'Z' strips stuff after first null */
1195 const char *ptr, *end;
1197 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1198 sv = newSVpvn(s, ptr-s);
1199 if (howlen == e_star) /* exact for 'Z*' */
1200 len = ptr-s + (ptr != strend ? 1 : 0);
1201 } else if (datumtype == 'A') {
1202 /* 'A' strips both nulls and spaces */
1204 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1205 for (ptr = s+len-1; ptr >= s; ptr--)
1206 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1207 !isSPACE_utf8(ptr)) break;
1208 if (ptr >= s) ptr += UTF8SKIP(ptr);
1211 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1213 for (ptr = s+len-1; ptr >= s; ptr--)
1214 if (*ptr != 0 && !isSPACE(*ptr)) break;
1217 sv = newSVpvn(s, ptr-s);
1218 } else sv = newSVpvn(s, len);
1222 /* Undo any upgrade done due to need_utf8() */
1223 if (!(symptr->flags & FLAG_WAS_UTF8))
1224 sv_utf8_downgrade(sv, 0);
1232 if (howlen == e_star || len > (strend - s) * 8)
1233 len = (strend - s) * 8;
1236 while (len >= 8 && s < strend) {
1237 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1242 cuv += PL_bitcount[*(U8 *)s++];
1245 if (len && s < strend) {
1247 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1248 if (datumtype == 'b')
1250 if (bits & 1) cuv++;
1255 if (bits & 0x80) cuv++;
1262 sv = sv_2mortal(newSV(len ? len : 1));
1265 if (datumtype == 'b') {
1267 const I32 ai32 = len;
1268 for (len = 0; len < ai32; len++) {
1269 if (len & 7) bits >>= 1;
1271 if (s >= strend) break;
1272 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1273 } else bits = *(U8 *) s++;
1274 *str++ = bits & 1 ? '1' : '0';
1278 const I32 ai32 = len;
1279 for (len = 0; len < ai32; len++) {
1280 if (len & 7) bits <<= 1;
1282 if (s >= strend) break;
1283 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1284 } else bits = *(U8 *) s++;
1285 *str++ = bits & 0x80 ? '1' : '0';
1289 SvCUR_set(sv, str - SvPVX_const(sv));
1296 /* Preliminary length estimate, acceptable for utf8 too */
1297 if (howlen == e_star || len > (strend - s) * 2)
1298 len = (strend - s) * 2;
1300 sv = sv_2mortal(newSV(len ? len : 1));
1304 if (datumtype == 'h') {
1307 for (len = 0; len < ai32; len++) {
1308 if (len & 1) bits >>= 4;
1310 if (s >= strend) break;
1311 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1312 } else bits = * (U8 *) s++;
1314 *str++ = PL_hexdigit[bits & 15];
1318 const I32 ai32 = len;
1319 for (len = 0; len < ai32; len++) {
1320 if (len & 1) bits <<= 4;
1322 if (s >= strend) break;
1323 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1324 } else bits = *(U8 *) s++;
1326 *str++ = PL_hexdigit[(bits >> 4) & 15];
1331 SvCUR_set(sv, str - SvPVX_const(sv));
1338 if (explicit_length)
1339 /* Switch to "character" mode */
1340 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1345 while (len-- > 0 && s < strend) {
1350 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1351 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1352 if (retlen == (STRLEN) -1 || retlen == 0)
1353 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1357 aint = *(U8 *)(s)++;
1358 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1362 else if (checksum > bits_in_uv)
1363 cdouble += (NV)aint;
1371 while (len-- > 0 && s < strend) {
1373 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1374 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1375 if (retlen == (STRLEN) -1 || retlen == 0)
1376 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1380 else if (checksum > bits_in_uv)
1381 cdouble += (NV) val;
1385 } else if (!checksum)
1387 const U8 ch = *(U8 *) s++;
1390 else if (checksum > bits_in_uv)
1391 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1393 while (len-- > 0) cuv += *(U8 *) s++;
1397 if (explicit_length && howlen != e_star) {
1398 /* Switch to "bytes in UTF-8" mode */
1399 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1401 /* Should be impossible due to the need_utf8() test */
1402 Perl_croak(aTHX_ "U0 mode on a byte string");
1406 if (len > strend - s) len = strend - s;
1408 if (len && unpack_only_one) len = 1;
1412 while (len-- > 0 && s < strend) {
1416 U8 result[UTF8_MAXLEN];
1417 const char *ptr = s;
1419 /* Bug: warns about bad utf8 even if we are short on bytes
1420 and will break out of the loop */
1421 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1424 len = UTF8SKIP(result);
1425 if (!uni_to_bytes(aTHX_ &ptr, strend,
1426 (char *) &result[1], len-1, 'U')) break;
1427 auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT);
1430 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
1431 if (retlen == (STRLEN) -1 || retlen == 0)
1432 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1437 else if (checksum > bits_in_uv)
1438 cdouble += (NV) auv;
1443 case 's' | TYPE_IS_SHRIEKING:
1444 #if SHORTSIZE != SIZE16
1447 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1448 DO_BO_UNPACK(ashort, s);
1451 else if (checksum > bits_in_uv)
1452 cdouble += (NV)ashort;
1464 #if U16SIZE > SIZE16
1467 SHIFT16(utf8, s, strend, &ai16, datumtype);
1468 DO_BO_UNPACK(ai16, 16);
1469 #if U16SIZE > SIZE16
1475 else if (checksum > bits_in_uv)
1476 cdouble += (NV)ai16;
1481 case 'S' | TYPE_IS_SHRIEKING:
1482 #if SHORTSIZE != SIZE16
1484 unsigned short aushort;
1485 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1486 DO_BO_UNPACK(aushort, s);
1489 else if (checksum > bits_in_uv)
1490 cdouble += (NV)aushort;
1503 #if U16SIZE > SIZE16
1506 SHIFT16(utf8, s, strend, &au16, datumtype);
1507 DO_BO_UNPACK(au16, 16);
1508 if (datumtype == 'n')
1509 au16 = PerlSock_ntohs(au16);
1510 if (datumtype == 'v')
1514 else if (checksum > bits_in_uv)
1515 cdouble += (NV) au16;
1520 case 'v' | TYPE_IS_SHRIEKING:
1521 case 'n' | TYPE_IS_SHRIEKING:
1524 # if U16SIZE > SIZE16
1527 SHIFT16(utf8, s, strend, &ai16, datumtype);
1528 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1529 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1530 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1531 ai16 = (I16) vtohs((U16) ai16);
1534 else if (checksum > bits_in_uv)
1535 cdouble += (NV) ai16;
1541 case 'i' | TYPE_IS_SHRIEKING:
1544 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1545 DO_BO_UNPACK(aint, i);
1548 else if (checksum > bits_in_uv)
1549 cdouble += (NV)aint;
1555 case 'I' | TYPE_IS_SHRIEKING:
1558 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1559 DO_BO_UNPACK(auint, i);
1562 else if (checksum > bits_in_uv)
1563 cdouble += (NV)auint;
1571 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1572 #if IVSIZE == INTSIZE
1573 DO_BO_UNPACK(aiv, i);
1574 #elif IVSIZE == LONGSIZE
1575 DO_BO_UNPACK(aiv, l);
1576 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1577 DO_BO_UNPACK(aiv, 64);
1579 Perl_croak(aTHX_ "'j' not supported on this platform");
1583 else if (checksum > bits_in_uv)
1592 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1593 #if IVSIZE == INTSIZE
1594 DO_BO_UNPACK(auv, i);
1595 #elif IVSIZE == LONGSIZE
1596 DO_BO_UNPACK(auv, l);
1597 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1598 DO_BO_UNPACK(auv, 64);
1600 Perl_croak(aTHX_ "'J' not supported on this platform");
1604 else if (checksum > bits_in_uv)
1610 case 'l' | TYPE_IS_SHRIEKING:
1611 #if LONGSIZE != SIZE32
1614 SHIFT_VAR(utf8, s, strend, along, datumtype);
1615 DO_BO_UNPACK(along, l);
1618 else if (checksum > bits_in_uv)
1619 cdouble += (NV)along;
1630 #if U32SIZE > SIZE32
1633 SHIFT32(utf8, s, strend, &ai32, datumtype);
1634 DO_BO_UNPACK(ai32, 32);
1635 #if U32SIZE > SIZE32
1636 if (ai32 > 2147483647) ai32 -= 4294967296;
1640 else if (checksum > bits_in_uv)
1641 cdouble += (NV)ai32;
1646 case 'L' | TYPE_IS_SHRIEKING:
1647 #if LONGSIZE != SIZE32
1649 unsigned long aulong;
1650 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1651 DO_BO_UNPACK(aulong, l);
1654 else if (checksum > bits_in_uv)
1655 cdouble += (NV)aulong;
1668 #if U32SIZE > SIZE32
1671 SHIFT32(utf8, s, strend, &au32, datumtype);
1672 DO_BO_UNPACK(au32, 32);
1673 if (datumtype == 'N')
1674 au32 = PerlSock_ntohl(au32);
1675 if (datumtype == 'V')
1679 else if (checksum > bits_in_uv)
1680 cdouble += (NV)au32;
1685 case 'V' | TYPE_IS_SHRIEKING:
1686 case 'N' | TYPE_IS_SHRIEKING:
1689 #if U32SIZE > SIZE32
1692 SHIFT32(utf8, s, strend, &ai32, datumtype);
1693 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1694 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1695 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1696 ai32 = (I32)vtohl((U32)ai32);
1699 else if (checksum > bits_in_uv)
1700 cdouble += (NV)ai32;
1708 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1709 DO_BO_UNPACK_PC(aptr);
1710 /* newSVpv generates undef if aptr is NULL */
1711 mPUSHs(newSVpv(aptr, 0));
1719 while (len > 0 && s < strend) {
1721 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1722 auv = (auv << 7) | (ch & 0x7f);
1723 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1731 if (++bytes >= sizeof(UV)) { /* promote to string */
1734 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
1735 while (s < strend) {
1736 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1737 sv = mul128(sv, (U8)(ch & 0x7f));
1743 t = SvPV_nolen_const(sv);
1752 if ((s >= strend) && bytes)
1753 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1757 if (symptr->howlen == e_star)
1758 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1760 if (s + sizeof(char*) <= strend) {
1762 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1763 DO_BO_UNPACK_PC(aptr);
1764 /* newSVpvn generates undef if aptr is NULL */
1765 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1772 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
1773 DO_BO_UNPACK(aquad, 64);
1775 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
1776 newSViv((IV)aquad) : newSVnv((NV)aquad));
1777 else if (checksum > bits_in_uv)
1778 cdouble += (NV)aquad;
1786 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
1787 DO_BO_UNPACK(auquad, 64);
1789 mPUSHs(auquad <= UV_MAX ?
1790 newSVuv((UV)auquad) : newSVnv((NV)auquad));
1791 else if (checksum > bits_in_uv)
1792 cdouble += (NV)auquad;
1797 #endif /* HAS_QUAD */
1798 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1802 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
1803 DO_BO_UNPACK_N(afloat, float);
1813 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
1814 DO_BO_UNPACK_N(adouble, double);
1824 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), datumtype);
1825 DO_BO_UNPACK_N(anv.nv, NV);
1832 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1836 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, sizeof(aldouble.bytes), datumtype);
1837 DO_BO_UNPACK_N(aldouble.ld, long double);
1839 mPUSHn(aldouble.ld);
1841 cdouble += aldouble.ld;
1847 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1848 sv = sv_2mortal(newSV(l));
1849 if (l) SvPOK_on(sv);
1852 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1857 next_uni_uu(aTHX_ &s, strend, &a);
1858 next_uni_uu(aTHX_ &s, strend, &b);
1859 next_uni_uu(aTHX_ &s, strend, &c);
1860 next_uni_uu(aTHX_ &s, strend, &d);
1861 hunk[0] = (char)((a << 2) | (b >> 4));
1862 hunk[1] = (char)((b << 4) | (c >> 2));
1863 hunk[2] = (char)((c << 6) | d);
1865 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1873 /* possible checksum byte */
1874 const char *skip = s+UTF8SKIP(s);
1875 if (skip < strend && *skip == '\n')
1881 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1885 len = PL_uudmap[*(U8*)s++] & 077;
1887 if (s < strend && ISUUCHAR(*s))
1888 a = PL_uudmap[*(U8*)s++] & 077;
1891 if (s < strend && ISUUCHAR(*s))
1892 b = PL_uudmap[*(U8*)s++] & 077;
1895 if (s < strend && ISUUCHAR(*s))
1896 c = PL_uudmap[*(U8*)s++] & 077;
1899 if (s < strend && ISUUCHAR(*s))
1900 d = PL_uudmap[*(U8*)s++] & 077;
1903 hunk[0] = (char)((a << 2) | (b >> 4));
1904 hunk[1] = (char)((b << 4) | (c >> 2));
1905 hunk[2] = (char)((c << 6) | d);
1907 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1912 else /* possible checksum byte */
1913 if (s + 1 < strend && s[1] == '\n')
1923 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1924 (checksum > bits_in_uv &&
1925 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1928 anv = (NV) (1 << (checksum & 15));
1929 while (checksum >= 16) {
1933 while (cdouble < 0.0)
1935 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
1936 sv = newSVnv(cdouble);
1939 if (checksum < bits_in_uv) {
1940 UV mask = ((UV)1 << checksum) - 1;
1949 if (symptr->flags & FLAG_SLASH){
1950 if (SP - PL_stack_base - start_sp_offset <= 0)
1952 if( next_symbol(symptr) ){
1953 if( symptr->howlen == e_number )
1954 Perl_croak(aTHX_ "Count after length/code in unpack" );
1956 /* ...end of char buffer then no decent length available */
1957 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1959 /* take top of stack (hope it's numeric) */
1962 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1965 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1967 datumtype = symptr->code;
1968 explicit_length = FALSE;
1976 return SP - PL_stack_base - start_sp_offset;
1984 I32 gimme = GIMME_V;
1987 const char *pat = SvPV_const(left, llen);
1988 const char *s = SvPV_const(right, rlen);
1989 const char *strend = s + rlen;
1990 const char *patend = pat + llen;
1994 cnt = unpackstring(pat, patend, s, strend,
1995 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
1996 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
1999 if ( !cnt && gimme == G_SCALAR )
2000 PUSHs(&PL_sv_undef);
2005 doencodes(U8 *h, const char *s, I32 len)
2007 *h++ = PL_uuemap[len];
2009 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2010 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2011 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2012 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2017 const char r = (len > 1 ? s[1] : '\0');
2018 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2019 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2020 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2021 *h++ = PL_uuemap[0];
2028 S_is_an_int(pTHX_ const char *s, STRLEN l)
2030 SV *result = newSVpvn(s, l);
2031 char *const result_c = SvPV_nolen(result); /* convenience */
2032 char *out = result_c;
2036 PERL_ARGS_ASSERT_IS_AN_INT;
2044 SvREFCNT_dec(result);
2067 SvREFCNT_dec(result);
2073 SvCUR_set(result, out - result_c);
2077 /* pnum must be '\0' terminated */
2079 S_div128(pTHX_ SV *pnum, bool *done)
2082 char * const s = SvPV(pnum, len);
2086 PERL_ARGS_ASSERT_DIV128;
2090 const int i = m * 10 + (*t - '0');
2091 const int r = (i >> 7); /* r < 10 */
2099 SvCUR_set(pnum, (STRLEN) (t - s));
2104 =for apidoc packlist
2106 The engine implementing pack() Perl function.
2112 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
2117 PERL_ARGS_ASSERT_PACKLIST;
2119 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2121 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2122 Also make sure any UTF8 flag is loaded */
2123 SvPV_force_nolen(cat);
2125 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2127 (void)pack_rec( cat, &sym, beglist, endlist );
2130 /* like sv_utf8_upgrade, but also repoint the group start markers */
2132 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2135 const char *from_ptr, *from_start, *from_end, **marks, **m;
2136 char *to_start, *to_ptr;
2138 if (SvUTF8(sv)) return;
2140 from_start = SvPVX_const(sv);
2141 from_end = from_start + SvCUR(sv);
2142 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2143 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2144 if (from_ptr == from_end) {
2145 /* Simple case: no character needs to be changed */
2150 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2151 Newx(to_start, len, char);
2152 Copy(from_start, to_start, from_ptr-from_start, char);
2153 to_ptr = to_start + (from_ptr-from_start);
2155 Newx(marks, sym_ptr->level+2, const char *);
2156 for (group=sym_ptr; group; group = group->previous)
2157 marks[group->level] = from_start + group->strbeg;
2158 marks[sym_ptr->level+1] = from_end+1;
2159 for (m = marks; *m < from_ptr; m++)
2160 *m = to_start + (*m-from_start);
2162 for (;from_ptr < from_end; from_ptr++) {
2163 while (*m == from_ptr) *m++ = to_ptr;
2164 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2168 while (*m == from_ptr) *m++ = to_ptr;
2169 if (m != marks + sym_ptr->level+1) {
2172 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2173 "level=%d", m, marks, sym_ptr->level);
2175 for (group=sym_ptr; group; group = group->previous)
2176 group->strbeg = marks[group->level] - to_start;
2181 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2182 from_start -= SvIVX(sv);
2185 SvFLAGS(sv) &= ~SVf_OOK;
2188 Safefree(from_start);
2189 SvPV_set(sv, to_start);
2190 SvCUR_set(sv, to_ptr - to_start);
2195 /* Exponential string grower. Makes string extension effectively O(n)
2196 needed says how many extra bytes we need (not counting the final '\0')
2197 Only grows the string if there is an actual lack of space
2200 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2201 const STRLEN cur = SvCUR(sv);
2202 const STRLEN len = SvLEN(sv);
2205 PERL_ARGS_ASSERT_SV_EXP_GROW;
2207 if (len - cur > needed) return SvPVX(sv);
2208 extend = needed > len ? needed : len;
2209 return SvGROW(sv, len+extend+1);
2214 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2217 tempsym_t lookahead;
2218 I32 items = endlist - beglist;
2219 bool found = next_symbol(symptr);
2220 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2221 bool warn_utf8 = ckWARN(WARN_UTF8);
2223 PERL_ARGS_ASSERT_PACK_REC;
2225 if (symptr->level == 0 && found && symptr->code == 'U') {
2226 marked_upgrade(aTHX_ cat, symptr);
2227 symptr->flags |= FLAG_DO_UTF8;
2230 symptr->strbeg = SvCUR(cat);
2236 SV *lengthcode = NULL;
2237 I32 datumtype = symptr->code;
2238 howlen_t howlen = symptr->howlen;
2239 char *start = SvPVX(cat);
2240 char *cur = start + SvCUR(cat);
2242 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2246 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2250 /* e_no_len and e_number */
2251 len = symptr->length;
2256 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2258 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2259 /* We can process this letter. */
2260 STRLEN size = props & PACK_SIZE_MASK;
2261 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2265 /* Look ahead for next symbol. Do we have code/code? */
2266 lookahead = *symptr;
2267 found = next_symbol(&lookahead);
2268 if (symptr->flags & FLAG_SLASH) {
2270 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2271 if (strchr("aAZ", lookahead.code)) {
2272 if (lookahead.howlen == e_number) count = lookahead.length;
2275 count = sv_len_utf8(*beglist);
2278 if (lookahead.code == 'Z') count++;
2281 if (lookahead.howlen == e_number && lookahead.length < items)
2282 count = lookahead.length;
2285 lookahead.howlen = e_number;
2286 lookahead.length = count;
2287 lengthcode = sv_2mortal(newSViv(count));
2290 /* Code inside the switch must take care to properly update
2291 cat (CUR length and '\0' termination) if it updated *cur and
2292 doesn't simply leave using break */
2293 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2295 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2296 (int) TYPE_NO_MODIFIERS(datumtype));
2298 Perl_croak(aTHX_ "'%%' may not be used in pack");
2301 case '.' | TYPE_IS_SHRIEKING:
2303 if (howlen == e_star) from = start;
2304 else if (len == 0) from = cur;
2306 tempsym_t *group = symptr;
2308 while (--len && group) group = group->previous;
2309 from = group ? start + group->strbeg : start;
2312 len = SvIV(fromstr);
2314 case '@' | TYPE_IS_SHRIEKING:
2316 from = start + symptr->strbeg;
2318 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2320 while (len && from < cur) {
2321 from += UTF8SKIP(from);
2325 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2327 /* Here we know from == cur */
2329 GROWING(0, cat, start, cur, len);
2330 Zero(cur, len, char);
2332 } else if (from < cur) {
2335 } else goto no_change;
2343 if (len > 0) goto grow;
2344 if (len == 0) goto no_change;
2351 tempsym_t savsym = *symptr;
2352 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2353 symptr->flags |= group_modifiers;
2354 symptr->patend = savsym.grpend;
2356 symptr->previous = &lookahead;
2359 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2360 else symptr->flags &= ~FLAG_PARSE_UTF8;
2361 was_utf8 = SvUTF8(cat);
2362 symptr->patptr = savsym.grpbeg;
2363 beglist = pack_rec(cat, symptr, beglist, endlist);
2364 if (SvUTF8(cat) != was_utf8)
2365 /* This had better be an upgrade while in utf8==0 mode */
2368 if (savsym.howlen == e_star && beglist == endlist)
2369 break; /* No way to continue */
2371 items = endlist - beglist;
2372 lookahead.flags = symptr->flags & ~group_modifiers;
2375 case 'X' | TYPE_IS_SHRIEKING:
2376 if (!len) /* Avoid division by 0 */
2383 hop += UTF8SKIP(hop);
2390 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2394 len = (cur-start) % len;
2398 if (len < 1) goto no_change;
2402 Perl_croak(aTHX_ "'%c' outside of string in pack",
2403 (int) TYPE_NO_MODIFIERS(datumtype));
2404 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2406 Perl_croak(aTHX_ "'%c' outside of string in pack",
2407 (int) TYPE_NO_MODIFIERS(datumtype));
2413 if (cur - start < len)
2414 Perl_croak(aTHX_ "'%c' outside of string in pack",
2415 (int) TYPE_NO_MODIFIERS(datumtype));
2418 if (cur < start+symptr->strbeg) {
2419 /* Make sure group starts don't point into the void */
2421 const STRLEN length = cur-start;
2422 for (group = symptr;
2423 group && length < group->strbeg;
2424 group = group->previous) group->strbeg = length;
2425 lookahead.strbeg = length;
2428 case 'x' | TYPE_IS_SHRIEKING: {
2430 if (!len) /* Avoid division by 0 */
2432 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2433 else ai32 = (cur - start) % len;
2434 if (ai32 == 0) goto no_change;
2446 aptr = SvPV_const(fromstr, fromlen);
2447 if (DO_UTF8(fromstr)) {
2448 const char *end, *s;
2450 if (!utf8 && !SvUTF8(cat)) {
2451 marked_upgrade(aTHX_ cat, symptr);
2452 lookahead.flags |= FLAG_DO_UTF8;
2453 lookahead.strbeg = symptr->strbeg;
2456 cur = start + SvCUR(cat);
2458 if (howlen == e_star) {
2459 if (utf8) goto string_copy;
2463 end = aptr + fromlen;
2464 fromlen = datumtype == 'Z' ? len-1 : len;
2465 while ((I32) fromlen > 0 && s < end) {
2470 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2473 if (datumtype == 'Z') len++;
2479 fromlen = len - fromlen;
2480 if (datumtype == 'Z') fromlen--;
2481 if (howlen == e_star) {
2483 if (datumtype == 'Z') len++;
2485 GROWING(0, cat, start, cur, len);
2486 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2487 datumtype | TYPE_IS_PACK))
2488 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2489 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2490 (int)datumtype, aptr, end, cur, (UV)fromlen);
2494 if (howlen == e_star) {
2496 if (datumtype == 'Z') len++;
2498 if (len <= (I32) fromlen) {
2500 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2502 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2504 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2505 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2507 while (fromlen > 0) {
2508 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2514 if (howlen == e_star) {
2516 if (datumtype == 'Z') len++;
2518 if (len <= (I32) fromlen) {
2520 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2522 GROWING(0, cat, start, cur, len);
2523 Copy(aptr, cur, fromlen, char);
2527 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2534 const char *str, *end;
2541 str = SvPV_const(fromstr, fromlen);
2542 end = str + fromlen;
2543 if (DO_UTF8(fromstr)) {
2545 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2547 utf8_source = FALSE;
2548 utf8_flags = 0; /* Unused, but keep compilers happy */
2550 if (howlen == e_star) len = fromlen;
2551 field_len = (len+7)/8;
2552 GROWING(utf8, cat, start, cur, field_len);
2553 if (len > (I32)fromlen) len = fromlen;
2556 if (datumtype == 'B')
2560 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2562 } else bits |= *str++ & 1;
2563 if (l & 7) bits <<= 1;
2565 PUSH_BYTE(utf8, cur, bits);
2570 /* datumtype == 'b' */
2574 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2575 if (val & 1) bits |= 0x80;
2576 } else if (*str++ & 1)
2578 if (l & 7) bits >>= 1;
2580 PUSH_BYTE(utf8, cur, bits);
2586 if (datumtype == 'B')
2587 bits <<= 7 - (l & 7);
2589 bits >>= 7 - (l & 7);
2590 PUSH_BYTE(utf8, cur, bits);
2593 /* Determine how many chars are left in the requested field */
2595 if (howlen == e_star) field_len = 0;
2596 else field_len -= l;
2597 Zero(cur, field_len, char);
2603 const char *str, *end;
2610 str = SvPV_const(fromstr, fromlen);
2611 end = str + fromlen;
2612 if (DO_UTF8(fromstr)) {
2614 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2616 utf8_source = FALSE;
2617 utf8_flags = 0; /* Unused, but keep compilers happy */
2619 if (howlen == e_star) len = fromlen;
2620 field_len = (len+1)/2;
2621 GROWING(utf8, cat, start, cur, field_len);
2622 if (!utf8 && len > (I32)fromlen) len = fromlen;
2625 if (datumtype == 'H')
2629 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2630 if (val < 256 && isALPHA(val))
2631 bits |= (val + 9) & 0xf;
2634 } else if (isALPHA(*str))
2635 bits |= (*str++ + 9) & 0xf;
2637 bits |= *str++ & 0xf;
2638 if (l & 1) bits <<= 4;
2640 PUSH_BYTE(utf8, cur, bits);
2648 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2649 if (val < 256 && isALPHA(val))
2650 bits |= ((val + 9) & 0xf) << 4;
2652 bits |= (val & 0xf) << 4;
2653 } else if (isALPHA(*str))
2654 bits |= ((*str++ + 9) & 0xf) << 4;
2656 bits |= (*str++ & 0xf) << 4;
2657 if (l & 1) bits >>= 4;
2659 PUSH_BYTE(utf8, cur, bits);
2665 PUSH_BYTE(utf8, cur, bits);
2668 /* Determine how many chars are left in the requested field */
2670 if (howlen == e_star) field_len = 0;
2671 else field_len -= l;
2672 Zero(cur, field_len, char);
2680 aiv = SvIV(fromstr);
2681 if ((-128 > aiv || aiv > 127))
2682 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2683 "Character in 'c' format wrapped in pack");
2684 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2689 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2695 aiv = SvIV(fromstr);
2696 if ((0 > aiv || aiv > 0xff))
2697 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2698 "Character in 'C' format wrapped in pack");
2699 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2704 U8 in_bytes = (U8)IN_BYTES;
2706 end = start+SvLEN(cat)-1;
2707 if (utf8) end -= UTF8_MAXLEN-1;
2711 auv = SvUV(fromstr);
2712 if (in_bytes) auv = auv % 0x100;
2717 SvCUR_set(cat, cur - start);
2719 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2720 end = start+SvLEN(cat)-UTF8_MAXLEN;
2722 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2725 0 : UNICODE_ALLOW_ANY);
2730 SvCUR_set(cat, cur - start);
2731 marked_upgrade(aTHX_ cat, symptr);
2732 lookahead.flags |= FLAG_DO_UTF8;
2733 lookahead.strbeg = symptr->strbeg;
2736 cur = start + SvCUR(cat);
2737 end = start+SvLEN(cat)-UTF8_MAXLEN;
2740 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2741 "Character in 'W' format wrapped in pack");
2746 SvCUR_set(cat, cur - start);
2747 GROWING(0, cat, start, cur, len+1);
2748 end = start+SvLEN(cat)-1;
2750 *(U8 *) cur++ = (U8)auv;
2759 if (!(symptr->flags & FLAG_DO_UTF8)) {
2760 marked_upgrade(aTHX_ cat, symptr);
2761 lookahead.flags |= FLAG_DO_UTF8;
2762 lookahead.strbeg = symptr->strbeg;
2768 end = start+SvLEN(cat);
2769 if (!utf8) end -= UTF8_MAXLEN;
2773 auv = SvUV(fromstr);
2775 U8 buffer[UTF8_MAXLEN], *endb;
2776 endb = uvuni_to_utf8_flags(buffer, auv,
2778 0 : UNICODE_ALLOW_ANY);
2779 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2781 SvCUR_set(cat, cur - start);
2782 GROWING(0, cat, start, cur,
2783 len+(endb-buffer)*UTF8_EXPAND);
2784 end = start+SvLEN(cat);
2786 cur = bytes_to_uni(buffer, endb-buffer, cur);
2790 SvCUR_set(cat, cur - start);
2791 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2792 end = start+SvLEN(cat)-UTF8_MAXLEN;
2794 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
2796 0 : UNICODE_ALLOW_ANY);
2801 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2807 anv = SvNV(fromstr);
2808 # if defined(VMS) && !defined(_IEEE_FP)
2809 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2810 * on Alpha; fake it if we don't have them.
2814 else if (anv < -FLT_MAX)
2816 else afloat = (float)anv;
2818 afloat = (float)anv;
2820 DO_BO_PACK_N(afloat, float);
2821 PUSH_VAR(utf8, cur, afloat);
2829 anv = SvNV(fromstr);
2830 # if defined(VMS) && !defined(_IEEE_FP)
2831 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2832 * on Alpha; fake it if we don't have them.
2836 else if (anv < -DBL_MAX)
2838 else adouble = (double)anv;
2840 adouble = (double)anv;
2842 DO_BO_PACK_N(adouble, double);
2843 PUSH_VAR(utf8, cur, adouble);
2848 Zero(&anv, 1, NV); /* can be long double with unused bits */
2852 /* to work round a gcc/x86 bug; don't use SvNV */
2853 anv.nv = sv_2nv(fromstr);
2855 anv.nv = SvNV(fromstr);
2857 DO_BO_PACK_N(anv, NV);
2858 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes));
2862 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2865 /* long doubles can have unused bits, which may be nonzero */
2866 Zero(&aldouble, 1, long double);
2870 /* to work round a gcc/x86 bug; don't use SvNV */
2871 aldouble.ld = (long double)sv_2nv(fromstr);
2873 aldouble.ld = (long double)SvNV(fromstr);
2875 DO_BO_PACK_N(aldouble, long double);
2876 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes));
2881 case 'n' | TYPE_IS_SHRIEKING:
2886 ai16 = (I16)SvIV(fromstr);
2887 ai16 = PerlSock_htons(ai16);
2888 PUSH16(utf8, cur, &ai16);
2891 case 'v' | TYPE_IS_SHRIEKING:
2896 ai16 = (I16)SvIV(fromstr);
2898 PUSH16(utf8, cur, &ai16);
2901 case 'S' | TYPE_IS_SHRIEKING:
2902 #if SHORTSIZE != SIZE16
2904 unsigned short aushort;
2906 aushort = SvUV(fromstr);
2907 DO_BO_PACK(aushort, s);
2908 PUSH_VAR(utf8, cur, aushort);
2918 au16 = (U16)SvUV(fromstr);
2919 DO_BO_PACK(au16, 16);
2920 PUSH16(utf8, cur, &au16);
2923 case 's' | TYPE_IS_SHRIEKING:
2924 #if SHORTSIZE != SIZE16
2928 ashort = SvIV(fromstr);
2929 DO_BO_PACK(ashort, s);
2930 PUSH_VAR(utf8, cur, ashort);
2940 ai16 = (I16)SvIV(fromstr);
2941 DO_BO_PACK(ai16, 16);
2942 PUSH16(utf8, cur, &ai16);
2946 case 'I' | TYPE_IS_SHRIEKING:
2950 auint = SvUV(fromstr);
2951 DO_BO_PACK(auint, i);
2952 PUSH_VAR(utf8, cur, auint);
2959 aiv = SvIV(fromstr);
2960 #if IVSIZE == INTSIZE
2962 #elif IVSIZE == LONGSIZE
2964 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
2965 DO_BO_PACK(aiv, 64);
2967 Perl_croak(aTHX_ "'j' not supported on this platform");
2969 PUSH_VAR(utf8, cur, aiv);
2976 auv = SvUV(fromstr);
2977 #if UVSIZE == INTSIZE
2979 #elif UVSIZE == LONGSIZE
2981 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
2982 DO_BO_PACK(auv, 64);
2984 Perl_croak(aTHX_ "'J' not supported on this platform");
2986 PUSH_VAR(utf8, cur, auv);
2993 anv = SvNV(fromstr);
2997 SvCUR_set(cat, cur - start);
2998 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3001 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3002 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3003 any negative IVs will have already been got by the croak()
3004 above. IOK is untrue for fractions, so we test them
3005 against UV_MAX_P1. */
3006 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3007 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3008 char *in = buf + sizeof(buf);
3009 UV auv = SvUV(fromstr);
3012 *--in = (char)((auv & 0x7f) | 0x80);
3015 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3016 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3017 in, (buf + sizeof(buf)) - in);
3018 } else if (SvPOKp(fromstr))
3020 else if (SvNOKp(fromstr)) {
3021 /* 10**NV_MAX_10_EXP is the largest power of 10
3022 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
3023 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3024 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3025 And with that many bytes only Inf can overflow.
3026 Some C compilers are strict about integral constant
3027 expressions so we conservatively divide by a slightly
3028 smaller integer instead of multiplying by the exact
3029 floating-point value.
3031 #ifdef NV_MAX_10_EXP
3032 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3033 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3035 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3036 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3038 char *in = buf + sizeof(buf);
3040 anv = Perl_floor(anv);
3042 const NV next = Perl_floor(anv / 128);
3043 if (in <= buf) /* this cannot happen ;-) */
3044 Perl_croak(aTHX_ "Cannot compress integer in pack");
3045 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3048 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3049 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3050 in, (buf + sizeof(buf)) - in);
3059 /* Copy string and check for compliance */
3060 from = SvPV_const(fromstr, len);
3061 if ((norm = is_an_int(from, len)) == NULL)
3062 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3064 Newx(result, len, char);
3067 while (!done) *--in = div128(norm, &done) | 0x80;
3068 result[len - 1] &= 0x7F; /* clear continue bit */
3069 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3070 in, (result + len) - in);
3072 SvREFCNT_dec(norm); /* free norm */
3077 case 'i' | TYPE_IS_SHRIEKING:
3081 aint = SvIV(fromstr);
3082 DO_BO_PACK(aint, i);
3083 PUSH_VAR(utf8, cur, aint);
3086 case 'N' | TYPE_IS_SHRIEKING:
3091 au32 = SvUV(fromstr);
3092 au32 = PerlSock_htonl(au32);
3093 PUSH32(utf8, cur, &au32);
3096 case 'V' | TYPE_IS_SHRIEKING:
3101 au32 = SvUV(fromstr);
3103 PUSH32(utf8, cur, &au32);
3106 case 'L' | TYPE_IS_SHRIEKING:
3107 #if LONGSIZE != SIZE32
3109 unsigned long aulong;
3111 aulong = SvUV(fromstr);
3112 DO_BO_PACK(aulong, l);
3113 PUSH_VAR(utf8, cur, aulong);
3123 au32 = SvUV(fromstr);
3124 DO_BO_PACK(au32, 32);
3125 PUSH32(utf8, cur, &au32);
3128 case 'l' | TYPE_IS_SHRIEKING:
3129 #if LONGSIZE != SIZE32
3133 along = SvIV(fromstr);
3134 DO_BO_PACK(along, l);
3135 PUSH_VAR(utf8, cur, along);
3145 ai32 = SvIV(fromstr);
3146 DO_BO_PACK(ai32, 32);
3147 PUSH32(utf8, cur, &ai32);
3155 auquad = (Uquad_t) SvUV(fromstr);
3156 DO_BO_PACK(auquad, 64);
3157 PUSH_VAR(utf8, cur, auquad);
3164 aquad = (Quad_t)SvIV(fromstr);
3165 DO_BO_PACK(aquad, 64);
3166 PUSH_VAR(utf8, cur, aquad);
3169 #endif /* HAS_QUAD */
3171 len = 1; /* assume SV is correct length */
3172 GROWING(utf8, cat, start, cur, sizeof(char *));
3179 SvGETMAGIC(fromstr);
3180 if (!SvOK(fromstr)) aptr = NULL;
3182 /* XXX better yet, could spirit away the string to
3183 * a safe spot and hang on to it until the result
3184 * of pack() (and all copies of the result) are
3187 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3188 !SvREADONLY(fromstr)))) {
3189 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3190 "Attempt to pack pointer to temporary value");
3192 if (SvPOK(fromstr) || SvNIOK(fromstr))
3193 aptr = SvPV_nomg_const_nolen(fromstr);
3195 aptr = SvPV_force_flags_nolen(fromstr, 0);
3197 DO_BO_PACK_PC(aptr);
3198 PUSH_VAR(utf8, cur, aptr);
3202 const char *aptr, *aend;
3206 if (len <= 2) len = 45;
3207 else len = len / 3 * 3;
3209 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3210 "Field too wide in 'u' format in pack");
3213 aptr = SvPV_const(fromstr, fromlen);
3214 from_utf8 = DO_UTF8(fromstr);
3216 aend = aptr + fromlen;
3217 fromlen = sv_len_utf8_nomg(fromstr);
3218 } else aend = NULL; /* Unused, but keep compilers happy */
3219 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3220 while (fromlen > 0) {
3223 U8 hunk[1+63/3*4+1];
3225 if ((I32)fromlen > len)
3231 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3232 'u' | TYPE_IS_PACK)) {
3234 SvCUR_set(cat, cur - start);
3235 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3236 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3237 aptr, aend, buffer, (long) todo);
3239 end = doencodes(hunk, buffer, todo);
3241 end = doencodes(hunk, aptr, todo);
3244 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3251 SvCUR_set(cat, cur - start);
3253 *symptr = lookahead;
3262 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3265 SV *pat_sv = *++MARK;
3266 const char *pat = SvPV_const(pat_sv, fromlen);
3267 const char *patend = pat + fromlen;
3273 packlist(cat, pat, patend, MARK, SP + 1);
3283 * c-indentation-style: bsd
3285 * indent-tabs-mode: nil
3288 * ex: set ts=8 sts=4 sw=4 et: