3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
32 #define PERL_IN_PP_PACK_C
35 /* Types used by pack/unpack */
37 e_no_len, /* no length */
38 e_number, /* number, [] */
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
57 #define TEMPSYM_INIT(symptr, p, e, f) \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
81 U8 bytes[sizeof(long double)];
88 /* Maximum number of bytes to which a byte can grow due to upgrade */
92 * Offset for integer pack/unpack.
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
112 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
115 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
116 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
117 # define OFF16(p) ((char*)(p))
118 # define OFF32(p) ((char*)(p))
120 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
121 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
124 ++++ bad cray byte order
128 # define OFF16(p) ((char *) (p))
129 # define OFF32(p) ((char *) (p))
132 /* Only to be used inside a loop (see the break) */
133 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
135 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
137 Copy(s, OFF16(p), SIZE16, char); \
142 /* Only to be used inside a loop (see the break) */
143 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
145 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
147 Copy(s, OFF32(p), SIZE32, char); \
152 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
153 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
155 /* Only to be used inside a loop (see the break) */
156 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype) \
159 if (!uni_to_bytes(aTHX_ &s, strend, \
160 (char *) (buf), len, datumtype)) break; \
162 Copy(s, (char *) (buf), len, char); \
167 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
168 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype)
170 #define PUSH_VAR(utf8, aptr, var) \
171 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
173 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
174 #define MAX_SUB_TEMPLATE_LEVEL 100
176 /* flags (note that type modifiers can also be used as flags!) */
177 #define FLAG_WAS_UTF8 0x40
178 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
179 #define FLAG_UNPACK_ONLY_ONE 0x10
180 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
181 #define FLAG_SLASH 0x04
182 #define FLAG_COMMA 0x02
183 #define FLAG_PACK 0x01
186 S_mul128(pTHX_ SV *sv, U8 m)
189 char *s = SvPV(sv, len);
192 PERL_ARGS_ASSERT_MUL128;
194 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
195 SV * const tmpNew = newSVpvs("0000000000");
197 sv_catsv(tmpNew, sv);
198 SvREFCNT_dec(sv); /* free old sv */
203 while (!*t) /* trailing '\0'? */
206 const U32 i = ((*t - '0') << 7) + m;
207 *(t--) = '0' + (char)(i % 10);
213 /* Explosives and implosives. */
215 #if 'I' == 73 && 'J' == 74
216 /* On an ASCII/ISO kind of system */
217 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
220 Some other sort of character set - use memchr() so we don't match
223 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
227 #define TYPE_IS_SHRIEKING 0x100
228 #define TYPE_IS_BIG_ENDIAN 0x200
229 #define TYPE_IS_LITTLE_ENDIAN 0x400
230 #define TYPE_IS_PACK 0x800
231 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
232 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
233 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
235 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
236 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
238 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
240 # define DO_BO_UNPACK(var, type) \
242 switch (TYPE_ENDIANNESS(datumtype)) { \
243 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
244 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
249 # define DO_BO_PACK(var, type) \
251 switch (TYPE_ENDIANNESS(datumtype)) { \
252 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
253 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
258 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
260 switch (TYPE_ENDIANNESS(datumtype)) { \
261 case TYPE_IS_BIG_ENDIAN: \
262 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
264 case TYPE_IS_LITTLE_ENDIAN: \
265 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
272 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
274 switch (TYPE_ENDIANNESS(datumtype)) { \
275 case TYPE_IS_BIG_ENDIAN: \
276 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
278 case TYPE_IS_LITTLE_ENDIAN: \
279 var = (post_cast *) my_htole ## type ((pre_cast) var); \
286 # define BO_CANT_DOIT(action, type) \
288 switch (TYPE_ENDIANNESS(datumtype)) { \
289 case TYPE_IS_BIG_ENDIAN: \
290 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
291 "platform", #action, #type); \
293 case TYPE_IS_LITTLE_ENDIAN: \
294 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
295 "platform", #action, #type); \
302 # if PTRSIZE == INTSIZE
303 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
304 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
305 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
306 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
307 # elif PTRSIZE == LONGSIZE
308 # if LONGSIZE < IVSIZE && IVSIZE == 8
309 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, 64, IV, void)
310 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, 64, IV, void)
311 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, 64, IV, char)
312 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, 64, IV, char)
314 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
315 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
316 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
317 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
319 # elif PTRSIZE == IVSIZE
320 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
321 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
322 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
323 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
325 # define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
326 # define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
327 # define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
328 # define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
331 # if defined(my_htolen) && defined(my_letohn) && \
332 defined(my_htoben) && defined(my_betohn)
333 # define DO_BO_UNPACK_N(var, type) \
335 switch (TYPE_ENDIANNESS(datumtype)) { \
336 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
337 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
342 # define DO_BO_PACK_N(var, type) \
344 switch (TYPE_ENDIANNESS(datumtype)) { \
345 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
346 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
351 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
352 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
355 #define PACK_SIZE_CANNOT_CSUM 0x80
356 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
357 #define PACK_SIZE_MASK 0x3F
359 #include "packsizetables.c"
362 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
365 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
366 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
367 /* We try to process malformed UTF-8 as much as possible (preferably with
368 warnings), but these two mean we make no progress in the string and
369 might enter an infinite loop */
370 if (retlen == (STRLEN) -1 || retlen == 0)
371 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
372 (int) TYPE_NO_MODIFIERS(datumtype));
374 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
375 "Character in '%c' format wrapped in unpack",
376 (int) TYPE_NO_MODIFIERS(datumtype));
383 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
384 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
388 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
392 const char *from = *s;
394 const U32 flags = ckWARN(WARN_UTF8) ?
395 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
396 for (;buf_len > 0; buf_len--) {
397 if (from >= end) return FALSE;
398 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
399 if (retlen == (STRLEN) -1 || retlen == 0) {
400 from += UTF8SKIP(from);
402 } else from += retlen;
407 *(U8 *)buf++ = (U8)val;
409 /* We have enough characters for the buffer. Did we have problems ? */
412 /* Rewalk the string fragment while warning */
414 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
415 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
416 if (ptr >= end) break;
417 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
419 if (from > end) from = end;
422 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
423 WARN_PACK : WARN_UNPACK),
424 "Character(s) in '%c' format wrapped in %s",
425 (int) TYPE_NO_MODIFIERS(datumtype),
426 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
433 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
437 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
438 if (val >= 0x100 || !ISUUCHAR(val) ||
439 retlen == (STRLEN) -1 || retlen == 0) {
443 *out = PL_uudmap[val] & 077;
449 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
450 const U8 * const end = start + len;
452 PERL_ARGS_ASSERT_BYTES_TO_UNI;
454 while (start < end) {
455 const UV uv = NATIVE_TO_ASCII(*start);
456 if (UNI_IS_INVARIANT(uv))
457 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
459 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
460 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
467 #define PUSH_BYTES(utf8, cur, buf, len) \
470 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
472 Copy(buf, cur, len, char); \
477 #define GROWING(utf8, cat, start, cur, in_len) \
479 STRLEN glen = (in_len); \
480 if (utf8) glen *= UTF8_EXPAND; \
481 if ((cur) + glen >= (start) + SvLEN(cat)) { \
482 (start) = sv_exp_grow(cat, glen); \
483 (cur) = (start) + SvCUR(cat); \
487 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
489 const STRLEN glen = (in_len); \
491 if (utf8) gl *= UTF8_EXPAND; \
492 if ((cur) + gl >= (start) + SvLEN(cat)) { \
494 SvCUR_set((cat), (cur) - (start)); \
495 (start) = sv_exp_grow(cat, gl); \
496 (cur) = (start) + SvCUR(cat); \
498 PUSH_BYTES(utf8, cur, buf, glen); \
501 #define PUSH_BYTE(utf8, s, byte) \
504 const U8 au8 = (byte); \
505 (s) = bytes_to_uni(&au8, 1, (s)); \
506 } else *(U8 *)(s)++ = (byte); \
509 /* Only to be used inside a loop (see the break) */
510 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
513 if (str >= end) break; \
514 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
515 if (retlen == (STRLEN) -1 || retlen == 0) { \
517 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
522 static const char *_action( const tempsym_t* symptr )
524 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
527 /* Returns the sizeof() struct described by pat */
529 S_measure_struct(pTHX_ tempsym_t* symptr)
533 PERL_ARGS_ASSERT_MEASURE_STRUCT;
535 while (next_symbol(symptr)) {
539 switch (symptr->howlen) {
541 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
545 /* e_no_len and e_number */
546 len = symptr->length;
550 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
553 /* endianness doesn't influence the size of a type */
554 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
556 Perl_croak(aTHX_ "Invalid type '%c' in %s",
557 (int)TYPE_NO_MODIFIERS(symptr->code),
559 case '.' | TYPE_IS_SHRIEKING:
560 case '@' | TYPE_IS_SHRIEKING:
564 case 'U': /* XXXX Is it correct? */
567 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
568 (int) TYPE_NO_MODIFIERS(symptr->code),
575 tempsym_t savsym = *symptr;
576 symptr->patptr = savsym.grpbeg;
577 symptr->patend = savsym.grpend;
578 /* XXXX Theoretically, we need to measure many times at
579 different positions, since the subexpression may contain
580 alignment commands, but be not of aligned length.
581 Need to detect this and croak(). */
582 size = measure_struct(symptr);
586 case 'X' | TYPE_IS_SHRIEKING:
587 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
589 if (!len) /* Avoid division by 0 */
591 len = total % len; /* Assumed: the start is aligned. */
596 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
598 case 'x' | TYPE_IS_SHRIEKING:
599 if (!len) /* Avoid division by 0 */
601 star = total % len; /* Assumed: the start is aligned. */
602 if (star) /* Other portable ways? */
626 size = sizeof(char*);
636 /* locate matching closing parenthesis or bracket
637 * returns char pointer to char after match, or NULL
640 S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
642 PERL_ARGS_ASSERT_GROUP_END;
644 while (patptr < patend) {
645 const char c = *patptr++;
652 while (patptr < patend && *patptr != '\n')
656 patptr = group_end(patptr, patend, ')') + 1;
658 patptr = group_end(patptr, patend, ']') + 1;
660 Perl_croak(aTHX_ "No group ending character '%c' found in template",
666 /* Convert unsigned decimal number to binary.
667 * Expects a pointer to the first digit and address of length variable
668 * Advances char pointer to 1st non-digit char and returns number
671 S_get_num(pTHX_ const char *patptr, I32 *lenptr )
673 I32 len = *patptr++ - '0';
675 PERL_ARGS_ASSERT_GET_NUM;
677 while (isDIGIT(*patptr)) {
678 if (len >= 0x7FFFFFFF/10)
679 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
680 len = (len * 10) + (*patptr++ - '0');
686 /* The marvellous template parsing routine: Using state stored in *symptr,
687 * locates next template code and count
690 S_next_symbol(pTHX_ tempsym_t* symptr )
692 const char* patptr = symptr->patptr;
693 const char* const patend = symptr->patend;
695 PERL_ARGS_ASSERT_NEXT_SYMBOL;
697 symptr->flags &= ~FLAG_SLASH;
699 while (patptr < patend) {
700 if (isSPACE(*patptr))
702 else if (*patptr == '#') {
704 while (patptr < patend && *patptr != '\n')
709 /* We should have found a template code */
710 I32 code = *patptr++ & 0xFF;
711 U32 inherited_modifiers = 0;
713 if (code == ','){ /* grandfather in commas but with a warning */
714 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
715 symptr->flags |= FLAG_COMMA;
716 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
717 "Invalid type ',' in %s", _action( symptr ) );
722 /* for '(', skip to ')' */
724 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
725 Perl_croak(aTHX_ "()-group starts with a count in %s",
727 symptr->grpbeg = patptr;
728 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
729 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
730 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
734 /* look for group modifiers to inherit */
735 if (TYPE_ENDIANNESS(symptr->flags)) {
736 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
737 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
740 /* look for modifiers */
741 while (patptr < patend) {
746 modifier = TYPE_IS_SHRIEKING;
747 allowed = "sSiIlLxXnNvV@.";
750 modifier = TYPE_IS_BIG_ENDIAN;
751 allowed = ENDIANNESS_ALLOWED_TYPES;
754 modifier = TYPE_IS_LITTLE_ENDIAN;
755 allowed = ENDIANNESS_ALLOWED_TYPES;
766 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
767 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
768 allowed, _action( symptr ) );
770 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
771 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
772 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
773 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
774 TYPE_ENDIANNESS_MASK)
775 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
776 *patptr, _action( symptr ) );
778 if ((code & modifier)) {
779 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
780 "Duplicate modifier '%c' after '%c' in %s",
781 *patptr, (int) TYPE_NO_MODIFIERS(code),
789 /* inherit modifiers */
790 code |= inherited_modifiers;
792 /* look for count and/or / */
793 if (patptr < patend) {
794 if (isDIGIT(*patptr)) {
795 patptr = get_num( patptr, &symptr->length );
796 symptr->howlen = e_number;
798 } else if (*patptr == '*') {
800 symptr->howlen = e_star;
802 } else if (*patptr == '[') {
803 const char* lenptr = ++patptr;
804 symptr->howlen = e_number;
805 patptr = group_end( patptr, patend, ']' ) + 1;
806 /* what kind of [] is it? */
807 if (isDIGIT(*lenptr)) {
808 lenptr = get_num( lenptr, &symptr->length );
810 Perl_croak(aTHX_ "Malformed integer in [] in %s",
813 tempsym_t savsym = *symptr;
814 symptr->patend = patptr-1;
815 symptr->patptr = lenptr;
816 savsym.length = measure_struct(symptr);
820 symptr->howlen = e_no_len;
825 while (patptr < patend) {
826 if (isSPACE(*patptr))
828 else if (*patptr == '#') {
830 while (patptr < patend && *patptr != '\n')
835 if (*patptr == '/') {
836 symptr->flags |= FLAG_SLASH;
838 if (patptr < patend &&
839 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
840 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
847 /* at end - no count, no / */
848 symptr->howlen = e_no_len;
853 symptr->patptr = patptr;
857 symptr->patptr = patptr;
862 There is no way to cleanly handle the case where we should process the
863 string per byte in its upgraded form while it's really in downgraded form
864 (e.g. estimates like strend-s as an upper bound for the number of
865 characters left wouldn't work). So if we foresee the need of this
866 (pattern starts with U or contains U0), we want to work on the encoded
867 version of the string. Users are advised to upgrade their pack string
868 themselves if they need to do a lot of unpacks like this on it
871 need_utf8(const char *pat, const char *patend)
875 PERL_ARGS_ASSERT_NEED_UTF8;
877 while (pat < patend) {
880 pat = (const char *) memchr(pat, '\n', patend-pat);
881 if (!pat) return FALSE;
882 } else if (pat[0] == 'U') {
883 if (first || pat[1] == '0') return TRUE;
884 } else first = FALSE;
891 first_symbol(const char *pat, const char *patend) {
892 PERL_ARGS_ASSERT_FIRST_SYMBOL;
894 while (pat < patend) {
895 if (pat[0] != '#') return pat[0];
897 pat = (const char *) memchr(pat, '\n', patend-pat);
905 =for apidoc unpackstring
907 The engine implementing the unpack() Perl function.
909 Using the template pat..patend, this function unpacks the string
910 s..strend into a number of mortal SVs, which it pushes onto the perl
911 argument (@_) stack (so you will need to issue a C<PUTBACK> before and
912 C<SPAGAIN> after the call to this function). It returns the number of
915 The strend and patend pointers should point to the byte following the last
916 character of each string.
918 Although this function returns its values on the perl argument stack, it
919 doesn't take any parameters from that stack (and thus in particular
920 there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
926 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
930 PERL_ARGS_ASSERT_UNPACKSTRING;
932 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
933 else if (need_utf8(pat, patend)) {
934 /* We probably should try to avoid this in case a scalar context call
935 wouldn't get to the "U0" */
936 STRLEN len = strend - s;
937 s = (char *) bytes_to_utf8((U8 *) s, &len);
940 flags |= FLAG_DO_UTF8;
943 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
944 flags |= FLAG_PARSE_UTF8;
946 TEMPSYM_INIT(&sym, pat, patend, flags);
948 return unpack_rec(&sym, s, s, strend, NULL );
952 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
956 const I32 start_sp_offset = SP - PL_stack_base;
961 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
963 bool explicit_length;
964 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
965 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
967 PERL_ARGS_ASSERT_UNPACK_REC;
969 symptr->strbeg = s - strbeg;
971 while (next_symbol(symptr)) {
974 I32 datumtype = symptr->code;
975 /* do first one only unless in list context
976 / is implemented by unpacking the count, then popping it from the
977 stack, so must check that we're not in the middle of a / */
979 && (SP - PL_stack_base == start_sp_offset + 1)
980 && (datumtype != '/') ) /* XXX can this be omitted */
983 switch (howlen = symptr->howlen) {
985 len = strend - strbeg; /* long enough */
988 /* e_no_len and e_number */
989 len = symptr->length;
993 explicit_length = TRUE;
995 beyond = s >= strend;
997 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
999 /* props nonzero means we can process this letter. */
1000 const long size = props & PACK_SIZE_MASK;
1001 const long howmany = (strend - s) / size;
1005 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1006 if (len && unpack_only_one) len = 1;
1012 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1014 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1017 if (howlen == e_no_len)
1018 len = 16; /* len is not specified */
1026 tempsym_t savsym = *symptr;
1027 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1028 symptr->flags |= group_modifiers;
1029 symptr->patend = savsym.grpend;
1030 symptr->previous = &savsym;
1033 if (len && unpack_only_one) len = 1;
1035 symptr->patptr = savsym.grpbeg;
1036 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1037 else symptr->flags &= ~FLAG_PARSE_UTF8;
1038 unpack_rec(symptr, s, strbeg, strend, &s);
1039 if (s == strend && savsym.howlen == e_star)
1040 break; /* No way to continue */
1043 savsym.flags = symptr->flags & ~group_modifiers;
1047 case '.' | TYPE_IS_SHRIEKING:
1051 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1052 if (howlen == e_star) from = strbeg;
1053 else if (len <= 0) from = s;
1055 tempsym_t *group = symptr;
1057 while (--len && group) group = group->previous;
1058 from = group ? strbeg + group->strbeg : strbeg;
1061 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1062 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1066 case '@' | TYPE_IS_SHRIEKING:
1068 s = strbeg + symptr->strbeg;
1069 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1073 Perl_croak(aTHX_ "'@' outside of string in unpack");
1078 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1081 Perl_croak(aTHX_ "'@' outside of string in unpack");
1085 case 'X' | TYPE_IS_SHRIEKING:
1086 if (!len) /* Avoid division by 0 */
1089 const char *hop, *last;
1091 hop = last = strbeg;
1093 hop += UTF8SKIP(hop);
1100 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1104 len = (s - strbeg) % len;
1110 Perl_croak(aTHX_ "'X' outside of string in unpack");
1111 while (--s, UTF8_IS_CONTINUATION(*s)) {
1113 Perl_croak(aTHX_ "'X' outside of string in unpack");
1118 if (len > s - strbeg)
1119 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1123 case 'x' | TYPE_IS_SHRIEKING: {
1125 if (!len) /* Avoid division by 0 */
1127 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1128 else ai32 = (s - strbeg) % len;
1129 if (ai32 == 0) break;
1137 Perl_croak(aTHX_ "'x' outside of string in unpack");
1142 if (len > strend - s)
1143 Perl_croak(aTHX_ "'x' outside of string in unpack");
1148 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1154 /* Preliminary length estimate is assumed done in 'W' */
1155 if (len > strend - s) len = strend - s;
1161 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1162 if (hop >= strend) {
1164 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1169 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1171 } else if (len > strend - s)
1174 if (datumtype == 'Z') {
1175 /* 'Z' strips stuff after first null */
1176 const char *ptr, *end;
1178 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1179 sv = newSVpvn(s, ptr-s);
1180 if (howlen == e_star) /* exact for 'Z*' */
1181 len = ptr-s + (ptr != strend ? 1 : 0);
1182 } else if (datumtype == 'A') {
1183 /* 'A' strips both nulls and spaces */
1185 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1186 for (ptr = s+len-1; ptr >= s; ptr--)
1187 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1188 !isSPACE_utf8(ptr)) break;
1189 if (ptr >= s) ptr += UTF8SKIP(ptr);
1192 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1194 for (ptr = s+len-1; ptr >= s; ptr--)
1195 if (*ptr != 0 && !isSPACE(*ptr)) break;
1198 sv = newSVpvn(s, ptr-s);
1199 } else sv = newSVpvn(s, len);
1203 /* Undo any upgrade done due to need_utf8() */
1204 if (!(symptr->flags & FLAG_WAS_UTF8))
1205 sv_utf8_downgrade(sv, 0);
1213 if (howlen == e_star || len > (strend - s) * 8)
1214 len = (strend - s) * 8;
1217 while (len >= 8 && s < strend) {
1218 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1223 cuv += PL_bitcount[*(U8 *)s++];
1226 if (len && s < strend) {
1228 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1229 if (datumtype == 'b')
1231 if (bits & 1) cuv++;
1236 if (bits & 0x80) cuv++;
1243 sv = sv_2mortal(newSV(len ? len : 1));
1246 if (datumtype == 'b') {
1248 const I32 ai32 = len;
1249 for (len = 0; len < ai32; len++) {
1250 if (len & 7) bits >>= 1;
1252 if (s >= strend) break;
1253 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1254 } else bits = *(U8 *) s++;
1255 *str++ = bits & 1 ? '1' : '0';
1259 const I32 ai32 = len;
1260 for (len = 0; len < ai32; len++) {
1261 if (len & 7) bits <<= 1;
1263 if (s >= strend) break;
1264 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1265 } else bits = *(U8 *) s++;
1266 *str++ = bits & 0x80 ? '1' : '0';
1270 SvCUR_set(sv, str - SvPVX_const(sv));
1277 /* Preliminary length estimate, acceptable for utf8 too */
1278 if (howlen == e_star || len > (strend - s) * 2)
1279 len = (strend - s) * 2;
1281 sv = sv_2mortal(newSV(len ? len : 1));
1285 if (datumtype == 'h') {
1288 for (len = 0; len < ai32; len++) {
1289 if (len & 1) bits >>= 4;
1291 if (s >= strend) break;
1292 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1293 } else bits = * (U8 *) s++;
1295 *str++ = PL_hexdigit[bits & 15];
1299 const I32 ai32 = len;
1300 for (len = 0; len < ai32; len++) {
1301 if (len & 1) bits <<= 4;
1303 if (s >= strend) break;
1304 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1305 } else bits = *(U8 *) s++;
1307 *str++ = PL_hexdigit[(bits >> 4) & 15];
1312 SvCUR_set(sv, str - SvPVX_const(sv));
1319 if (explicit_length)
1320 /* Switch to "character" mode */
1321 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1326 while (len-- > 0 && s < strend) {
1331 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1332 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1333 if (retlen == (STRLEN) -1 || retlen == 0)
1334 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1338 aint = *(U8 *)(s)++;
1339 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1343 else if (checksum > bits_in_uv)
1344 cdouble += (NV)aint;
1352 while (len-- > 0 && s < strend) {
1354 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1355 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1356 if (retlen == (STRLEN) -1 || retlen == 0)
1357 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1361 else if (checksum > bits_in_uv)
1362 cdouble += (NV) val;
1366 } else if (!checksum)
1368 const U8 ch = *(U8 *) s++;
1371 else if (checksum > bits_in_uv)
1372 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1374 while (len-- > 0) cuv += *(U8 *) s++;
1378 if (explicit_length && howlen != e_star) {
1379 /* Switch to "bytes in UTF-8" mode */
1380 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1382 /* Should be impossible due to the need_utf8() test */
1383 Perl_croak(aTHX_ "U0 mode on a byte string");
1387 if (len > strend - s) len = strend - s;
1389 if (len && unpack_only_one) len = 1;
1393 while (len-- > 0 && s < strend) {
1397 U8 result[UTF8_MAXLEN];
1398 const char *ptr = s;
1400 /* Bug: warns about bad utf8 even if we are short on bytes
1401 and will break out of the loop */
1402 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1405 len = UTF8SKIP(result);
1406 if (!uni_to_bytes(aTHX_ &ptr, strend,
1407 (char *) &result[1], len-1, 'U')) break;
1408 auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT);
1411 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
1412 if (retlen == (STRLEN) -1 || retlen == 0)
1413 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1418 else if (checksum > bits_in_uv)
1419 cdouble += (NV) auv;
1424 case 's' | TYPE_IS_SHRIEKING:
1425 #if SHORTSIZE != SIZE16
1428 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1429 DO_BO_UNPACK(ashort, s);
1432 else if (checksum > bits_in_uv)
1433 cdouble += (NV)ashort;
1445 #if U16SIZE > SIZE16
1448 SHIFT16(utf8, s, strend, &ai16, datumtype);
1449 DO_BO_UNPACK(ai16, 16);
1450 #if U16SIZE > SIZE16
1456 else if (checksum > bits_in_uv)
1457 cdouble += (NV)ai16;
1462 case 'S' | TYPE_IS_SHRIEKING:
1463 #if SHORTSIZE != SIZE16
1465 unsigned short aushort;
1466 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1467 DO_BO_UNPACK(aushort, s);
1470 else if (checksum > bits_in_uv)
1471 cdouble += (NV)aushort;
1484 #if U16SIZE > SIZE16
1487 SHIFT16(utf8, s, strend, &au16, datumtype);
1488 DO_BO_UNPACK(au16, 16);
1490 if (datumtype == 'n')
1491 au16 = PerlSock_ntohs(au16);
1494 if (datumtype == 'v')
1499 else if (checksum > bits_in_uv)
1500 cdouble += (NV) au16;
1505 case 'v' | TYPE_IS_SHRIEKING:
1506 case 'n' | TYPE_IS_SHRIEKING:
1509 # if U16SIZE > SIZE16
1512 SHIFT16(utf8, s, strend, &ai16, datumtype);
1514 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1515 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1516 # endif /* HAS_NTOHS */
1518 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1519 ai16 = (I16) vtohs((U16) ai16);
1520 # endif /* HAS_VTOHS */
1523 else if (checksum > bits_in_uv)
1524 cdouble += (NV) ai16;
1530 case 'i' | TYPE_IS_SHRIEKING:
1533 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1534 DO_BO_UNPACK(aint, i);
1537 else if (checksum > bits_in_uv)
1538 cdouble += (NV)aint;
1544 case 'I' | TYPE_IS_SHRIEKING:
1547 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1548 DO_BO_UNPACK(auint, i);
1551 else if (checksum > bits_in_uv)
1552 cdouble += (NV)auint;
1560 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1561 #if IVSIZE == INTSIZE
1562 DO_BO_UNPACK(aiv, i);
1563 #elif IVSIZE == LONGSIZE
1564 DO_BO_UNPACK(aiv, l);
1565 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1566 DO_BO_UNPACK(aiv, 64);
1568 Perl_croak(aTHX_ "'j' not supported on this platform");
1572 else if (checksum > bits_in_uv)
1581 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1582 #if IVSIZE == INTSIZE
1583 DO_BO_UNPACK(auv, i);
1584 #elif IVSIZE == LONGSIZE
1585 DO_BO_UNPACK(auv, l);
1586 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1587 DO_BO_UNPACK(auv, 64);
1589 Perl_croak(aTHX_ "'J' not supported on this platform");
1593 else if (checksum > bits_in_uv)
1599 case 'l' | TYPE_IS_SHRIEKING:
1600 #if LONGSIZE != SIZE32
1603 SHIFT_VAR(utf8, s, strend, along, datumtype);
1604 DO_BO_UNPACK(along, l);
1607 else if (checksum > bits_in_uv)
1608 cdouble += (NV)along;
1619 #if U32SIZE > SIZE32
1622 SHIFT32(utf8, s, strend, &ai32, datumtype);
1623 DO_BO_UNPACK(ai32, 32);
1624 #if U32SIZE > SIZE32
1625 if (ai32 > 2147483647) ai32 -= 4294967296;
1629 else if (checksum > bits_in_uv)
1630 cdouble += (NV)ai32;
1635 case 'L' | TYPE_IS_SHRIEKING:
1636 #if LONGSIZE != SIZE32
1638 unsigned long aulong;
1639 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1640 DO_BO_UNPACK(aulong, l);
1643 else if (checksum > bits_in_uv)
1644 cdouble += (NV)aulong;
1657 #if U32SIZE > SIZE32
1660 SHIFT32(utf8, s, strend, &au32, datumtype);
1661 DO_BO_UNPACK(au32, 32);
1663 if (datumtype == 'N')
1664 au32 = PerlSock_ntohl(au32);
1667 if (datumtype == 'V')
1672 else if (checksum > bits_in_uv)
1673 cdouble += (NV)au32;
1678 case 'V' | TYPE_IS_SHRIEKING:
1679 case 'N' | TYPE_IS_SHRIEKING:
1682 #if U32SIZE > SIZE32
1685 SHIFT32(utf8, s, strend, &ai32, datumtype);
1687 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1688 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1691 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1692 ai32 = (I32)vtohl((U32)ai32);
1696 else if (checksum > bits_in_uv)
1697 cdouble += (NV)ai32;
1705 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1706 DO_BO_UNPACK_PC(aptr);
1707 /* newSVpv generates undef if aptr is NULL */
1708 mPUSHs(newSVpv(aptr, 0));
1716 while (len > 0 && s < strend) {
1718 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1719 auv = (auv << 7) | (ch & 0x7f);
1720 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1728 if (++bytes >= sizeof(UV)) { /* promote to string */
1731 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
1732 while (s < strend) {
1733 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1734 sv = mul128(sv, (U8)(ch & 0x7f));
1740 t = SvPV_nolen_const(sv);
1749 if ((s >= strend) && bytes)
1750 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1754 if (symptr->howlen == e_star)
1755 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1757 if (s + sizeof(char*) <= strend) {
1759 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1760 DO_BO_UNPACK_PC(aptr);
1761 /* newSVpvn generates undef if aptr is NULL */
1762 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1769 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
1770 DO_BO_UNPACK(aquad, 64);
1772 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
1773 newSViv((IV)aquad) : newSVnv((NV)aquad));
1774 else if (checksum > bits_in_uv)
1775 cdouble += (NV)aquad;
1783 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
1784 DO_BO_UNPACK(auquad, 64);
1786 mPUSHs(auquad <= UV_MAX ?
1787 newSVuv((UV)auquad) : newSVnv((NV)auquad));
1788 else if (checksum > bits_in_uv)
1789 cdouble += (NV)auquad;
1794 #endif /* HAS_QUAD */
1795 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1799 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
1800 DO_BO_UNPACK_N(afloat, float);
1810 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
1811 DO_BO_UNPACK_N(adouble, double);
1821 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), datumtype);
1822 DO_BO_UNPACK_N(anv.nv, NV);
1829 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1833 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, sizeof(aldouble.bytes), datumtype);
1834 DO_BO_UNPACK_N(aldouble.ld, long double);
1836 mPUSHn(aldouble.ld);
1838 cdouble += aldouble.ld;
1844 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1845 sv = sv_2mortal(newSV(l));
1846 if (l) SvPOK_on(sv);
1849 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1854 next_uni_uu(aTHX_ &s, strend, &a);
1855 next_uni_uu(aTHX_ &s, strend, &b);
1856 next_uni_uu(aTHX_ &s, strend, &c);
1857 next_uni_uu(aTHX_ &s, strend, &d);
1858 hunk[0] = (char)((a << 2) | (b >> 4));
1859 hunk[1] = (char)((b << 4) | (c >> 2));
1860 hunk[2] = (char)((c << 6) | d);
1862 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1870 /* possible checksum byte */
1871 const char *skip = s+UTF8SKIP(s);
1872 if (skip < strend && *skip == '\n')
1878 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1882 len = PL_uudmap[*(U8*)s++] & 077;
1884 if (s < strend && ISUUCHAR(*s))
1885 a = PL_uudmap[*(U8*)s++] & 077;
1888 if (s < strend && ISUUCHAR(*s))
1889 b = PL_uudmap[*(U8*)s++] & 077;
1892 if (s < strend && ISUUCHAR(*s))
1893 c = PL_uudmap[*(U8*)s++] & 077;
1896 if (s < strend && ISUUCHAR(*s))
1897 d = PL_uudmap[*(U8*)s++] & 077;
1900 hunk[0] = (char)((a << 2) | (b >> 4));
1901 hunk[1] = (char)((b << 4) | (c >> 2));
1902 hunk[2] = (char)((c << 6) | d);
1904 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1909 else /* possible checksum byte */
1910 if (s + 1 < strend && s[1] == '\n')
1920 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1921 (checksum > bits_in_uv &&
1922 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1925 anv = (NV) (1 << (checksum & 15));
1926 while (checksum >= 16) {
1930 while (cdouble < 0.0)
1932 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
1933 sv = newSVnv(cdouble);
1936 if (checksum < bits_in_uv) {
1937 UV mask = ((UV)1 << checksum) - 1;
1946 if (symptr->flags & FLAG_SLASH){
1947 if (SP - PL_stack_base - start_sp_offset <= 0)
1949 if( next_symbol(symptr) ){
1950 if( symptr->howlen == e_number )
1951 Perl_croak(aTHX_ "Count after length/code in unpack" );
1953 /* ...end of char buffer then no decent length available */
1954 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1956 /* take top of stack (hope it's numeric) */
1959 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1962 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1964 datumtype = symptr->code;
1965 explicit_length = FALSE;
1973 return SP - PL_stack_base - start_sp_offset;
1981 I32 gimme = GIMME_V;
1984 const char *pat = SvPV_const(left, llen);
1985 const char *s = SvPV_const(right, rlen);
1986 const char *strend = s + rlen;
1987 const char *patend = pat + llen;
1991 cnt = unpackstring(pat, patend, s, strend,
1992 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
1993 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
1996 if ( !cnt && gimme == G_SCALAR )
1997 PUSHs(&PL_sv_undef);
2002 doencodes(U8 *h, const char *s, I32 len)
2004 *h++ = PL_uuemap[len];
2006 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2007 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2008 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2009 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2014 const char r = (len > 1 ? s[1] : '\0');
2015 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2016 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2017 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2018 *h++ = PL_uuemap[0];
2025 S_is_an_int(pTHX_ const char *s, STRLEN l)
2027 SV *result = newSVpvn(s, l);
2028 char *const result_c = SvPV_nolen(result); /* convenience */
2029 char *out = result_c;
2033 PERL_ARGS_ASSERT_IS_AN_INT;
2041 SvREFCNT_dec(result);
2064 SvREFCNT_dec(result);
2070 SvCUR_set(result, out - result_c);
2074 /* pnum must be '\0' terminated */
2076 S_div128(pTHX_ SV *pnum, bool *done)
2079 char * const s = SvPV(pnum, len);
2083 PERL_ARGS_ASSERT_DIV128;
2087 const int i = m * 10 + (*t - '0');
2088 const int r = (i >> 7); /* r < 10 */
2096 SvCUR_set(pnum, (STRLEN) (t - s));
2101 =for apidoc packlist
2103 The engine implementing pack() Perl function.
2109 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
2114 PERL_ARGS_ASSERT_PACKLIST;
2116 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2118 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2119 Also make sure any UTF8 flag is loaded */
2120 SvPV_force_nolen(cat);
2122 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2124 (void)pack_rec( cat, &sym, beglist, endlist );
2127 /* like sv_utf8_upgrade, but also repoint the group start markers */
2129 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2132 const char *from_ptr, *from_start, *from_end, **marks, **m;
2133 char *to_start, *to_ptr;
2135 if (SvUTF8(sv)) return;
2137 from_start = SvPVX_const(sv);
2138 from_end = from_start + SvCUR(sv);
2139 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2140 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2141 if (from_ptr == from_end) {
2142 /* Simple case: no character needs to be changed */
2147 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2148 Newx(to_start, len, char);
2149 Copy(from_start, to_start, from_ptr-from_start, char);
2150 to_ptr = to_start + (from_ptr-from_start);
2152 Newx(marks, sym_ptr->level+2, const char *);
2153 for (group=sym_ptr; group; group = group->previous)
2154 marks[group->level] = from_start + group->strbeg;
2155 marks[sym_ptr->level+1] = from_end+1;
2156 for (m = marks; *m < from_ptr; m++)
2157 *m = to_start + (*m-from_start);
2159 for (;from_ptr < from_end; from_ptr++) {
2160 while (*m == from_ptr) *m++ = to_ptr;
2161 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2165 while (*m == from_ptr) *m++ = to_ptr;
2166 if (m != marks + sym_ptr->level+1) {
2169 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2170 "level=%d", m, marks, sym_ptr->level);
2172 for (group=sym_ptr; group; group = group->previous)
2173 group->strbeg = marks[group->level] - to_start;
2178 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2179 from_start -= SvIVX(sv);
2182 SvFLAGS(sv) &= ~SVf_OOK;
2185 Safefree(from_start);
2186 SvPV_set(sv, to_start);
2187 SvCUR_set(sv, to_ptr - to_start);
2192 /* Exponential string grower. Makes string extension effectively O(n)
2193 needed says how many extra bytes we need (not counting the final '\0')
2194 Only grows the string if there is an actual lack of space
2197 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2198 const STRLEN cur = SvCUR(sv);
2199 const STRLEN len = SvLEN(sv);
2202 PERL_ARGS_ASSERT_SV_EXP_GROW;
2204 if (len - cur > needed) return SvPVX(sv);
2205 extend = needed > len ? needed : len;
2206 return SvGROW(sv, len+extend+1);
2211 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2214 tempsym_t lookahead;
2215 I32 items = endlist - beglist;
2216 bool found = next_symbol(symptr);
2217 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2218 bool warn_utf8 = ckWARN(WARN_UTF8);
2220 PERL_ARGS_ASSERT_PACK_REC;
2222 if (symptr->level == 0 && found && symptr->code == 'U') {
2223 marked_upgrade(aTHX_ cat, symptr);
2224 symptr->flags |= FLAG_DO_UTF8;
2227 symptr->strbeg = SvCUR(cat);
2233 SV *lengthcode = NULL;
2234 I32 datumtype = symptr->code;
2235 howlen_t howlen = symptr->howlen;
2236 char *start = SvPVX(cat);
2237 char *cur = start + SvCUR(cat);
2239 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2243 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2247 /* e_no_len and e_number */
2248 len = symptr->length;
2253 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2255 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2256 /* We can process this letter. */
2257 STRLEN size = props & PACK_SIZE_MASK;
2258 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2262 /* Look ahead for next symbol. Do we have code/code? */
2263 lookahead = *symptr;
2264 found = next_symbol(&lookahead);
2265 if (symptr->flags & FLAG_SLASH) {
2267 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2268 if (strchr("aAZ", lookahead.code)) {
2269 if (lookahead.howlen == e_number) count = lookahead.length;
2272 count = sv_len_utf8(*beglist);
2275 if (lookahead.code == 'Z') count++;
2278 if (lookahead.howlen == e_number && lookahead.length < items)
2279 count = lookahead.length;
2282 lookahead.howlen = e_number;
2283 lookahead.length = count;
2284 lengthcode = sv_2mortal(newSViv(count));
2287 /* Code inside the switch must take care to properly update
2288 cat (CUR length and '\0' termination) if it updated *cur and
2289 doesn't simply leave using break */
2290 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2292 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2293 (int) TYPE_NO_MODIFIERS(datumtype));
2295 Perl_croak(aTHX_ "'%%' may not be used in pack");
2298 case '.' | TYPE_IS_SHRIEKING:
2300 if (howlen == e_star) from = start;
2301 else if (len == 0) from = cur;
2303 tempsym_t *group = symptr;
2305 while (--len && group) group = group->previous;
2306 from = group ? start + group->strbeg : start;
2309 len = SvIV(fromstr);
2311 case '@' | TYPE_IS_SHRIEKING:
2313 from = start + symptr->strbeg;
2315 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2317 while (len && from < cur) {
2318 from += UTF8SKIP(from);
2322 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2324 /* Here we know from == cur */
2326 GROWING(0, cat, start, cur, len);
2327 Zero(cur, len, char);
2329 } else if (from < cur) {
2332 } else goto no_change;
2340 if (len > 0) goto grow;
2341 if (len == 0) goto no_change;
2348 tempsym_t savsym = *symptr;
2349 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2350 symptr->flags |= group_modifiers;
2351 symptr->patend = savsym.grpend;
2353 symptr->previous = &lookahead;
2356 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2357 else symptr->flags &= ~FLAG_PARSE_UTF8;
2358 was_utf8 = SvUTF8(cat);
2359 symptr->patptr = savsym.grpbeg;
2360 beglist = pack_rec(cat, symptr, beglist, endlist);
2361 if (SvUTF8(cat) != was_utf8)
2362 /* This had better be an upgrade while in utf8==0 mode */
2365 if (savsym.howlen == e_star && beglist == endlist)
2366 break; /* No way to continue */
2368 items = endlist - beglist;
2369 lookahead.flags = symptr->flags & ~group_modifiers;
2372 case 'X' | TYPE_IS_SHRIEKING:
2373 if (!len) /* Avoid division by 0 */
2380 hop += UTF8SKIP(hop);
2387 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2391 len = (cur-start) % len;
2395 if (len < 1) goto no_change;
2399 Perl_croak(aTHX_ "'%c' outside of string in pack",
2400 (int) TYPE_NO_MODIFIERS(datumtype));
2401 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2403 Perl_croak(aTHX_ "'%c' outside of string in pack",
2404 (int) TYPE_NO_MODIFIERS(datumtype));
2410 if (cur - start < len)
2411 Perl_croak(aTHX_ "'%c' outside of string in pack",
2412 (int) TYPE_NO_MODIFIERS(datumtype));
2415 if (cur < start+symptr->strbeg) {
2416 /* Make sure group starts don't point into the void */
2418 const STRLEN length = cur-start;
2419 for (group = symptr;
2420 group && length < group->strbeg;
2421 group = group->previous) group->strbeg = length;
2422 lookahead.strbeg = length;
2425 case 'x' | TYPE_IS_SHRIEKING: {
2427 if (!len) /* Avoid division by 0 */
2429 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2430 else ai32 = (cur - start) % len;
2431 if (ai32 == 0) goto no_change;
2443 aptr = SvPV_const(fromstr, fromlen);
2444 if (DO_UTF8(fromstr)) {
2445 const char *end, *s;
2447 if (!utf8 && !SvUTF8(cat)) {
2448 marked_upgrade(aTHX_ cat, symptr);
2449 lookahead.flags |= FLAG_DO_UTF8;
2450 lookahead.strbeg = symptr->strbeg;
2453 cur = start + SvCUR(cat);
2455 if (howlen == e_star) {
2456 if (utf8) goto string_copy;
2460 end = aptr + fromlen;
2461 fromlen = datumtype == 'Z' ? len-1 : len;
2462 while ((I32) fromlen > 0 && s < end) {
2467 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2470 if (datumtype == 'Z') len++;
2476 fromlen = len - fromlen;
2477 if (datumtype == 'Z') fromlen--;
2478 if (howlen == e_star) {
2480 if (datumtype == 'Z') len++;
2482 GROWING(0, cat, start, cur, len);
2483 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2484 datumtype | TYPE_IS_PACK))
2485 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2486 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2487 (int)datumtype, aptr, end, cur, (UV)fromlen);
2491 if (howlen == e_star) {
2493 if (datumtype == 'Z') len++;
2495 if (len <= (I32) fromlen) {
2497 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2499 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2501 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2502 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2504 while (fromlen > 0) {
2505 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2511 if (howlen == e_star) {
2513 if (datumtype == 'Z') len++;
2515 if (len <= (I32) fromlen) {
2517 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2519 GROWING(0, cat, start, cur, len);
2520 Copy(aptr, cur, fromlen, char);
2524 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2531 const char *str, *end;
2538 str = SvPV_const(fromstr, fromlen);
2539 end = str + fromlen;
2540 if (DO_UTF8(fromstr)) {
2542 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2544 utf8_source = FALSE;
2545 utf8_flags = 0; /* Unused, but keep compilers happy */
2547 if (howlen == e_star) len = fromlen;
2548 field_len = (len+7)/8;
2549 GROWING(utf8, cat, start, cur, field_len);
2550 if (len > (I32)fromlen) len = fromlen;
2553 if (datumtype == 'B')
2557 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2559 } else bits |= *str++ & 1;
2560 if (l & 7) bits <<= 1;
2562 PUSH_BYTE(utf8, cur, bits);
2567 /* datumtype == 'b' */
2571 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2572 if (val & 1) bits |= 0x80;
2573 } else if (*str++ & 1)
2575 if (l & 7) bits >>= 1;
2577 PUSH_BYTE(utf8, cur, bits);
2583 if (datumtype == 'B')
2584 bits <<= 7 - (l & 7);
2586 bits >>= 7 - (l & 7);
2587 PUSH_BYTE(utf8, cur, bits);
2590 /* Determine how many chars are left in the requested field */
2592 if (howlen == e_star) field_len = 0;
2593 else field_len -= l;
2594 Zero(cur, field_len, char);
2600 const char *str, *end;
2607 str = SvPV_const(fromstr, fromlen);
2608 end = str + fromlen;
2609 if (DO_UTF8(fromstr)) {
2611 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2613 utf8_source = FALSE;
2614 utf8_flags = 0; /* Unused, but keep compilers happy */
2616 if (howlen == e_star) len = fromlen;
2617 field_len = (len+1)/2;
2618 GROWING(utf8, cat, start, cur, field_len);
2619 if (!utf8 && len > (I32)fromlen) len = fromlen;
2622 if (datumtype == 'H')
2626 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2627 if (val < 256 && isALPHA(val))
2628 bits |= (val + 9) & 0xf;
2631 } else if (isALPHA(*str))
2632 bits |= (*str++ + 9) & 0xf;
2634 bits |= *str++ & 0xf;
2635 if (l & 1) bits <<= 4;
2637 PUSH_BYTE(utf8, cur, bits);
2645 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2646 if (val < 256 && isALPHA(val))
2647 bits |= ((val + 9) & 0xf) << 4;
2649 bits |= (val & 0xf) << 4;
2650 } else if (isALPHA(*str))
2651 bits |= ((*str++ + 9) & 0xf) << 4;
2653 bits |= (*str++ & 0xf) << 4;
2654 if (l & 1) bits >>= 4;
2656 PUSH_BYTE(utf8, cur, bits);
2662 PUSH_BYTE(utf8, cur, bits);
2665 /* Determine how many chars are left in the requested field */
2667 if (howlen == e_star) field_len = 0;
2668 else field_len -= l;
2669 Zero(cur, field_len, char);
2677 aiv = SvIV(fromstr);
2678 if ((-128 > aiv || aiv > 127))
2679 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2680 "Character in 'c' format wrapped in pack");
2681 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2686 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2692 aiv = SvIV(fromstr);
2693 if ((0 > aiv || aiv > 0xff))
2694 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2695 "Character in 'C' format wrapped in pack");
2696 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2701 U8 in_bytes = (U8)IN_BYTES;
2703 end = start+SvLEN(cat)-1;
2704 if (utf8) end -= UTF8_MAXLEN-1;
2708 auv = SvUV(fromstr);
2709 if (in_bytes) auv = auv % 0x100;
2714 SvCUR_set(cat, cur - start);
2716 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2717 end = start+SvLEN(cat)-UTF8_MAXLEN;
2719 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2722 0 : UNICODE_ALLOW_ANY);
2727 SvCUR_set(cat, cur - start);
2728 marked_upgrade(aTHX_ cat, symptr);
2729 lookahead.flags |= FLAG_DO_UTF8;
2730 lookahead.strbeg = symptr->strbeg;
2733 cur = start + SvCUR(cat);
2734 end = start+SvLEN(cat)-UTF8_MAXLEN;
2737 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2738 "Character in 'W' format wrapped in pack");
2743 SvCUR_set(cat, cur - start);
2744 GROWING(0, cat, start, cur, len+1);
2745 end = start+SvLEN(cat)-1;
2747 *(U8 *) cur++ = (U8)auv;
2756 if (!(symptr->flags & FLAG_DO_UTF8)) {
2757 marked_upgrade(aTHX_ cat, symptr);
2758 lookahead.flags |= FLAG_DO_UTF8;
2759 lookahead.strbeg = symptr->strbeg;
2765 end = start+SvLEN(cat);
2766 if (!utf8) end -= UTF8_MAXLEN;
2770 auv = SvUV(fromstr);
2772 U8 buffer[UTF8_MAXLEN], *endb;
2773 endb = uvuni_to_utf8_flags(buffer, auv,
2775 0 : UNICODE_ALLOW_ANY);
2776 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2778 SvCUR_set(cat, cur - start);
2779 GROWING(0, cat, start, cur,
2780 len+(endb-buffer)*UTF8_EXPAND);
2781 end = start+SvLEN(cat);
2783 cur = bytes_to_uni(buffer, endb-buffer, cur);
2787 SvCUR_set(cat, cur - start);
2788 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2789 end = start+SvLEN(cat)-UTF8_MAXLEN;
2791 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
2793 0 : UNICODE_ALLOW_ANY);
2798 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2804 anv = SvNV(fromstr);
2805 # if defined(VMS) && !defined(_IEEE_FP)
2806 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2807 * on Alpha; fake it if we don't have them.
2811 else if (anv < -FLT_MAX)
2813 else afloat = (float)anv;
2815 afloat = (float)anv;
2817 DO_BO_PACK_N(afloat, float);
2818 PUSH_VAR(utf8, cur, afloat);
2826 anv = SvNV(fromstr);
2827 # if defined(VMS) && !defined(_IEEE_FP)
2828 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2829 * on Alpha; fake it if we don't have them.
2833 else if (anv < -DBL_MAX)
2835 else adouble = (double)anv;
2837 adouble = (double)anv;
2839 DO_BO_PACK_N(adouble, double);
2840 PUSH_VAR(utf8, cur, adouble);
2845 Zero(&anv, 1, NV); /* can be long double with unused bits */
2849 /* to work round a gcc/x86 bug; don't use SvNV */
2850 anv.nv = sv_2nv(fromstr);
2852 anv.nv = SvNV(fromstr);
2854 DO_BO_PACK_N(anv, NV);
2855 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes));
2859 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2862 /* long doubles can have unused bits, which may be nonzero */
2863 Zero(&aldouble, 1, long double);
2867 /* to work round a gcc/x86 bug; don't use SvNV */
2868 aldouble.ld = (long double)sv_2nv(fromstr);
2870 aldouble.ld = (long double)SvNV(fromstr);
2872 DO_BO_PACK_N(aldouble, long double);
2873 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes));
2878 case 'n' | TYPE_IS_SHRIEKING:
2883 ai16 = (I16)SvIV(fromstr);
2885 ai16 = PerlSock_htons(ai16);
2887 PUSH16(utf8, cur, &ai16);
2890 case 'v' | TYPE_IS_SHRIEKING:
2895 ai16 = (I16)SvIV(fromstr);
2899 PUSH16(utf8, cur, &ai16);
2902 case 'S' | TYPE_IS_SHRIEKING:
2903 #if SHORTSIZE != SIZE16
2905 unsigned short aushort;
2907 aushort = SvUV(fromstr);
2908 DO_BO_PACK(aushort, s);
2909 PUSH_VAR(utf8, cur, aushort);
2919 au16 = (U16)SvUV(fromstr);
2920 DO_BO_PACK(au16, 16);
2921 PUSH16(utf8, cur, &au16);
2924 case 's' | TYPE_IS_SHRIEKING:
2925 #if SHORTSIZE != SIZE16
2929 ashort = SvIV(fromstr);
2930 DO_BO_PACK(ashort, s);
2931 PUSH_VAR(utf8, cur, ashort);
2941 ai16 = (I16)SvIV(fromstr);
2942 DO_BO_PACK(ai16, 16);
2943 PUSH16(utf8, cur, &ai16);
2947 case 'I' | TYPE_IS_SHRIEKING:
2951 auint = SvUV(fromstr);
2952 DO_BO_PACK(auint, i);
2953 PUSH_VAR(utf8, cur, auint);
2960 aiv = SvIV(fromstr);
2961 #if IVSIZE == INTSIZE
2963 #elif IVSIZE == LONGSIZE
2965 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
2966 DO_BO_PACK(aiv, 64);
2968 Perl_croak(aTHX_ "'j' not supported on this platform");
2970 PUSH_VAR(utf8, cur, aiv);
2977 auv = SvUV(fromstr);
2978 #if UVSIZE == INTSIZE
2980 #elif UVSIZE == LONGSIZE
2982 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
2983 DO_BO_PACK(auv, 64);
2985 Perl_croak(aTHX_ "'J' not supported on this platform");
2987 PUSH_VAR(utf8, cur, auv);
2994 anv = SvNV(fromstr);
2998 SvCUR_set(cat, cur - start);
2999 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3002 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3003 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3004 any negative IVs will have already been got by the croak()
3005 above. IOK is untrue for fractions, so we test them
3006 against UV_MAX_P1. */
3007 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3008 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3009 char *in = buf + sizeof(buf);
3010 UV auv = SvUV(fromstr);
3013 *--in = (char)((auv & 0x7f) | 0x80);
3016 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3017 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3018 in, (buf + sizeof(buf)) - in);
3019 } else if (SvPOKp(fromstr))
3021 else if (SvNOKp(fromstr)) {
3022 /* 10**NV_MAX_10_EXP is the largest power of 10
3023 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
3024 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3025 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3026 And with that many bytes only Inf can overflow.
3027 Some C compilers are strict about integral constant
3028 expressions so we conservatively divide by a slightly
3029 smaller integer instead of multiplying by the exact
3030 floating-point value.
3032 #ifdef NV_MAX_10_EXP
3033 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3034 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3036 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3037 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3039 char *in = buf + sizeof(buf);
3041 anv = Perl_floor(anv);
3043 const NV next = Perl_floor(anv / 128);
3044 if (in <= buf) /* this cannot happen ;-) */
3045 Perl_croak(aTHX_ "Cannot compress integer in pack");
3046 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3049 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3050 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3051 in, (buf + sizeof(buf)) - in);
3060 /* Copy string and check for compliance */
3061 from = SvPV_const(fromstr, len);
3062 if ((norm = is_an_int(from, len)) == NULL)
3063 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3065 Newx(result, len, char);
3068 while (!done) *--in = div128(norm, &done) | 0x80;
3069 result[len - 1] &= 0x7F; /* clear continue bit */
3070 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3071 in, (result + len) - in);
3073 SvREFCNT_dec(norm); /* free norm */
3078 case 'i' | TYPE_IS_SHRIEKING:
3082 aint = SvIV(fromstr);
3083 DO_BO_PACK(aint, i);
3084 PUSH_VAR(utf8, cur, aint);
3087 case 'N' | TYPE_IS_SHRIEKING:
3092 au32 = SvUV(fromstr);
3094 au32 = PerlSock_htonl(au32);
3096 PUSH32(utf8, cur, &au32);
3099 case 'V' | TYPE_IS_SHRIEKING:
3104 au32 = SvUV(fromstr);
3108 PUSH32(utf8, cur, &au32);
3111 case 'L' | TYPE_IS_SHRIEKING:
3112 #if LONGSIZE != SIZE32
3114 unsigned long aulong;
3116 aulong = SvUV(fromstr);
3117 DO_BO_PACK(aulong, l);
3118 PUSH_VAR(utf8, cur, aulong);
3128 au32 = SvUV(fromstr);
3129 DO_BO_PACK(au32, 32);
3130 PUSH32(utf8, cur, &au32);
3133 case 'l' | TYPE_IS_SHRIEKING:
3134 #if LONGSIZE != SIZE32
3138 along = SvIV(fromstr);
3139 DO_BO_PACK(along, l);
3140 PUSH_VAR(utf8, cur, along);
3150 ai32 = SvIV(fromstr);
3151 DO_BO_PACK(ai32, 32);
3152 PUSH32(utf8, cur, &ai32);
3160 auquad = (Uquad_t) SvUV(fromstr);
3161 DO_BO_PACK(auquad, 64);
3162 PUSH_VAR(utf8, cur, auquad);
3169 aquad = (Quad_t)SvIV(fromstr);
3170 DO_BO_PACK(aquad, 64);
3171 PUSH_VAR(utf8, cur, aquad);
3174 #endif /* HAS_QUAD */
3176 len = 1; /* assume SV is correct length */
3177 GROWING(utf8, cat, start, cur, sizeof(char *));
3184 SvGETMAGIC(fromstr);
3185 if (!SvOK(fromstr)) aptr = NULL;
3187 /* XXX better yet, could spirit away the string to
3188 * a safe spot and hang on to it until the result
3189 * of pack() (and all copies of the result) are
3192 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3193 !SvREADONLY(fromstr)))) {
3194 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3195 "Attempt to pack pointer to temporary value");
3197 if (SvPOK(fromstr) || SvNIOK(fromstr))
3198 aptr = SvPV_nomg_const_nolen(fromstr);
3200 aptr = SvPV_force_flags_nolen(fromstr, 0);
3202 DO_BO_PACK_PC(aptr);
3203 PUSH_VAR(utf8, cur, aptr);
3207 const char *aptr, *aend;
3211 if (len <= 2) len = 45;
3212 else len = len / 3 * 3;
3214 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3215 "Field too wide in 'u' format in pack");
3218 aptr = SvPV_const(fromstr, fromlen);
3219 from_utf8 = DO_UTF8(fromstr);
3221 aend = aptr + fromlen;
3222 fromlen = sv_len_utf8_nomg(fromstr);
3223 } else aend = NULL; /* Unused, but keep compilers happy */
3224 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3225 while (fromlen > 0) {
3228 U8 hunk[1+63/3*4+1];
3230 if ((I32)fromlen > len)
3236 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3237 'u' | TYPE_IS_PACK)) {
3239 SvCUR_set(cat, cur - start);
3240 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3241 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3242 aptr, aend, buffer, (long) todo);
3244 end = doencodes(hunk, buffer, todo);
3246 end = doencodes(hunk, aptr, todo);
3249 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3256 SvCUR_set(cat, cur - start);
3258 *symptr = lookahead;
3267 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3270 SV *pat_sv = *++MARK;
3271 const char *pat = SvPV_const(pat_sv, fromlen);
3272 const char *patend = pat + fromlen;
3278 packlist(cat, pat, patend, MARK, SP + 1);
3288 * c-indentation-style: bsd
3290 * indent-tabs-mode: nil
3293 * ex: set ts=8 sts=4 sw=4 et: