3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
32 #define PERL_IN_PP_PACK_C
35 /* Types used by pack/unpack */
37 e_no_len, /* no length */
38 e_number, /* number, [] */
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
57 #define TEMPSYM_INIT(symptr, p, e, f) \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
81 U8 bytes[sizeof(long double)];
88 /* Maximum number of bytes to which a byte can grow due to upgrade */
92 * Offset for integer pack/unpack.
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
112 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
115 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
116 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
117 # define OFF16(p) ((char*)(p))
118 # define OFF32(p) ((char*)(p))
120 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
121 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
124 ++++ bad cray byte order
128 # define OFF16(p) ((char *) (p))
129 # define OFF32(p) ((char *) (p))
132 /* Only to be used inside a loop (see the break) */
133 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
135 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
137 Copy(s, OFF16(p), SIZE16, char); \
142 /* Only to be used inside a loop (see the break) */
143 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
145 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
147 Copy(s, OFF32(p), SIZE32, char); \
152 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
153 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
155 /* Only to be used inside a loop (see the break) */
156 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype) \
159 if (!uni_to_bytes(aTHX_ &s, strend, \
160 (char *) (buf), len, datumtype)) break; \
162 Copy(s, (char *) (buf), len, char); \
167 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
168 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype)
170 #define PUSH_VAR(utf8, aptr, var) \
171 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
173 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
174 #define MAX_SUB_TEMPLATE_LEVEL 100
176 /* flags (note that type modifiers can also be used as flags!) */
177 #define FLAG_WAS_UTF8 0x40
178 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
179 #define FLAG_UNPACK_ONLY_ONE 0x10
180 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
181 #define FLAG_SLASH 0x04
182 #define FLAG_COMMA 0x02
183 #define FLAG_PACK 0x01
186 S_mul128(pTHX_ SV *sv, U8 m)
189 char *s = SvPV(sv, len);
192 PERL_ARGS_ASSERT_MUL128;
194 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
195 SV * const tmpNew = newSVpvs("0000000000");
197 sv_catsv(tmpNew, sv);
198 SvREFCNT_dec(sv); /* free old sv */
203 while (!*t) /* trailing '\0'? */
206 const U32 i = ((*t - '0') << 7) + m;
207 *(t--) = '0' + (char)(i % 10);
213 /* Explosives and implosives. */
215 #if 'I' == 73 && 'J' == 74
216 /* On an ASCII/ISO kind of system */
217 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
220 Some other sort of character set - use memchr() so we don't match
223 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
227 #define TYPE_IS_SHRIEKING 0x100
228 #define TYPE_IS_BIG_ENDIAN 0x200
229 #define TYPE_IS_LITTLE_ENDIAN 0x400
230 #define TYPE_IS_PACK 0x800
231 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
232 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
233 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
235 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
236 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
238 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
240 # define DO_BO_UNPACK(var, type) \
242 switch (TYPE_ENDIANNESS(datumtype)) { \
243 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
244 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
249 # define DO_BO_PACK(var, type) \
251 switch (TYPE_ENDIANNESS(datumtype)) { \
252 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
253 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
258 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
260 switch (TYPE_ENDIANNESS(datumtype)) { \
261 case TYPE_IS_BIG_ENDIAN: \
262 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
264 case TYPE_IS_LITTLE_ENDIAN: \
265 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
272 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
274 switch (TYPE_ENDIANNESS(datumtype)) { \
275 case TYPE_IS_BIG_ENDIAN: \
276 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
278 case TYPE_IS_LITTLE_ENDIAN: \
279 var = (post_cast *) my_htole ## type ((pre_cast) var); \
286 # define BO_CANT_DOIT(action, type) \
288 switch (TYPE_ENDIANNESS(datumtype)) { \
289 case TYPE_IS_BIG_ENDIAN: \
290 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
291 "platform", #action, #type); \
293 case TYPE_IS_LITTLE_ENDIAN: \
294 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
295 "platform", #action, #type); \
302 # if PTRSIZE == INTSIZE
303 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
304 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
305 # elif PTRSIZE == LONGSIZE
306 # if LONGSIZE < IVSIZE && IVSIZE == 8
307 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, 64, IV, char)
308 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, 64, IV, char)
310 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
311 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
313 # elif PTRSIZE == IVSIZE
314 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
315 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
317 # define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
318 # define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
321 # if defined(my_htolen) && defined(my_letohn) && \
322 defined(my_htoben) && defined(my_betohn)
323 # define DO_BO_UNPACK_N(var, type) \
325 switch (TYPE_ENDIANNESS(datumtype)) { \
326 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
327 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
332 # define DO_BO_PACK_N(var, type) \
334 switch (TYPE_ENDIANNESS(datumtype)) { \
335 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
336 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
341 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
342 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
345 #define PACK_SIZE_CANNOT_CSUM 0x80
346 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
347 #define PACK_SIZE_MASK 0x3F
349 #include "packsizetables.c"
352 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
355 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
356 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
357 /* We try to process malformed UTF-8 as much as possible (preferably with
358 warnings), but these two mean we make no progress in the string and
359 might enter an infinite loop */
360 if (retlen == (STRLEN) -1 || retlen == 0)
361 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
362 (int) TYPE_NO_MODIFIERS(datumtype));
364 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
365 "Character in '%c' format wrapped in unpack",
366 (int) TYPE_NO_MODIFIERS(datumtype));
373 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
374 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
378 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
382 const char *from = *s;
384 const U32 flags = ckWARN(WARN_UTF8) ?
385 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
386 for (;buf_len > 0; buf_len--) {
387 if (from >= end) return FALSE;
388 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
389 if (retlen == (STRLEN) -1 || retlen == 0) {
390 from += UTF8SKIP(from);
392 } else from += retlen;
397 *(U8 *)buf++ = (U8)val;
399 /* We have enough characters for the buffer. Did we have problems ? */
402 /* Rewalk the string fragment while warning */
404 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
405 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
406 if (ptr >= end) break;
407 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
409 if (from > end) from = end;
412 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
413 WARN_PACK : WARN_UNPACK),
414 "Character(s) in '%c' format wrapped in %s",
415 (int) TYPE_NO_MODIFIERS(datumtype),
416 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
423 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
427 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
428 if (val >= 0x100 || !ISUUCHAR(val) ||
429 retlen == (STRLEN) -1 || retlen == 0) {
433 *out = PL_uudmap[val] & 077;
439 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
440 const U8 * const end = start + len;
442 PERL_ARGS_ASSERT_BYTES_TO_UNI;
444 while (start < end) {
445 const UV uv = NATIVE_TO_ASCII(*start);
446 if (UNI_IS_INVARIANT(uv))
447 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
449 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
450 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
457 #define PUSH_BYTES(utf8, cur, buf, len) \
460 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
462 Copy(buf, cur, len, char); \
467 #define GROWING(utf8, cat, start, cur, in_len) \
469 STRLEN glen = (in_len); \
470 if (utf8) glen *= UTF8_EXPAND; \
471 if ((cur) + glen >= (start) + SvLEN(cat)) { \
472 (start) = sv_exp_grow(cat, glen); \
473 (cur) = (start) + SvCUR(cat); \
477 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
479 const STRLEN glen = (in_len); \
481 if (utf8) gl *= UTF8_EXPAND; \
482 if ((cur) + gl >= (start) + SvLEN(cat)) { \
484 SvCUR_set((cat), (cur) - (start)); \
485 (start) = sv_exp_grow(cat, gl); \
486 (cur) = (start) + SvCUR(cat); \
488 PUSH_BYTES(utf8, cur, buf, glen); \
491 #define PUSH_BYTE(utf8, s, byte) \
494 const U8 au8 = (byte); \
495 (s) = bytes_to_uni(&au8, 1, (s)); \
496 } else *(U8 *)(s)++ = (byte); \
499 /* Only to be used inside a loop (see the break) */
500 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
503 if (str >= end) break; \
504 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
505 if (retlen == (STRLEN) -1 || retlen == 0) { \
507 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
512 static const char *_action( const tempsym_t* symptr )
514 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
517 /* Returns the sizeof() struct described by pat */
519 S_measure_struct(pTHX_ tempsym_t* symptr)
523 PERL_ARGS_ASSERT_MEASURE_STRUCT;
525 while (next_symbol(symptr)) {
529 switch (symptr->howlen) {
531 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
535 /* e_no_len and e_number */
536 len = symptr->length;
540 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
543 /* endianness doesn't influence the size of a type */
544 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
546 Perl_croak(aTHX_ "Invalid type '%c' in %s",
547 (int)TYPE_NO_MODIFIERS(symptr->code),
549 case '.' | TYPE_IS_SHRIEKING:
550 case '@' | TYPE_IS_SHRIEKING:
554 case 'U': /* XXXX Is it correct? */
557 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
558 (int) TYPE_NO_MODIFIERS(symptr->code),
565 tempsym_t savsym = *symptr;
566 symptr->patptr = savsym.grpbeg;
567 symptr->patend = savsym.grpend;
568 /* XXXX Theoretically, we need to measure many times at
569 different positions, since the subexpression may contain
570 alignment commands, but be not of aligned length.
571 Need to detect this and croak(). */
572 size = measure_struct(symptr);
576 case 'X' | TYPE_IS_SHRIEKING:
577 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
579 if (!len) /* Avoid division by 0 */
581 len = total % len; /* Assumed: the start is aligned. */
586 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
588 case 'x' | TYPE_IS_SHRIEKING:
589 if (!len) /* Avoid division by 0 */
591 star = total % len; /* Assumed: the start is aligned. */
592 if (star) /* Other portable ways? */
616 size = sizeof(char*);
626 /* locate matching closing parenthesis or bracket
627 * returns char pointer to char after match, or NULL
630 S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
632 PERL_ARGS_ASSERT_GROUP_END;
634 while (patptr < patend) {
635 const char c = *patptr++;
642 while (patptr < patend && *patptr != '\n')
646 patptr = group_end(patptr, patend, ')') + 1;
648 patptr = group_end(patptr, patend, ']') + 1;
650 Perl_croak(aTHX_ "No group ending character '%c' found in template",
656 /* Convert unsigned decimal number to binary.
657 * Expects a pointer to the first digit and address of length variable
658 * Advances char pointer to 1st non-digit char and returns number
661 S_get_num(pTHX_ const char *patptr, I32 *lenptr )
663 I32 len = *patptr++ - '0';
665 PERL_ARGS_ASSERT_GET_NUM;
667 while (isDIGIT(*patptr)) {
668 if (len >= 0x7FFFFFFF/10)
669 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
670 len = (len * 10) + (*patptr++ - '0');
676 /* The marvellous template parsing routine: Using state stored in *symptr,
677 * locates next template code and count
680 S_next_symbol(pTHX_ tempsym_t* symptr )
682 const char* patptr = symptr->patptr;
683 const char* const patend = symptr->patend;
685 PERL_ARGS_ASSERT_NEXT_SYMBOL;
687 symptr->flags &= ~FLAG_SLASH;
689 while (patptr < patend) {
690 if (isSPACE(*patptr))
692 else if (*patptr == '#') {
694 while (patptr < patend && *patptr != '\n')
699 /* We should have found a template code */
700 I32 code = *patptr++ & 0xFF;
701 U32 inherited_modifiers = 0;
703 if (code == ','){ /* grandfather in commas but with a warning */
704 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
705 symptr->flags |= FLAG_COMMA;
706 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
707 "Invalid type ',' in %s", _action( symptr ) );
712 /* for '(', skip to ')' */
714 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
715 Perl_croak(aTHX_ "()-group starts with a count in %s",
717 symptr->grpbeg = patptr;
718 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
719 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
720 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
724 /* look for group modifiers to inherit */
725 if (TYPE_ENDIANNESS(symptr->flags)) {
726 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
727 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
730 /* look for modifiers */
731 while (patptr < patend) {
736 modifier = TYPE_IS_SHRIEKING;
737 allowed = "sSiIlLxXnNvV@.";
740 modifier = TYPE_IS_BIG_ENDIAN;
741 allowed = ENDIANNESS_ALLOWED_TYPES;
744 modifier = TYPE_IS_LITTLE_ENDIAN;
745 allowed = ENDIANNESS_ALLOWED_TYPES;
756 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
757 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
758 allowed, _action( symptr ) );
760 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
761 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
762 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
763 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
764 TYPE_ENDIANNESS_MASK)
765 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
766 *patptr, _action( symptr ) );
768 if ((code & modifier)) {
769 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
770 "Duplicate modifier '%c' after '%c' in %s",
771 *patptr, (int) TYPE_NO_MODIFIERS(code),
779 /* inherit modifiers */
780 code |= inherited_modifiers;
782 /* look for count and/or / */
783 if (patptr < patend) {
784 if (isDIGIT(*patptr)) {
785 patptr = get_num( patptr, &symptr->length );
786 symptr->howlen = e_number;
788 } else if (*patptr == '*') {
790 symptr->howlen = e_star;
792 } else if (*patptr == '[') {
793 const char* lenptr = ++patptr;
794 symptr->howlen = e_number;
795 patptr = group_end( patptr, patend, ']' ) + 1;
796 /* what kind of [] is it? */
797 if (isDIGIT(*lenptr)) {
798 lenptr = get_num( lenptr, &symptr->length );
800 Perl_croak(aTHX_ "Malformed integer in [] in %s",
803 tempsym_t savsym = *symptr;
804 symptr->patend = patptr-1;
805 symptr->patptr = lenptr;
806 savsym.length = measure_struct(symptr);
810 symptr->howlen = e_no_len;
815 while (patptr < patend) {
816 if (isSPACE(*patptr))
818 else if (*patptr == '#') {
820 while (patptr < patend && *patptr != '\n')
825 if (*patptr == '/') {
826 symptr->flags |= FLAG_SLASH;
828 if (patptr < patend &&
829 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
830 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
837 /* at end - no count, no / */
838 symptr->howlen = e_no_len;
843 symptr->patptr = patptr;
847 symptr->patptr = patptr;
852 There is no way to cleanly handle the case where we should process the
853 string per byte in its upgraded form while it's really in downgraded form
854 (e.g. estimates like strend-s as an upper bound for the number of
855 characters left wouldn't work). So if we foresee the need of this
856 (pattern starts with U or contains U0), we want to work on the encoded
857 version of the string. Users are advised to upgrade their pack string
858 themselves if they need to do a lot of unpacks like this on it
861 need_utf8(const char *pat, const char *patend)
865 PERL_ARGS_ASSERT_NEED_UTF8;
867 while (pat < patend) {
870 pat = (const char *) memchr(pat, '\n', patend-pat);
871 if (!pat) return FALSE;
872 } else if (pat[0] == 'U') {
873 if (first || pat[1] == '0') return TRUE;
874 } else first = FALSE;
881 first_symbol(const char *pat, const char *patend) {
882 PERL_ARGS_ASSERT_FIRST_SYMBOL;
884 while (pat < patend) {
885 if (pat[0] != '#') return pat[0];
887 pat = (const char *) memchr(pat, '\n', patend-pat);
895 =for apidoc unpackstring
897 The engine implementing the unpack() Perl function.
899 Using the template pat..patend, this function unpacks the string
900 s..strend into a number of mortal SVs, which it pushes onto the perl
901 argument (@_) stack (so you will need to issue a C<PUTBACK> before and
902 C<SPAGAIN> after the call to this function). It returns the number of
905 The strend and patend pointers should point to the byte following the last
906 character of each string.
908 Although this function returns its values on the perl argument stack, it
909 doesn't take any parameters from that stack (and thus in particular
910 there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
916 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
920 PERL_ARGS_ASSERT_UNPACKSTRING;
922 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
923 else if (need_utf8(pat, patend)) {
924 /* We probably should try to avoid this in case a scalar context call
925 wouldn't get to the "U0" */
926 STRLEN len = strend - s;
927 s = (char *) bytes_to_utf8((U8 *) s, &len);
930 flags |= FLAG_DO_UTF8;
933 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
934 flags |= FLAG_PARSE_UTF8;
936 TEMPSYM_INIT(&sym, pat, patend, flags);
938 return unpack_rec(&sym, s, s, strend, NULL );
942 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
946 const I32 start_sp_offset = SP - PL_stack_base;
951 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
953 bool explicit_length;
954 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
955 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
957 PERL_ARGS_ASSERT_UNPACK_REC;
959 symptr->strbeg = s - strbeg;
961 while (next_symbol(symptr)) {
964 I32 datumtype = symptr->code;
965 /* do first one only unless in list context
966 / is implemented by unpacking the count, then popping it from the
967 stack, so must check that we're not in the middle of a / */
969 && (SP - PL_stack_base == start_sp_offset + 1)
970 && (datumtype != '/') ) /* XXX can this be omitted */
973 switch (howlen = symptr->howlen) {
975 len = strend - strbeg; /* long enough */
978 /* e_no_len and e_number */
979 len = symptr->length;
983 explicit_length = TRUE;
985 beyond = s >= strend;
987 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
989 /* props nonzero means we can process this letter. */
990 const long size = props & PACK_SIZE_MASK;
991 const long howmany = (strend - s) / size;
995 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
996 if (len && unpack_only_one) len = 1;
1002 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1004 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1007 if (howlen == e_no_len)
1008 len = 16; /* len is not specified */
1016 tempsym_t savsym = *symptr;
1017 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1018 symptr->flags |= group_modifiers;
1019 symptr->patend = savsym.grpend;
1020 symptr->previous = &savsym;
1023 if (len && unpack_only_one) len = 1;
1025 symptr->patptr = savsym.grpbeg;
1026 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1027 else symptr->flags &= ~FLAG_PARSE_UTF8;
1028 unpack_rec(symptr, s, strbeg, strend, &s);
1029 if (s == strend && savsym.howlen == e_star)
1030 break; /* No way to continue */
1033 savsym.flags = symptr->flags & ~group_modifiers;
1037 case '.' | TYPE_IS_SHRIEKING:
1041 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1042 if (howlen == e_star) from = strbeg;
1043 else if (len <= 0) from = s;
1045 tempsym_t *group = symptr;
1047 while (--len && group) group = group->previous;
1048 from = group ? strbeg + group->strbeg : strbeg;
1051 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1052 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1056 case '@' | TYPE_IS_SHRIEKING:
1058 s = strbeg + symptr->strbeg;
1059 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1063 Perl_croak(aTHX_ "'@' outside of string in unpack");
1068 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1071 Perl_croak(aTHX_ "'@' outside of string in unpack");
1075 case 'X' | TYPE_IS_SHRIEKING:
1076 if (!len) /* Avoid division by 0 */
1079 const char *hop, *last;
1081 hop = last = strbeg;
1083 hop += UTF8SKIP(hop);
1090 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1094 len = (s - strbeg) % len;
1100 Perl_croak(aTHX_ "'X' outside of string in unpack");
1101 while (--s, UTF8_IS_CONTINUATION(*s)) {
1103 Perl_croak(aTHX_ "'X' outside of string in unpack");
1108 if (len > s - strbeg)
1109 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1113 case 'x' | TYPE_IS_SHRIEKING: {
1115 if (!len) /* Avoid division by 0 */
1117 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1118 else ai32 = (s - strbeg) % len;
1119 if (ai32 == 0) break;
1127 Perl_croak(aTHX_ "'x' outside of string in unpack");
1132 if (len > strend - s)
1133 Perl_croak(aTHX_ "'x' outside of string in unpack");
1138 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1144 /* Preliminary length estimate is assumed done in 'W' */
1145 if (len > strend - s) len = strend - s;
1151 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1152 if (hop >= strend) {
1154 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1159 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1161 } else if (len > strend - s)
1164 if (datumtype == 'Z') {
1165 /* 'Z' strips stuff after first null */
1166 const char *ptr, *end;
1168 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1169 sv = newSVpvn(s, ptr-s);
1170 if (howlen == e_star) /* exact for 'Z*' */
1171 len = ptr-s + (ptr != strend ? 1 : 0);
1172 } else if (datumtype == 'A') {
1173 /* 'A' strips both nulls and spaces */
1175 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1176 for (ptr = s+len-1; ptr >= s; ptr--)
1177 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1178 !isSPACE_utf8(ptr)) break;
1179 if (ptr >= s) ptr += UTF8SKIP(ptr);
1182 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1184 for (ptr = s+len-1; ptr >= s; ptr--)
1185 if (*ptr != 0 && !isSPACE(*ptr)) break;
1188 sv = newSVpvn(s, ptr-s);
1189 } else sv = newSVpvn(s, len);
1193 /* Undo any upgrade done due to need_utf8() */
1194 if (!(symptr->flags & FLAG_WAS_UTF8))
1195 sv_utf8_downgrade(sv, 0);
1203 if (howlen == e_star || len > (strend - s) * 8)
1204 len = (strend - s) * 8;
1207 while (len >= 8 && s < strend) {
1208 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1213 cuv += PL_bitcount[*(U8 *)s++];
1216 if (len && s < strend) {
1218 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1219 if (datumtype == 'b')
1221 if (bits & 1) cuv++;
1226 if (bits & 0x80) cuv++;
1233 sv = sv_2mortal(newSV(len ? len : 1));
1236 if (datumtype == 'b') {
1238 const I32 ai32 = len;
1239 for (len = 0; len < ai32; len++) {
1240 if (len & 7) bits >>= 1;
1242 if (s >= strend) break;
1243 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1244 } else bits = *(U8 *) s++;
1245 *str++ = bits & 1 ? '1' : '0';
1249 const I32 ai32 = len;
1250 for (len = 0; len < ai32; len++) {
1251 if (len & 7) bits <<= 1;
1253 if (s >= strend) break;
1254 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1255 } else bits = *(U8 *) s++;
1256 *str++ = bits & 0x80 ? '1' : '0';
1260 SvCUR_set(sv, str - SvPVX_const(sv));
1267 /* Preliminary length estimate, acceptable for utf8 too */
1268 if (howlen == e_star || len > (strend - s) * 2)
1269 len = (strend - s) * 2;
1271 sv = sv_2mortal(newSV(len ? len : 1));
1275 if (datumtype == 'h') {
1278 for (len = 0; len < ai32; len++) {
1279 if (len & 1) bits >>= 4;
1281 if (s >= strend) break;
1282 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1283 } else bits = * (U8 *) s++;
1285 *str++ = PL_hexdigit[bits & 15];
1289 const I32 ai32 = len;
1290 for (len = 0; len < ai32; len++) {
1291 if (len & 1) bits <<= 4;
1293 if (s >= strend) break;
1294 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1295 } else bits = *(U8 *) s++;
1297 *str++ = PL_hexdigit[(bits >> 4) & 15];
1302 SvCUR_set(sv, str - SvPVX_const(sv));
1309 if (explicit_length)
1310 /* Switch to "character" mode */
1311 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1316 while (len-- > 0 && s < strend) {
1321 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1322 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1323 if (retlen == (STRLEN) -1 || retlen == 0)
1324 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1328 aint = *(U8 *)(s)++;
1329 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1333 else if (checksum > bits_in_uv)
1334 cdouble += (NV)aint;
1342 while (len-- > 0 && s < strend) {
1344 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1345 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1346 if (retlen == (STRLEN) -1 || retlen == 0)
1347 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1351 else if (checksum > bits_in_uv)
1352 cdouble += (NV) val;
1356 } else if (!checksum)
1358 const U8 ch = *(U8 *) s++;
1361 else if (checksum > bits_in_uv)
1362 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1364 while (len-- > 0) cuv += *(U8 *) s++;
1368 if (explicit_length && howlen != e_star) {
1369 /* Switch to "bytes in UTF-8" mode */
1370 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1372 /* Should be impossible due to the need_utf8() test */
1373 Perl_croak(aTHX_ "U0 mode on a byte string");
1377 if (len > strend - s) len = strend - s;
1379 if (len && unpack_only_one) len = 1;
1383 while (len-- > 0 && s < strend) {
1387 U8 result[UTF8_MAXLEN];
1388 const char *ptr = s;
1390 /* Bug: warns about bad utf8 even if we are short on bytes
1391 and will break out of the loop */
1392 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1395 len = UTF8SKIP(result);
1396 if (!uni_to_bytes(aTHX_ &ptr, strend,
1397 (char *) &result[1], len-1, 'U')) break;
1398 auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT);
1401 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
1402 if (retlen == (STRLEN) -1 || retlen == 0)
1403 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1408 else if (checksum > bits_in_uv)
1409 cdouble += (NV) auv;
1414 case 's' | TYPE_IS_SHRIEKING:
1415 #if SHORTSIZE != SIZE16
1418 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1419 DO_BO_UNPACK(ashort, s);
1422 else if (checksum > bits_in_uv)
1423 cdouble += (NV)ashort;
1435 #if U16SIZE > SIZE16
1438 SHIFT16(utf8, s, strend, &ai16, datumtype);
1439 DO_BO_UNPACK(ai16, 16);
1440 #if U16SIZE > SIZE16
1446 else if (checksum > bits_in_uv)
1447 cdouble += (NV)ai16;
1452 case 'S' | TYPE_IS_SHRIEKING:
1453 #if SHORTSIZE != SIZE16
1455 unsigned short aushort;
1456 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1457 DO_BO_UNPACK(aushort, s);
1460 else if (checksum > bits_in_uv)
1461 cdouble += (NV)aushort;
1474 #if U16SIZE > SIZE16
1477 SHIFT16(utf8, s, strend, &au16, datumtype);
1478 DO_BO_UNPACK(au16, 16);
1480 if (datumtype == 'n')
1481 au16 = PerlSock_ntohs(au16);
1484 if (datumtype == 'v')
1489 else if (checksum > bits_in_uv)
1490 cdouble += (NV) au16;
1495 case 'v' | TYPE_IS_SHRIEKING:
1496 case 'n' | TYPE_IS_SHRIEKING:
1499 # if U16SIZE > SIZE16
1502 SHIFT16(utf8, s, strend, &ai16, datumtype);
1504 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1505 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1506 # endif /* HAS_NTOHS */
1508 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1509 ai16 = (I16) vtohs((U16) ai16);
1510 # endif /* HAS_VTOHS */
1513 else if (checksum > bits_in_uv)
1514 cdouble += (NV) ai16;
1520 case 'i' | TYPE_IS_SHRIEKING:
1523 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1524 DO_BO_UNPACK(aint, i);
1527 else if (checksum > bits_in_uv)
1528 cdouble += (NV)aint;
1534 case 'I' | TYPE_IS_SHRIEKING:
1537 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1538 DO_BO_UNPACK(auint, i);
1541 else if (checksum > bits_in_uv)
1542 cdouble += (NV)auint;
1550 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1551 #if IVSIZE == INTSIZE
1552 DO_BO_UNPACK(aiv, i);
1553 #elif IVSIZE == LONGSIZE
1554 DO_BO_UNPACK(aiv, l);
1555 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1556 DO_BO_UNPACK(aiv, 64);
1558 Perl_croak(aTHX_ "'j' not supported on this platform");
1562 else if (checksum > bits_in_uv)
1571 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1572 #if IVSIZE == INTSIZE
1573 DO_BO_UNPACK(auv, i);
1574 #elif IVSIZE == LONGSIZE
1575 DO_BO_UNPACK(auv, l);
1576 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1577 DO_BO_UNPACK(auv, 64);
1579 Perl_croak(aTHX_ "'J' not supported on this platform");
1583 else if (checksum > bits_in_uv)
1589 case 'l' | TYPE_IS_SHRIEKING:
1590 #if LONGSIZE != SIZE32
1593 SHIFT_VAR(utf8, s, strend, along, datumtype);
1594 DO_BO_UNPACK(along, l);
1597 else if (checksum > bits_in_uv)
1598 cdouble += (NV)along;
1609 #if U32SIZE > SIZE32
1612 SHIFT32(utf8, s, strend, &ai32, datumtype);
1613 DO_BO_UNPACK(ai32, 32);
1614 #if U32SIZE > SIZE32
1615 if (ai32 > 2147483647) ai32 -= 4294967296;
1619 else if (checksum > bits_in_uv)
1620 cdouble += (NV)ai32;
1625 case 'L' | TYPE_IS_SHRIEKING:
1626 #if LONGSIZE != SIZE32
1628 unsigned long aulong;
1629 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1630 DO_BO_UNPACK(aulong, l);
1633 else if (checksum > bits_in_uv)
1634 cdouble += (NV)aulong;
1647 #if U32SIZE > SIZE32
1650 SHIFT32(utf8, s, strend, &au32, datumtype);
1651 DO_BO_UNPACK(au32, 32);
1653 if (datumtype == 'N')
1654 au32 = PerlSock_ntohl(au32);
1657 if (datumtype == 'V')
1662 else if (checksum > bits_in_uv)
1663 cdouble += (NV)au32;
1668 case 'V' | TYPE_IS_SHRIEKING:
1669 case 'N' | TYPE_IS_SHRIEKING:
1672 #if U32SIZE > SIZE32
1675 SHIFT32(utf8, s, strend, &ai32, datumtype);
1677 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1678 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1681 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1682 ai32 = (I32)vtohl((U32)ai32);
1686 else if (checksum > bits_in_uv)
1687 cdouble += (NV)ai32;
1695 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1696 DO_BO_UNPACK_PC(aptr);
1697 /* newSVpv generates undef if aptr is NULL */
1698 mPUSHs(newSVpv(aptr, 0));
1706 while (len > 0 && s < strend) {
1708 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1709 auv = (auv << 7) | (ch & 0x7f);
1710 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1718 if (++bytes >= sizeof(UV)) { /* promote to string */
1721 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
1722 while (s < strend) {
1723 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1724 sv = mul128(sv, (U8)(ch & 0x7f));
1730 t = SvPV_nolen_const(sv);
1739 if ((s >= strend) && bytes)
1740 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1744 if (symptr->howlen == e_star)
1745 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1747 if (s + sizeof(char*) <= strend) {
1749 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1750 DO_BO_UNPACK_PC(aptr);
1751 /* newSVpvn generates undef if aptr is NULL */
1752 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1759 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
1760 DO_BO_UNPACK(aquad, 64);
1762 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
1763 newSViv((IV)aquad) : newSVnv((NV)aquad));
1764 else if (checksum > bits_in_uv)
1765 cdouble += (NV)aquad;
1773 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
1774 DO_BO_UNPACK(auquad, 64);
1776 mPUSHs(auquad <= UV_MAX ?
1777 newSVuv((UV)auquad) : newSVnv((NV)auquad));
1778 else if (checksum > bits_in_uv)
1779 cdouble += (NV)auquad;
1784 #endif /* HAS_QUAD */
1785 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1789 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
1790 DO_BO_UNPACK_N(afloat, float);
1800 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
1801 DO_BO_UNPACK_N(adouble, double);
1811 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), datumtype);
1812 DO_BO_UNPACK_N(anv.nv, NV);
1819 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1823 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, sizeof(aldouble.bytes), datumtype);
1824 DO_BO_UNPACK_N(aldouble.ld, long double);
1826 mPUSHn(aldouble.ld);
1828 cdouble += aldouble.ld;
1834 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1835 sv = sv_2mortal(newSV(l));
1836 if (l) SvPOK_on(sv);
1839 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1844 next_uni_uu(aTHX_ &s, strend, &a);
1845 next_uni_uu(aTHX_ &s, strend, &b);
1846 next_uni_uu(aTHX_ &s, strend, &c);
1847 next_uni_uu(aTHX_ &s, strend, &d);
1848 hunk[0] = (char)((a << 2) | (b >> 4));
1849 hunk[1] = (char)((b << 4) | (c >> 2));
1850 hunk[2] = (char)((c << 6) | d);
1852 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1860 /* possible checksum byte */
1861 const char *skip = s+UTF8SKIP(s);
1862 if (skip < strend && *skip == '\n')
1868 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1872 len = PL_uudmap[*(U8*)s++] & 077;
1874 if (s < strend && ISUUCHAR(*s))
1875 a = PL_uudmap[*(U8*)s++] & 077;
1878 if (s < strend && ISUUCHAR(*s))
1879 b = PL_uudmap[*(U8*)s++] & 077;
1882 if (s < strend && ISUUCHAR(*s))
1883 c = PL_uudmap[*(U8*)s++] & 077;
1886 if (s < strend && ISUUCHAR(*s))
1887 d = PL_uudmap[*(U8*)s++] & 077;
1890 hunk[0] = (char)((a << 2) | (b >> 4));
1891 hunk[1] = (char)((b << 4) | (c >> 2));
1892 hunk[2] = (char)((c << 6) | d);
1894 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1899 else /* possible checksum byte */
1900 if (s + 1 < strend && s[1] == '\n')
1910 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1911 (checksum > bits_in_uv &&
1912 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1915 anv = (NV) (1 << (checksum & 15));
1916 while (checksum >= 16) {
1920 while (cdouble < 0.0)
1922 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
1923 sv = newSVnv(cdouble);
1926 if (checksum < bits_in_uv) {
1927 UV mask = ((UV)1 << checksum) - 1;
1936 if (symptr->flags & FLAG_SLASH){
1937 if (SP - PL_stack_base - start_sp_offset <= 0)
1939 if( next_symbol(symptr) ){
1940 if( symptr->howlen == e_number )
1941 Perl_croak(aTHX_ "Count after length/code in unpack" );
1943 /* ...end of char buffer then no decent length available */
1944 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1946 /* take top of stack (hope it's numeric) */
1949 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1952 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1954 datumtype = symptr->code;
1955 explicit_length = FALSE;
1963 return SP - PL_stack_base - start_sp_offset;
1971 I32 gimme = GIMME_V;
1974 const char *pat = SvPV_const(left, llen);
1975 const char *s = SvPV_const(right, rlen);
1976 const char *strend = s + rlen;
1977 const char *patend = pat + llen;
1981 cnt = unpackstring(pat, patend, s, strend,
1982 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
1983 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
1986 if ( !cnt && gimme == G_SCALAR )
1987 PUSHs(&PL_sv_undef);
1992 doencodes(U8 *h, const char *s, I32 len)
1994 *h++ = PL_uuemap[len];
1996 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1997 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1998 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1999 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2004 const char r = (len > 1 ? s[1] : '\0');
2005 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2006 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2007 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2008 *h++ = PL_uuemap[0];
2015 S_is_an_int(pTHX_ const char *s, STRLEN l)
2017 SV *result = newSVpvn(s, l);
2018 char *const result_c = SvPV_nolen(result); /* convenience */
2019 char *out = result_c;
2023 PERL_ARGS_ASSERT_IS_AN_INT;
2031 SvREFCNT_dec(result);
2054 SvREFCNT_dec(result);
2060 SvCUR_set(result, out - result_c);
2064 /* pnum must be '\0' terminated */
2066 S_div128(pTHX_ SV *pnum, bool *done)
2069 char * const s = SvPV(pnum, len);
2073 PERL_ARGS_ASSERT_DIV128;
2077 const int i = m * 10 + (*t - '0');
2078 const int r = (i >> 7); /* r < 10 */
2086 SvCUR_set(pnum, (STRLEN) (t - s));
2091 =for apidoc packlist
2093 The engine implementing pack() Perl function.
2099 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
2104 PERL_ARGS_ASSERT_PACKLIST;
2106 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2108 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2109 Also make sure any UTF8 flag is loaded */
2110 SvPV_force_nolen(cat);
2112 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2114 (void)pack_rec( cat, &sym, beglist, endlist );
2117 /* like sv_utf8_upgrade, but also repoint the group start markers */
2119 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2122 const char *from_ptr, *from_start, *from_end, **marks, **m;
2123 char *to_start, *to_ptr;
2125 if (SvUTF8(sv)) return;
2127 from_start = SvPVX_const(sv);
2128 from_end = from_start + SvCUR(sv);
2129 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2130 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2131 if (from_ptr == from_end) {
2132 /* Simple case: no character needs to be changed */
2137 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2138 Newx(to_start, len, char);
2139 Copy(from_start, to_start, from_ptr-from_start, char);
2140 to_ptr = to_start + (from_ptr-from_start);
2142 Newx(marks, sym_ptr->level+2, const char *);
2143 for (group=sym_ptr; group; group = group->previous)
2144 marks[group->level] = from_start + group->strbeg;
2145 marks[sym_ptr->level+1] = from_end+1;
2146 for (m = marks; *m < from_ptr; m++)
2147 *m = to_start + (*m-from_start);
2149 for (;from_ptr < from_end; from_ptr++) {
2150 while (*m == from_ptr) *m++ = to_ptr;
2151 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2155 while (*m == from_ptr) *m++ = to_ptr;
2156 if (m != marks + sym_ptr->level+1) {
2159 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2160 "level=%d", m, marks, sym_ptr->level);
2162 for (group=sym_ptr; group; group = group->previous)
2163 group->strbeg = marks[group->level] - to_start;
2168 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2169 from_start -= SvIVX(sv);
2172 SvFLAGS(sv) &= ~SVf_OOK;
2175 Safefree(from_start);
2176 SvPV_set(sv, to_start);
2177 SvCUR_set(sv, to_ptr - to_start);
2182 /* Exponential string grower. Makes string extension effectively O(n)
2183 needed says how many extra bytes we need (not counting the final '\0')
2184 Only grows the string if there is an actual lack of space
2187 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2188 const STRLEN cur = SvCUR(sv);
2189 const STRLEN len = SvLEN(sv);
2192 PERL_ARGS_ASSERT_SV_EXP_GROW;
2194 if (len - cur > needed) return SvPVX(sv);
2195 extend = needed > len ? needed : len;
2196 return SvGROW(sv, len+extend+1);
2201 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2204 tempsym_t lookahead;
2205 I32 items = endlist - beglist;
2206 bool found = next_symbol(symptr);
2207 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2208 bool warn_utf8 = ckWARN(WARN_UTF8);
2210 PERL_ARGS_ASSERT_PACK_REC;
2212 if (symptr->level == 0 && found && symptr->code == 'U') {
2213 marked_upgrade(aTHX_ cat, symptr);
2214 symptr->flags |= FLAG_DO_UTF8;
2217 symptr->strbeg = SvCUR(cat);
2223 SV *lengthcode = NULL;
2224 I32 datumtype = symptr->code;
2225 howlen_t howlen = symptr->howlen;
2226 char *start = SvPVX(cat);
2227 char *cur = start + SvCUR(cat);
2229 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2233 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2237 /* e_no_len and e_number */
2238 len = symptr->length;
2243 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2245 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2246 /* We can process this letter. */
2247 STRLEN size = props & PACK_SIZE_MASK;
2248 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2252 /* Look ahead for next symbol. Do we have code/code? */
2253 lookahead = *symptr;
2254 found = next_symbol(&lookahead);
2255 if (symptr->flags & FLAG_SLASH) {
2257 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2258 if (strchr("aAZ", lookahead.code)) {
2259 if (lookahead.howlen == e_number) count = lookahead.length;
2262 count = sv_len_utf8(*beglist);
2265 if (lookahead.code == 'Z') count++;
2268 if (lookahead.howlen == e_number && lookahead.length < items)
2269 count = lookahead.length;
2272 lookahead.howlen = e_number;
2273 lookahead.length = count;
2274 lengthcode = sv_2mortal(newSViv(count));
2277 /* Code inside the switch must take care to properly update
2278 cat (CUR length and '\0' termination) if it updated *cur and
2279 doesn't simply leave using break */
2280 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2282 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2283 (int) TYPE_NO_MODIFIERS(datumtype));
2285 Perl_croak(aTHX_ "'%%' may not be used in pack");
2288 case '.' | TYPE_IS_SHRIEKING:
2290 if (howlen == e_star) from = start;
2291 else if (len == 0) from = cur;
2293 tempsym_t *group = symptr;
2295 while (--len && group) group = group->previous;
2296 from = group ? start + group->strbeg : start;
2299 len = SvIV(fromstr);
2301 case '@' | TYPE_IS_SHRIEKING:
2303 from = start + symptr->strbeg;
2305 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2307 while (len && from < cur) {
2308 from += UTF8SKIP(from);
2312 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2314 /* Here we know from == cur */
2316 GROWING(0, cat, start, cur, len);
2317 Zero(cur, len, char);
2319 } else if (from < cur) {
2322 } else goto no_change;
2330 if (len > 0) goto grow;
2331 if (len == 0) goto no_change;
2338 tempsym_t savsym = *symptr;
2339 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2340 symptr->flags |= group_modifiers;
2341 symptr->patend = savsym.grpend;
2343 symptr->previous = &lookahead;
2346 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2347 else symptr->flags &= ~FLAG_PARSE_UTF8;
2348 was_utf8 = SvUTF8(cat);
2349 symptr->patptr = savsym.grpbeg;
2350 beglist = pack_rec(cat, symptr, beglist, endlist);
2351 if (SvUTF8(cat) != was_utf8)
2352 /* This had better be an upgrade while in utf8==0 mode */
2355 if (savsym.howlen == e_star && beglist == endlist)
2356 break; /* No way to continue */
2358 items = endlist - beglist;
2359 lookahead.flags = symptr->flags & ~group_modifiers;
2362 case 'X' | TYPE_IS_SHRIEKING:
2363 if (!len) /* Avoid division by 0 */
2370 hop += UTF8SKIP(hop);
2377 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2381 len = (cur-start) % len;
2385 if (len < 1) goto no_change;
2389 Perl_croak(aTHX_ "'%c' outside of string in pack",
2390 (int) TYPE_NO_MODIFIERS(datumtype));
2391 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2393 Perl_croak(aTHX_ "'%c' outside of string in pack",
2394 (int) TYPE_NO_MODIFIERS(datumtype));
2400 if (cur - start < len)
2401 Perl_croak(aTHX_ "'%c' outside of string in pack",
2402 (int) TYPE_NO_MODIFIERS(datumtype));
2405 if (cur < start+symptr->strbeg) {
2406 /* Make sure group starts don't point into the void */
2408 const STRLEN length = cur-start;
2409 for (group = symptr;
2410 group && length < group->strbeg;
2411 group = group->previous) group->strbeg = length;
2412 lookahead.strbeg = length;
2415 case 'x' | TYPE_IS_SHRIEKING: {
2417 if (!len) /* Avoid division by 0 */
2419 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2420 else ai32 = (cur - start) % len;
2421 if (ai32 == 0) goto no_change;
2433 aptr = SvPV_const(fromstr, fromlen);
2434 if (DO_UTF8(fromstr)) {
2435 const char *end, *s;
2437 if (!utf8 && !SvUTF8(cat)) {
2438 marked_upgrade(aTHX_ cat, symptr);
2439 lookahead.flags |= FLAG_DO_UTF8;
2440 lookahead.strbeg = symptr->strbeg;
2443 cur = start + SvCUR(cat);
2445 if (howlen == e_star) {
2446 if (utf8) goto string_copy;
2450 end = aptr + fromlen;
2451 fromlen = datumtype == 'Z' ? len-1 : len;
2452 while ((I32) fromlen > 0 && s < end) {
2457 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2460 if (datumtype == 'Z') len++;
2466 fromlen = len - fromlen;
2467 if (datumtype == 'Z') fromlen--;
2468 if (howlen == e_star) {
2470 if (datumtype == 'Z') len++;
2472 GROWING(0, cat, start, cur, len);
2473 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2474 datumtype | TYPE_IS_PACK))
2475 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2476 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2477 (int)datumtype, aptr, end, cur, (UV)fromlen);
2481 if (howlen == e_star) {
2483 if (datumtype == 'Z') len++;
2485 if (len <= (I32) fromlen) {
2487 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2489 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2491 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2492 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2494 while (fromlen > 0) {
2495 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2501 if (howlen == e_star) {
2503 if (datumtype == 'Z') len++;
2505 if (len <= (I32) fromlen) {
2507 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2509 GROWING(0, cat, start, cur, len);
2510 Copy(aptr, cur, fromlen, char);
2514 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2521 const char *str, *end;
2528 str = SvPV_const(fromstr, fromlen);
2529 end = str + fromlen;
2530 if (DO_UTF8(fromstr)) {
2532 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2534 utf8_source = FALSE;
2535 utf8_flags = 0; /* Unused, but keep compilers happy */
2537 if (howlen == e_star) len = fromlen;
2538 field_len = (len+7)/8;
2539 GROWING(utf8, cat, start, cur, field_len);
2540 if (len > (I32)fromlen) len = fromlen;
2543 if (datumtype == 'B')
2547 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2549 } else bits |= *str++ & 1;
2550 if (l & 7) bits <<= 1;
2552 PUSH_BYTE(utf8, cur, bits);
2557 /* datumtype == 'b' */
2561 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2562 if (val & 1) bits |= 0x80;
2563 } else if (*str++ & 1)
2565 if (l & 7) bits >>= 1;
2567 PUSH_BYTE(utf8, cur, bits);
2573 if (datumtype == 'B')
2574 bits <<= 7 - (l & 7);
2576 bits >>= 7 - (l & 7);
2577 PUSH_BYTE(utf8, cur, bits);
2580 /* Determine how many chars are left in the requested field */
2582 if (howlen == e_star) field_len = 0;
2583 else field_len -= l;
2584 Zero(cur, field_len, char);
2590 const char *str, *end;
2597 str = SvPV_const(fromstr, fromlen);
2598 end = str + fromlen;
2599 if (DO_UTF8(fromstr)) {
2601 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2603 utf8_source = FALSE;
2604 utf8_flags = 0; /* Unused, but keep compilers happy */
2606 if (howlen == e_star) len = fromlen;
2607 field_len = (len+1)/2;
2608 GROWING(utf8, cat, start, cur, field_len);
2609 if (!utf8 && len > (I32)fromlen) len = fromlen;
2612 if (datumtype == 'H')
2616 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2617 if (val < 256 && isALPHA(val))
2618 bits |= (val + 9) & 0xf;
2621 } else if (isALPHA(*str))
2622 bits |= (*str++ + 9) & 0xf;
2624 bits |= *str++ & 0xf;
2625 if (l & 1) bits <<= 4;
2627 PUSH_BYTE(utf8, cur, bits);
2635 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2636 if (val < 256 && isALPHA(val))
2637 bits |= ((val + 9) & 0xf) << 4;
2639 bits |= (val & 0xf) << 4;
2640 } else if (isALPHA(*str))
2641 bits |= ((*str++ + 9) & 0xf) << 4;
2643 bits |= (*str++ & 0xf) << 4;
2644 if (l & 1) bits >>= 4;
2646 PUSH_BYTE(utf8, cur, bits);
2652 PUSH_BYTE(utf8, cur, bits);
2655 /* Determine how many chars are left in the requested field */
2657 if (howlen == e_star) field_len = 0;
2658 else field_len -= l;
2659 Zero(cur, field_len, char);
2667 aiv = SvIV(fromstr);
2668 if ((-128 > aiv || aiv > 127))
2669 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2670 "Character in 'c' format wrapped in pack");
2671 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2676 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2682 aiv = SvIV(fromstr);
2683 if ((0 > aiv || aiv > 0xff))
2684 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2685 "Character in 'C' format wrapped in pack");
2686 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2691 U8 in_bytes = (U8)IN_BYTES;
2693 end = start+SvLEN(cat)-1;
2694 if (utf8) end -= UTF8_MAXLEN-1;
2698 auv = SvUV(fromstr);
2699 if (in_bytes) auv = auv % 0x100;
2704 SvCUR_set(cat, cur - start);
2706 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2707 end = start+SvLEN(cat)-UTF8_MAXLEN;
2709 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2712 0 : UNICODE_ALLOW_ANY);
2717 SvCUR_set(cat, cur - start);
2718 marked_upgrade(aTHX_ cat, symptr);
2719 lookahead.flags |= FLAG_DO_UTF8;
2720 lookahead.strbeg = symptr->strbeg;
2723 cur = start + SvCUR(cat);
2724 end = start+SvLEN(cat)-UTF8_MAXLEN;
2727 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2728 "Character in 'W' format wrapped in pack");
2733 SvCUR_set(cat, cur - start);
2734 GROWING(0, cat, start, cur, len+1);
2735 end = start+SvLEN(cat)-1;
2737 *(U8 *) cur++ = (U8)auv;
2746 if (!(symptr->flags & FLAG_DO_UTF8)) {
2747 marked_upgrade(aTHX_ cat, symptr);
2748 lookahead.flags |= FLAG_DO_UTF8;
2749 lookahead.strbeg = symptr->strbeg;
2755 end = start+SvLEN(cat);
2756 if (!utf8) end -= UTF8_MAXLEN;
2760 auv = SvUV(fromstr);
2762 U8 buffer[UTF8_MAXLEN], *endb;
2763 endb = uvuni_to_utf8_flags(buffer, auv,
2765 0 : UNICODE_ALLOW_ANY);
2766 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2768 SvCUR_set(cat, cur - start);
2769 GROWING(0, cat, start, cur,
2770 len+(endb-buffer)*UTF8_EXPAND);
2771 end = start+SvLEN(cat);
2773 cur = bytes_to_uni(buffer, endb-buffer, cur);
2777 SvCUR_set(cat, cur - start);
2778 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2779 end = start+SvLEN(cat)-UTF8_MAXLEN;
2781 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
2783 0 : UNICODE_ALLOW_ANY);
2788 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2794 anv = SvNV(fromstr);
2795 # if defined(VMS) && !defined(_IEEE_FP)
2796 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2797 * on Alpha; fake it if we don't have them.
2801 else if (anv < -FLT_MAX)
2803 else afloat = (float)anv;
2805 afloat = (float)anv;
2807 DO_BO_PACK_N(afloat, float);
2808 PUSH_VAR(utf8, cur, afloat);
2816 anv = SvNV(fromstr);
2817 # if defined(VMS) && !defined(_IEEE_FP)
2818 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2819 * on Alpha; fake it if we don't have them.
2823 else if (anv < -DBL_MAX)
2825 else adouble = (double)anv;
2827 adouble = (double)anv;
2829 DO_BO_PACK_N(adouble, double);
2830 PUSH_VAR(utf8, cur, adouble);
2835 Zero(&anv, 1, NV); /* can be long double with unused bits */
2839 /* to work round a gcc/x86 bug; don't use SvNV */
2840 anv.nv = sv_2nv(fromstr);
2842 anv.nv = SvNV(fromstr);
2844 DO_BO_PACK_N(anv, NV);
2845 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes));
2849 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2852 /* long doubles can have unused bits, which may be nonzero */
2853 Zero(&aldouble, 1, long double);
2857 /* to work round a gcc/x86 bug; don't use SvNV */
2858 aldouble.ld = (long double)sv_2nv(fromstr);
2860 aldouble.ld = (long double)SvNV(fromstr);
2862 DO_BO_PACK_N(aldouble, long double);
2863 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes));
2868 case 'n' | TYPE_IS_SHRIEKING:
2873 ai16 = (I16)SvIV(fromstr);
2875 ai16 = PerlSock_htons(ai16);
2877 PUSH16(utf8, cur, &ai16);
2880 case 'v' | TYPE_IS_SHRIEKING:
2885 ai16 = (I16)SvIV(fromstr);
2889 PUSH16(utf8, cur, &ai16);
2892 case 'S' | TYPE_IS_SHRIEKING:
2893 #if SHORTSIZE != SIZE16
2895 unsigned short aushort;
2897 aushort = SvUV(fromstr);
2898 DO_BO_PACK(aushort, s);
2899 PUSH_VAR(utf8, cur, aushort);
2909 au16 = (U16)SvUV(fromstr);
2910 DO_BO_PACK(au16, 16);
2911 PUSH16(utf8, cur, &au16);
2914 case 's' | TYPE_IS_SHRIEKING:
2915 #if SHORTSIZE != SIZE16
2919 ashort = SvIV(fromstr);
2920 DO_BO_PACK(ashort, s);
2921 PUSH_VAR(utf8, cur, ashort);
2931 ai16 = (I16)SvIV(fromstr);
2932 DO_BO_PACK(ai16, 16);
2933 PUSH16(utf8, cur, &ai16);
2937 case 'I' | TYPE_IS_SHRIEKING:
2941 auint = SvUV(fromstr);
2942 DO_BO_PACK(auint, i);
2943 PUSH_VAR(utf8, cur, auint);
2950 aiv = SvIV(fromstr);
2951 #if IVSIZE == INTSIZE
2953 #elif IVSIZE == LONGSIZE
2955 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
2956 DO_BO_PACK(aiv, 64);
2958 Perl_croak(aTHX_ "'j' not supported on this platform");
2960 PUSH_VAR(utf8, cur, aiv);
2967 auv = SvUV(fromstr);
2968 #if UVSIZE == INTSIZE
2970 #elif UVSIZE == LONGSIZE
2972 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
2973 DO_BO_PACK(auv, 64);
2975 Perl_croak(aTHX_ "'J' not supported on this platform");
2977 PUSH_VAR(utf8, cur, auv);
2984 anv = SvNV(fromstr);
2988 SvCUR_set(cat, cur - start);
2989 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
2992 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2993 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2994 any negative IVs will have already been got by the croak()
2995 above. IOK is untrue for fractions, so we test them
2996 against UV_MAX_P1. */
2997 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2998 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
2999 char *in = buf + sizeof(buf);
3000 UV auv = SvUV(fromstr);
3003 *--in = (char)((auv & 0x7f) | 0x80);
3006 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3007 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3008 in, (buf + sizeof(buf)) - in);
3009 } else if (SvPOKp(fromstr))
3011 else if (SvNOKp(fromstr)) {
3012 /* 10**NV_MAX_10_EXP is the largest power of 10
3013 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
3014 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3015 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3016 And with that many bytes only Inf can overflow.
3017 Some C compilers are strict about integral constant
3018 expressions so we conservatively divide by a slightly
3019 smaller integer instead of multiplying by the exact
3020 floating-point value.
3022 #ifdef NV_MAX_10_EXP
3023 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3024 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3026 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3027 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3029 char *in = buf + sizeof(buf);
3031 anv = Perl_floor(anv);
3033 const NV next = Perl_floor(anv / 128);
3034 if (in <= buf) /* this cannot happen ;-) */
3035 Perl_croak(aTHX_ "Cannot compress integer in pack");
3036 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3039 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3040 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3041 in, (buf + sizeof(buf)) - in);
3050 /* Copy string and check for compliance */
3051 from = SvPV_const(fromstr, len);
3052 if ((norm = is_an_int(from, len)) == NULL)
3053 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3055 Newx(result, len, char);
3058 while (!done) *--in = div128(norm, &done) | 0x80;
3059 result[len - 1] &= 0x7F; /* clear continue bit */
3060 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3061 in, (result + len) - in);
3063 SvREFCNT_dec(norm); /* free norm */
3068 case 'i' | TYPE_IS_SHRIEKING:
3072 aint = SvIV(fromstr);
3073 DO_BO_PACK(aint, i);
3074 PUSH_VAR(utf8, cur, aint);
3077 case 'N' | TYPE_IS_SHRIEKING:
3082 au32 = SvUV(fromstr);
3084 au32 = PerlSock_htonl(au32);
3086 PUSH32(utf8, cur, &au32);
3089 case 'V' | TYPE_IS_SHRIEKING:
3094 au32 = SvUV(fromstr);
3098 PUSH32(utf8, cur, &au32);
3101 case 'L' | TYPE_IS_SHRIEKING:
3102 #if LONGSIZE != SIZE32
3104 unsigned long aulong;
3106 aulong = SvUV(fromstr);
3107 DO_BO_PACK(aulong, l);
3108 PUSH_VAR(utf8, cur, aulong);
3118 au32 = SvUV(fromstr);
3119 DO_BO_PACK(au32, 32);
3120 PUSH32(utf8, cur, &au32);
3123 case 'l' | TYPE_IS_SHRIEKING:
3124 #if LONGSIZE != SIZE32
3128 along = SvIV(fromstr);
3129 DO_BO_PACK(along, l);
3130 PUSH_VAR(utf8, cur, along);
3140 ai32 = SvIV(fromstr);
3141 DO_BO_PACK(ai32, 32);
3142 PUSH32(utf8, cur, &ai32);
3150 auquad = (Uquad_t) SvUV(fromstr);
3151 DO_BO_PACK(auquad, 64);
3152 PUSH_VAR(utf8, cur, auquad);
3159 aquad = (Quad_t)SvIV(fromstr);
3160 DO_BO_PACK(aquad, 64);
3161 PUSH_VAR(utf8, cur, aquad);
3164 #endif /* HAS_QUAD */
3166 len = 1; /* assume SV is correct length */
3167 GROWING(utf8, cat, start, cur, sizeof(char *));
3174 SvGETMAGIC(fromstr);
3175 if (!SvOK(fromstr)) aptr = NULL;
3177 /* XXX better yet, could spirit away the string to
3178 * a safe spot and hang on to it until the result
3179 * of pack() (and all copies of the result) are
3182 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3183 !SvREADONLY(fromstr)))) {
3184 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3185 "Attempt to pack pointer to temporary value");
3187 if (SvPOK(fromstr) || SvNIOK(fromstr))
3188 aptr = SvPV_nomg_const_nolen(fromstr);
3190 aptr = SvPV_force_flags_nolen(fromstr, 0);
3192 DO_BO_PACK_PC(aptr);
3193 PUSH_VAR(utf8, cur, aptr);
3197 const char *aptr, *aend;
3201 if (len <= 2) len = 45;
3202 else len = len / 3 * 3;
3204 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3205 "Field too wide in 'u' format in pack");
3208 aptr = SvPV_const(fromstr, fromlen);
3209 from_utf8 = DO_UTF8(fromstr);
3211 aend = aptr + fromlen;
3212 fromlen = sv_len_utf8_nomg(fromstr);
3213 } else aend = NULL; /* Unused, but keep compilers happy */
3214 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3215 while (fromlen > 0) {
3218 U8 hunk[1+63/3*4+1];
3220 if ((I32)fromlen > len)
3226 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3227 'u' | TYPE_IS_PACK)) {
3229 SvCUR_set(cat, cur - start);
3230 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3231 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3232 aptr, aend, buffer, (long) todo);
3234 end = doencodes(hunk, buffer, todo);
3236 end = doencodes(hunk, aptr, todo);
3239 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3246 SvCUR_set(cat, cur - start);
3248 *symptr = lookahead;
3257 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3260 SV *pat_sv = *++MARK;
3261 const char *pat = SvPV_const(pat_sv, fromlen);
3262 const char *patend = pat + fromlen;
3268 packlist(cat, pat, patend, MARK, SP + 1);
3278 * c-indentation-style: bsd
3280 * indent-tabs-mode: nil
3283 * ex: set ts=8 sts=4 sw=4 et: