3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
32 #define PERL_IN_PP_PACK_C
35 /* Types used by pack/unpack */
37 e_no_len, /* no length */
38 e_number, /* number, [] */
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
57 #define TEMPSYM_INIT(symptr, p, e, f) \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
81 U8 bytes[sizeof(long double)];
88 /* Maximum number of bytes to which a byte can grow due to upgrade */
92 * Offset for integer pack/unpack.
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
112 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
115 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
116 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
117 # define OFF16(p) ((char*)(p))
118 # define OFF32(p) ((char*)(p))
120 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
121 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
124 ++++ bad cray byte order
128 # define OFF16(p) ((char *) (p))
129 # define OFF32(p) ((char *) (p))
132 /* Only to be used inside a loop (see the break) */
133 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
135 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
137 Copy(s, OFF16(p), SIZE16, char); \
142 /* Only to be used inside a loop (see the break) */
143 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
145 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
147 Copy(s, OFF32(p), SIZE32, char); \
152 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
153 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
155 /* Only to be used inside a loop (see the break) */
156 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype) \
159 if (!uni_to_bytes(aTHX_ &s, strend, \
160 (char *) (buf), len, datumtype)) break; \
162 Copy(s, (char *) (buf), len, char); \
167 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
168 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype)
170 #define PUSH_VAR(utf8, aptr, var) \
171 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
173 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
174 #define MAX_SUB_TEMPLATE_LEVEL 100
176 /* flags (note that type modifiers can also be used as flags!) */
177 #define FLAG_WAS_UTF8 0x40
178 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
179 #define FLAG_UNPACK_ONLY_ONE 0x10
180 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
181 #define FLAG_SLASH 0x04
182 #define FLAG_COMMA 0x02
183 #define FLAG_PACK 0x01
186 S_mul128(pTHX_ SV *sv, U8 m)
189 char *s = SvPV(sv, len);
192 PERL_ARGS_ASSERT_MUL128;
194 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
195 SV * const tmpNew = newSVpvs("0000000000");
197 sv_catsv(tmpNew, sv);
198 SvREFCNT_dec(sv); /* free old sv */
203 while (!*t) /* trailing '\0'? */
206 const U32 i = ((*t - '0') << 7) + m;
207 *(t--) = '0' + (char)(i % 10);
213 /* Explosives and implosives. */
215 #if 'I' == 73 && 'J' == 74
216 /* On an ASCII/ISO kind of system */
217 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
220 Some other sort of character set - use memchr() so we don't match
223 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
227 #define TYPE_IS_SHRIEKING 0x100
228 #define TYPE_IS_BIG_ENDIAN 0x200
229 #define TYPE_IS_LITTLE_ENDIAN 0x400
230 #define TYPE_IS_PACK 0x800
231 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
232 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
233 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
235 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
236 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
238 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
240 # define DO_BO_UNPACK(var, type) \
242 switch (TYPE_ENDIANNESS(datumtype)) { \
243 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
244 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
249 # define DO_BO_PACK(var, type) \
251 switch (TYPE_ENDIANNESS(datumtype)) { \
252 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
253 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
258 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
260 switch (TYPE_ENDIANNESS(datumtype)) { \
261 case TYPE_IS_BIG_ENDIAN: \
262 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
264 case TYPE_IS_LITTLE_ENDIAN: \
265 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
272 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
274 switch (TYPE_ENDIANNESS(datumtype)) { \
275 case TYPE_IS_BIG_ENDIAN: \
276 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
278 case TYPE_IS_LITTLE_ENDIAN: \
279 var = (post_cast *) my_htole ## type ((pre_cast) var); \
286 # define BO_CANT_DOIT(action, type) \
288 switch (TYPE_ENDIANNESS(datumtype)) { \
289 case TYPE_IS_BIG_ENDIAN: \
290 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
291 "platform", #action, #type); \
293 case TYPE_IS_LITTLE_ENDIAN: \
294 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
295 "platform", #action, #type); \
302 # if PTRSIZE == INTSIZE
303 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
304 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
305 # elif PTRSIZE == LONGSIZE
306 # if LONGSIZE < IVSIZE && IVSIZE == 8
307 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, 64, IV, char)
308 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, 64, IV, char)
310 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
311 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
313 # elif PTRSIZE == IVSIZE
314 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
315 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
317 # define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
318 # define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
321 # if defined(my_htolen) && defined(my_letohn) && \
322 defined(my_htoben) && defined(my_betohn)
323 # define DO_BO_UNPACK_N(var, type) \
325 switch (TYPE_ENDIANNESS(datumtype)) { \
326 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
327 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
332 # define DO_BO_PACK_N(var, type) \
334 switch (TYPE_ENDIANNESS(datumtype)) { \
335 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
336 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
341 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
342 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
345 #define PACK_SIZE_CANNOT_CSUM 0x80
346 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
347 #define PACK_SIZE_MASK 0x3F
349 #include "packsizetables.c"
352 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
355 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
356 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
357 /* We try to process malformed UTF-8 as much as possible (preferably with
358 warnings), but these two mean we make no progress in the string and
359 might enter an infinite loop */
360 if (retlen == (STRLEN) -1 || retlen == 0)
361 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
362 (int) TYPE_NO_MODIFIERS(datumtype));
364 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
365 "Character in '%c' format wrapped in unpack",
366 (int) TYPE_NO_MODIFIERS(datumtype));
373 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
374 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
378 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
382 const char *from = *s;
384 const U32 flags = ckWARN(WARN_UTF8) ?
385 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
386 for (;buf_len > 0; buf_len--) {
387 if (from >= end) return FALSE;
388 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
389 if (retlen == (STRLEN) -1 || retlen == 0) {
390 from += UTF8SKIP(from);
392 } else from += retlen;
397 *(U8 *)buf++ = (U8)val;
399 /* We have enough characters for the buffer. Did we have problems ? */
402 /* Rewalk the string fragment while warning */
404 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
405 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
406 if (ptr >= end) break;
407 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
409 if (from > end) from = end;
412 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
413 WARN_PACK : WARN_UNPACK),
414 "Character(s) in '%c' format wrapped in %s",
415 (int) TYPE_NO_MODIFIERS(datumtype),
416 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
423 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
427 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
428 if (val >= 0x100 || !ISUUCHAR(val) ||
429 retlen == (STRLEN) -1 || retlen == 0) {
433 *out = PL_uudmap[val] & 077;
439 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
440 const U8 * const end = start + len;
442 PERL_ARGS_ASSERT_BYTES_TO_UNI;
444 while (start < end) {
445 const UV uv = NATIVE_TO_ASCII(*start);
446 if (UNI_IS_INVARIANT(uv))
447 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
449 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
450 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
457 #define PUSH_BYTES(utf8, cur, buf, len) \
460 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
462 Copy(buf, cur, len, char); \
467 #define GROWING(utf8, cat, start, cur, in_len) \
469 STRLEN glen = (in_len); \
470 if (utf8) glen *= UTF8_EXPAND; \
471 if ((cur) + glen >= (start) + SvLEN(cat)) { \
472 (start) = sv_exp_grow(cat, glen); \
473 (cur) = (start) + SvCUR(cat); \
477 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
479 const STRLEN glen = (in_len); \
481 if (utf8) gl *= UTF8_EXPAND; \
482 if ((cur) + gl >= (start) + SvLEN(cat)) { \
484 SvCUR_set((cat), (cur) - (start)); \
485 (start) = sv_exp_grow(cat, gl); \
486 (cur) = (start) + SvCUR(cat); \
488 PUSH_BYTES(utf8, cur, buf, glen); \
491 #define PUSH_BYTE(utf8, s, byte) \
494 const U8 au8 = (byte); \
495 (s) = bytes_to_uni(&au8, 1, (s)); \
496 } else *(U8 *)(s)++ = (byte); \
499 /* Only to be used inside a loop (see the break) */
500 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
503 if (str >= end) break; \
504 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
505 if (retlen == (STRLEN) -1 || retlen == 0) { \
507 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
512 static const char *_action( const tempsym_t* symptr )
514 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
517 /* Returns the sizeof() struct described by pat */
519 S_measure_struct(pTHX_ tempsym_t* symptr)
523 PERL_ARGS_ASSERT_MEASURE_STRUCT;
525 while (next_symbol(symptr)) {
529 switch (symptr->howlen) {
531 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
535 /* e_no_len and e_number */
536 len = symptr->length;
540 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
543 /* endianness doesn't influence the size of a type */
544 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
546 Perl_croak(aTHX_ "Invalid type '%c' in %s",
547 (int)TYPE_NO_MODIFIERS(symptr->code),
549 case '.' | TYPE_IS_SHRIEKING:
550 case '@' | TYPE_IS_SHRIEKING:
554 case 'U': /* XXXX Is it correct? */
557 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
558 (int) TYPE_NO_MODIFIERS(symptr->code),
565 tempsym_t savsym = *symptr;
566 symptr->patptr = savsym.grpbeg;
567 symptr->patend = savsym.grpend;
568 /* XXXX Theoretically, we need to measure many times at
569 different positions, since the subexpression may contain
570 alignment commands, but be not of aligned length.
571 Need to detect this and croak(). */
572 size = measure_struct(symptr);
576 case 'X' | TYPE_IS_SHRIEKING:
577 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
579 if (!len) /* Avoid division by 0 */
581 len = total % len; /* Assumed: the start is aligned. */
586 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
588 case 'x' | TYPE_IS_SHRIEKING:
589 if (!len) /* Avoid division by 0 */
591 star = total % len; /* Assumed: the start is aligned. */
592 if (star) /* Other portable ways? */
616 size = sizeof(char*);
626 /* locate matching closing parenthesis or bracket
627 * returns char pointer to char after match, or NULL
630 S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
632 PERL_ARGS_ASSERT_GROUP_END;
634 while (patptr < patend) {
635 const char c = *patptr++;
642 while (patptr < patend && *patptr != '\n')
646 patptr = group_end(patptr, patend, ')') + 1;
648 patptr = group_end(patptr, patend, ']') + 1;
650 Perl_croak(aTHX_ "No group ending character '%c' found in template",
656 /* Convert unsigned decimal number to binary.
657 * Expects a pointer to the first digit and address of length variable
658 * Advances char pointer to 1st non-digit char and returns number
661 S_get_num(pTHX_ const char *patptr, I32 *lenptr )
663 I32 len = *patptr++ - '0';
665 PERL_ARGS_ASSERT_GET_NUM;
667 while (isDIGIT(*patptr)) {
668 if (len >= 0x7FFFFFFF/10)
669 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
670 len = (len * 10) + (*patptr++ - '0');
676 /* The marvellous template parsing routine: Using state stored in *symptr,
677 * locates next template code and count
680 S_next_symbol(pTHX_ tempsym_t* symptr )
682 const char* patptr = symptr->patptr;
683 const char* const patend = symptr->patend;
685 PERL_ARGS_ASSERT_NEXT_SYMBOL;
687 symptr->flags &= ~FLAG_SLASH;
689 while (patptr < patend) {
690 if (isSPACE(*patptr))
692 else if (*patptr == '#') {
694 while (patptr < patend && *patptr != '\n')
699 /* We should have found a template code */
700 I32 code = *patptr++ & 0xFF;
701 U32 inherited_modifiers = 0;
703 if (code == ','){ /* grandfather in commas but with a warning */
704 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
705 symptr->flags |= FLAG_COMMA;
706 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
707 "Invalid type ',' in %s", _action( symptr ) );
712 /* for '(', skip to ')' */
714 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
715 Perl_croak(aTHX_ "()-group starts with a count in %s",
717 symptr->grpbeg = patptr;
718 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
719 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
720 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
724 /* look for group modifiers to inherit */
725 if (TYPE_ENDIANNESS(symptr->flags)) {
726 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
727 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
730 /* look for modifiers */
731 while (patptr < patend) {
736 modifier = TYPE_IS_SHRIEKING;
737 allowed = "sSiIlLxXnNvV@.";
740 modifier = TYPE_IS_BIG_ENDIAN;
741 allowed = ENDIANNESS_ALLOWED_TYPES;
744 modifier = TYPE_IS_LITTLE_ENDIAN;
745 allowed = ENDIANNESS_ALLOWED_TYPES;
756 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
757 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
758 allowed, _action( symptr ) );
760 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
761 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
762 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
763 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
764 TYPE_ENDIANNESS_MASK)
765 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
766 *patptr, _action( symptr ) );
768 if ((code & modifier)) {
769 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
770 "Duplicate modifier '%c' after '%c' in %s",
771 *patptr, (int) TYPE_NO_MODIFIERS(code),
779 /* inherit modifiers */
780 code |= inherited_modifiers;
782 /* look for count and/or / */
783 if (patptr < patend) {
784 if (isDIGIT(*patptr)) {
785 patptr = get_num( patptr, &symptr->length );
786 symptr->howlen = e_number;
788 } else if (*patptr == '*') {
790 symptr->howlen = e_star;
792 } else if (*patptr == '[') {
793 const char* lenptr = ++patptr;
794 symptr->howlen = e_number;
795 patptr = group_end( patptr, patend, ']' ) + 1;
796 /* what kind of [] is it? */
797 if (isDIGIT(*lenptr)) {
798 lenptr = get_num( lenptr, &symptr->length );
800 Perl_croak(aTHX_ "Malformed integer in [] in %s",
803 tempsym_t savsym = *symptr;
804 symptr->patend = patptr-1;
805 symptr->patptr = lenptr;
806 savsym.length = measure_struct(symptr);
810 symptr->howlen = e_no_len;
815 while (patptr < patend) {
816 if (isSPACE(*patptr))
818 else if (*patptr == '#') {
820 while (patptr < patend && *patptr != '\n')
825 if (*patptr == '/') {
826 symptr->flags |= FLAG_SLASH;
828 if (patptr < patend &&
829 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
830 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
837 /* at end - no count, no / */
838 symptr->howlen = e_no_len;
843 symptr->patptr = patptr;
847 symptr->patptr = patptr;
852 There is no way to cleanly handle the case where we should process the
853 string per byte in its upgraded form while it's really in downgraded form
854 (e.g. estimates like strend-s as an upper bound for the number of
855 characters left wouldn't work). So if we foresee the need of this
856 (pattern starts with U or contains U0), we want to work on the encoded
857 version of the string. Users are advised to upgrade their pack string
858 themselves if they need to do a lot of unpacks like this on it
861 need_utf8(const char *pat, const char *patend)
865 PERL_ARGS_ASSERT_NEED_UTF8;
867 while (pat < patend) {
870 pat = (const char *) memchr(pat, '\n', patend-pat);
871 if (!pat) return FALSE;
872 } else if (pat[0] == 'U') {
873 if (first || pat[1] == '0') return TRUE;
874 } else first = FALSE;
881 first_symbol(const char *pat, const char *patend) {
882 PERL_ARGS_ASSERT_FIRST_SYMBOL;
884 while (pat < patend) {
885 if (pat[0] != '#') return pat[0];
887 pat = (const char *) memchr(pat, '\n', patend-pat);
895 =for apidoc unpackstring
897 The engine implementing the unpack() Perl function.
899 Using the template pat..patend, this function unpacks the string
900 s..strend into a number of mortal SVs, which it pushes onto the perl
901 argument (@_) stack (so you will need to issue a C<PUTBACK> before and
902 C<SPAGAIN> after the call to this function). It returns the number of
905 The strend and patend pointers should point to the byte following the last
906 character of each string.
908 Although this function returns its values on the perl argument stack, it
909 doesn't take any parameters from that stack (and thus in particular
910 there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
916 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
920 PERL_ARGS_ASSERT_UNPACKSTRING;
922 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
923 else if (need_utf8(pat, patend)) {
924 /* We probably should try to avoid this in case a scalar context call
925 wouldn't get to the "U0" */
926 STRLEN len = strend - s;
927 s = (char *) bytes_to_utf8((U8 *) s, &len);
930 flags |= FLAG_DO_UTF8;
933 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
934 flags |= FLAG_PARSE_UTF8;
936 TEMPSYM_INIT(&sym, pat, patend, flags);
938 return unpack_rec(&sym, s, s, strend, NULL );
942 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
946 const I32 start_sp_offset = SP - PL_stack_base;
951 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
953 bool explicit_length;
954 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
955 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
957 PERL_ARGS_ASSERT_UNPACK_REC;
959 symptr->strbeg = s - strbeg;
961 while (next_symbol(symptr)) {
964 I32 datumtype = symptr->code;
965 /* do first one only unless in list context
966 / is implemented by unpacking the count, then popping it from the
967 stack, so must check that we're not in the middle of a / */
969 && (SP - PL_stack_base == start_sp_offset + 1)
970 && (datumtype != '/') ) /* XXX can this be omitted */
973 switch (howlen = symptr->howlen) {
975 len = strend - strbeg; /* long enough */
978 /* e_no_len and e_number */
979 len = symptr->length;
983 explicit_length = TRUE;
985 beyond = s >= strend;
987 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
989 /* props nonzero means we can process this letter. */
990 const long size = props & PACK_SIZE_MASK;
991 const long howmany = (strend - s) / size;
995 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
996 if (len && unpack_only_one) len = 1;
1002 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1004 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1007 if (howlen == e_no_len)
1008 len = 16; /* len is not specified */
1016 tempsym_t savsym = *symptr;
1017 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1018 symptr->flags |= group_modifiers;
1019 symptr->patend = savsym.grpend;
1020 symptr->previous = &savsym;
1023 if (len && unpack_only_one) len = 1;
1025 symptr->patptr = savsym.grpbeg;
1026 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1027 else symptr->flags &= ~FLAG_PARSE_UTF8;
1028 unpack_rec(symptr, s, strbeg, strend, &s);
1029 if (s == strend && savsym.howlen == e_star)
1030 break; /* No way to continue */
1033 savsym.flags = symptr->flags & ~group_modifiers;
1037 case '.' | TYPE_IS_SHRIEKING:
1041 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1042 if (howlen == e_star) from = strbeg;
1043 else if (len <= 0) from = s;
1045 tempsym_t *group = symptr;
1047 while (--len && group) group = group->previous;
1048 from = group ? strbeg + group->strbeg : strbeg;
1051 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1052 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1056 case '@' | TYPE_IS_SHRIEKING:
1058 s = strbeg + symptr->strbeg;
1059 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1063 Perl_croak(aTHX_ "'@' outside of string in unpack");
1068 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1071 Perl_croak(aTHX_ "'@' outside of string in unpack");
1075 case 'X' | TYPE_IS_SHRIEKING:
1076 if (!len) /* Avoid division by 0 */
1079 const char *hop, *last;
1081 hop = last = strbeg;
1083 hop += UTF8SKIP(hop);
1090 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1094 len = (s - strbeg) % len;
1100 Perl_croak(aTHX_ "'X' outside of string in unpack");
1101 while (--s, UTF8_IS_CONTINUATION(*s)) {
1103 Perl_croak(aTHX_ "'X' outside of string in unpack");
1108 if (len > s - strbeg)
1109 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1113 case 'x' | TYPE_IS_SHRIEKING: {
1115 if (!len) /* Avoid division by 0 */
1117 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1118 else ai32 = (s - strbeg) % len;
1119 if (ai32 == 0) break;
1127 Perl_croak(aTHX_ "'x' outside of string in unpack");
1132 if (len > strend - s)
1133 Perl_croak(aTHX_ "'x' outside of string in unpack");
1138 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1144 /* Preliminary length estimate is assumed done in 'W' */
1145 if (len > strend - s) len = strend - s;
1151 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1152 if (hop >= strend) {
1154 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1159 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1161 } else if (len > strend - s)
1164 if (datumtype == 'Z') {
1165 /* 'Z' strips stuff after first null */
1166 const char *ptr, *end;
1168 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1169 sv = newSVpvn(s, ptr-s);
1170 if (howlen == e_star) /* exact for 'Z*' */
1171 len = ptr-s + (ptr != strend ? 1 : 0);
1172 } else if (datumtype == 'A') {
1173 /* 'A' strips both nulls and spaces */
1175 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1176 for (ptr = s+len-1; ptr >= s; ptr--)
1177 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1178 !isSPACE_utf8(ptr)) break;
1179 if (ptr >= s) ptr += UTF8SKIP(ptr);
1182 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1184 for (ptr = s+len-1; ptr >= s; ptr--)
1185 if (*ptr != 0 && !isSPACE(*ptr)) break;
1188 sv = newSVpvn(s, ptr-s);
1189 } else sv = newSVpvn(s, len);
1193 /* Undo any upgrade done due to need_utf8() */
1194 if (!(symptr->flags & FLAG_WAS_UTF8))
1195 sv_utf8_downgrade(sv, 0);
1203 if (howlen == e_star || len > (strend - s) * 8)
1204 len = (strend - s) * 8;
1207 while (len >= 8 && s < strend) {
1208 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1213 cuv += PL_bitcount[*(U8 *)s++];
1216 if (len && s < strend) {
1218 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1219 if (datumtype == 'b')
1221 if (bits & 1) cuv++;
1226 if (bits & 0x80) cuv++;
1233 sv = sv_2mortal(newSV(len ? len : 1));
1236 if (datumtype == 'b') {
1238 const I32 ai32 = len;
1239 for (len = 0; len < ai32; len++) {
1240 if (len & 7) bits >>= 1;
1242 if (s >= strend) break;
1243 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1244 } else bits = *(U8 *) s++;
1245 *str++ = bits & 1 ? '1' : '0';
1249 const I32 ai32 = len;
1250 for (len = 0; len < ai32; len++) {
1251 if (len & 7) bits <<= 1;
1253 if (s >= strend) break;
1254 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1255 } else bits = *(U8 *) s++;
1256 *str++ = bits & 0x80 ? '1' : '0';
1260 SvCUR_set(sv, str - SvPVX_const(sv));
1267 /* Preliminary length estimate, acceptable for utf8 too */
1268 if (howlen == e_star || len > (strend - s) * 2)
1269 len = (strend - s) * 2;
1271 sv = sv_2mortal(newSV(len ? len : 1));
1275 if (datumtype == 'h') {
1278 for (len = 0; len < ai32; len++) {
1279 if (len & 1) bits >>= 4;
1281 if (s >= strend) break;
1282 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1283 } else bits = * (U8 *) s++;
1285 *str++ = PL_hexdigit[bits & 15];
1289 const I32 ai32 = len;
1290 for (len = 0; len < ai32; len++) {
1291 if (len & 1) bits <<= 4;
1293 if (s >= strend) break;
1294 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1295 } else bits = *(U8 *) s++;
1297 *str++ = PL_hexdigit[(bits >> 4) & 15];
1302 SvCUR_set(sv, str - SvPVX_const(sv));
1309 if (explicit_length)
1310 /* Switch to "character" mode */
1311 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1316 while (len-- > 0 && s < strend) {
1321 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1322 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1323 if (retlen == (STRLEN) -1 || retlen == 0)
1324 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1328 aint = *(U8 *)(s)++;
1329 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1333 else if (checksum > bits_in_uv)
1334 cdouble += (NV)aint;
1342 while (len-- > 0 && s < strend) {
1344 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1345 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1346 if (retlen == (STRLEN) -1 || retlen == 0)
1347 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1351 else if (checksum > bits_in_uv)
1352 cdouble += (NV) val;
1356 } else if (!checksum)
1358 const U8 ch = *(U8 *) s++;
1361 else if (checksum > bits_in_uv)
1362 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1364 while (len-- > 0) cuv += *(U8 *) s++;
1368 if (explicit_length && howlen != e_star) {
1369 /* Switch to "bytes in UTF-8" mode */
1370 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1372 /* Should be impossible due to the need_utf8() test */
1373 Perl_croak(aTHX_ "U0 mode on a byte string");
1377 if (len > strend - s) len = strend - s;
1379 if (len && unpack_only_one) len = 1;
1383 while (len-- > 0 && s < strend) {
1387 U8 result[UTF8_MAXLEN];
1388 const char *ptr = s;
1390 /* Bug: warns about bad utf8 even if we are short on bytes
1391 and will break out of the loop */
1392 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1395 len = UTF8SKIP(result);
1396 if (!uni_to_bytes(aTHX_ &ptr, strend,
1397 (char *) &result[1], len-1, 'U')) break;
1398 auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT);
1401 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
1402 if (retlen == (STRLEN) -1 || retlen == 0)
1403 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1408 else if (checksum > bits_in_uv)
1409 cdouble += (NV) auv;
1414 case 's' | TYPE_IS_SHRIEKING:
1415 #if SHORTSIZE != SIZE16
1418 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1419 DO_BO_UNPACK(ashort, s);
1422 else if (checksum > bits_in_uv)
1423 cdouble += (NV)ashort;
1435 #if U16SIZE > SIZE16
1438 SHIFT16(utf8, s, strend, &ai16, datumtype);
1439 DO_BO_UNPACK(ai16, 16);
1440 #if U16SIZE > SIZE16
1446 else if (checksum > bits_in_uv)
1447 cdouble += (NV)ai16;
1452 case 'S' | TYPE_IS_SHRIEKING:
1453 #if SHORTSIZE != SIZE16
1455 unsigned short aushort;
1456 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1457 DO_BO_UNPACK(aushort, s);
1460 else if (checksum > bits_in_uv)
1461 cdouble += (NV)aushort;
1474 #if U16SIZE > SIZE16
1477 SHIFT16(utf8, s, strend, &au16, datumtype);
1478 DO_BO_UNPACK(au16, 16);
1480 if (datumtype == 'n')
1481 au16 = PerlSock_ntohs(au16);
1483 if (datumtype == 'v')
1487 else if (checksum > bits_in_uv)
1488 cdouble += (NV) au16;
1493 case 'v' | TYPE_IS_SHRIEKING:
1494 case 'n' | TYPE_IS_SHRIEKING:
1497 # if U16SIZE > SIZE16
1500 SHIFT16(utf8, s, strend, &ai16, datumtype);
1502 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1503 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1504 # endif /* HAS_NTOHS */
1505 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1506 ai16 = (I16) vtohs((U16) ai16);
1509 else if (checksum > bits_in_uv)
1510 cdouble += (NV) ai16;
1516 case 'i' | TYPE_IS_SHRIEKING:
1519 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1520 DO_BO_UNPACK(aint, i);
1523 else if (checksum > bits_in_uv)
1524 cdouble += (NV)aint;
1530 case 'I' | TYPE_IS_SHRIEKING:
1533 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1534 DO_BO_UNPACK(auint, i);
1537 else if (checksum > bits_in_uv)
1538 cdouble += (NV)auint;
1546 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1547 #if IVSIZE == INTSIZE
1548 DO_BO_UNPACK(aiv, i);
1549 #elif IVSIZE == LONGSIZE
1550 DO_BO_UNPACK(aiv, l);
1551 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1552 DO_BO_UNPACK(aiv, 64);
1554 Perl_croak(aTHX_ "'j' not supported on this platform");
1558 else if (checksum > bits_in_uv)
1567 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1568 #if IVSIZE == INTSIZE
1569 DO_BO_UNPACK(auv, i);
1570 #elif IVSIZE == LONGSIZE
1571 DO_BO_UNPACK(auv, l);
1572 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1573 DO_BO_UNPACK(auv, 64);
1575 Perl_croak(aTHX_ "'J' not supported on this platform");
1579 else if (checksum > bits_in_uv)
1585 case 'l' | TYPE_IS_SHRIEKING:
1586 #if LONGSIZE != SIZE32
1589 SHIFT_VAR(utf8, s, strend, along, datumtype);
1590 DO_BO_UNPACK(along, l);
1593 else if (checksum > bits_in_uv)
1594 cdouble += (NV)along;
1605 #if U32SIZE > SIZE32
1608 SHIFT32(utf8, s, strend, &ai32, datumtype);
1609 DO_BO_UNPACK(ai32, 32);
1610 #if U32SIZE > SIZE32
1611 if (ai32 > 2147483647) ai32 -= 4294967296;
1615 else if (checksum > bits_in_uv)
1616 cdouble += (NV)ai32;
1621 case 'L' | TYPE_IS_SHRIEKING:
1622 #if LONGSIZE != SIZE32
1624 unsigned long aulong;
1625 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1626 DO_BO_UNPACK(aulong, l);
1629 else if (checksum > bits_in_uv)
1630 cdouble += (NV)aulong;
1643 #if U32SIZE > SIZE32
1646 SHIFT32(utf8, s, strend, &au32, datumtype);
1647 DO_BO_UNPACK(au32, 32);
1649 if (datumtype == 'N')
1650 au32 = PerlSock_ntohl(au32);
1652 if (datumtype == 'V')
1656 else if (checksum > bits_in_uv)
1657 cdouble += (NV)au32;
1662 case 'V' | TYPE_IS_SHRIEKING:
1663 case 'N' | TYPE_IS_SHRIEKING:
1666 #if U32SIZE > SIZE32
1669 SHIFT32(utf8, s, strend, &ai32, datumtype);
1671 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1672 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1674 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1675 ai32 = (I32)vtohl((U32)ai32);
1678 else if (checksum > bits_in_uv)
1679 cdouble += (NV)ai32;
1687 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1688 DO_BO_UNPACK_PC(aptr);
1689 /* newSVpv generates undef if aptr is NULL */
1690 mPUSHs(newSVpv(aptr, 0));
1698 while (len > 0 && s < strend) {
1700 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1701 auv = (auv << 7) | (ch & 0x7f);
1702 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1710 if (++bytes >= sizeof(UV)) { /* promote to string */
1713 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
1714 while (s < strend) {
1715 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1716 sv = mul128(sv, (U8)(ch & 0x7f));
1722 t = SvPV_nolen_const(sv);
1731 if ((s >= strend) && bytes)
1732 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1736 if (symptr->howlen == e_star)
1737 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1739 if (s + sizeof(char*) <= strend) {
1741 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1742 DO_BO_UNPACK_PC(aptr);
1743 /* newSVpvn generates undef if aptr is NULL */
1744 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1751 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
1752 DO_BO_UNPACK(aquad, 64);
1754 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
1755 newSViv((IV)aquad) : newSVnv((NV)aquad));
1756 else if (checksum > bits_in_uv)
1757 cdouble += (NV)aquad;
1765 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
1766 DO_BO_UNPACK(auquad, 64);
1768 mPUSHs(auquad <= UV_MAX ?
1769 newSVuv((UV)auquad) : newSVnv((NV)auquad));
1770 else if (checksum > bits_in_uv)
1771 cdouble += (NV)auquad;
1776 #endif /* HAS_QUAD */
1777 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1781 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
1782 DO_BO_UNPACK_N(afloat, float);
1792 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
1793 DO_BO_UNPACK_N(adouble, double);
1803 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), datumtype);
1804 DO_BO_UNPACK_N(anv.nv, NV);
1811 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1815 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, sizeof(aldouble.bytes), datumtype);
1816 DO_BO_UNPACK_N(aldouble.ld, long double);
1818 mPUSHn(aldouble.ld);
1820 cdouble += aldouble.ld;
1826 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1827 sv = sv_2mortal(newSV(l));
1828 if (l) SvPOK_on(sv);
1831 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1836 next_uni_uu(aTHX_ &s, strend, &a);
1837 next_uni_uu(aTHX_ &s, strend, &b);
1838 next_uni_uu(aTHX_ &s, strend, &c);
1839 next_uni_uu(aTHX_ &s, strend, &d);
1840 hunk[0] = (char)((a << 2) | (b >> 4));
1841 hunk[1] = (char)((b << 4) | (c >> 2));
1842 hunk[2] = (char)((c << 6) | d);
1844 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1852 /* possible checksum byte */
1853 const char *skip = s+UTF8SKIP(s);
1854 if (skip < strend && *skip == '\n')
1860 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1864 len = PL_uudmap[*(U8*)s++] & 077;
1866 if (s < strend && ISUUCHAR(*s))
1867 a = PL_uudmap[*(U8*)s++] & 077;
1870 if (s < strend && ISUUCHAR(*s))
1871 b = PL_uudmap[*(U8*)s++] & 077;
1874 if (s < strend && ISUUCHAR(*s))
1875 c = PL_uudmap[*(U8*)s++] & 077;
1878 if (s < strend && ISUUCHAR(*s))
1879 d = PL_uudmap[*(U8*)s++] & 077;
1882 hunk[0] = (char)((a << 2) | (b >> 4));
1883 hunk[1] = (char)((b << 4) | (c >> 2));
1884 hunk[2] = (char)((c << 6) | d);
1886 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1891 else /* possible checksum byte */
1892 if (s + 1 < strend && s[1] == '\n')
1902 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1903 (checksum > bits_in_uv &&
1904 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1907 anv = (NV) (1 << (checksum & 15));
1908 while (checksum >= 16) {
1912 while (cdouble < 0.0)
1914 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
1915 sv = newSVnv(cdouble);
1918 if (checksum < bits_in_uv) {
1919 UV mask = ((UV)1 << checksum) - 1;
1928 if (symptr->flags & FLAG_SLASH){
1929 if (SP - PL_stack_base - start_sp_offset <= 0)
1931 if( next_symbol(symptr) ){
1932 if( symptr->howlen == e_number )
1933 Perl_croak(aTHX_ "Count after length/code in unpack" );
1935 /* ...end of char buffer then no decent length available */
1936 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1938 /* take top of stack (hope it's numeric) */
1941 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1944 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1946 datumtype = symptr->code;
1947 explicit_length = FALSE;
1955 return SP - PL_stack_base - start_sp_offset;
1963 I32 gimme = GIMME_V;
1966 const char *pat = SvPV_const(left, llen);
1967 const char *s = SvPV_const(right, rlen);
1968 const char *strend = s + rlen;
1969 const char *patend = pat + llen;
1973 cnt = unpackstring(pat, patend, s, strend,
1974 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
1975 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
1978 if ( !cnt && gimme == G_SCALAR )
1979 PUSHs(&PL_sv_undef);
1984 doencodes(U8 *h, const char *s, I32 len)
1986 *h++ = PL_uuemap[len];
1988 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1989 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1990 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1991 *h++ = PL_uuemap[(077 & (s[2] & 077))];
1996 const char r = (len > 1 ? s[1] : '\0');
1997 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1998 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1999 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2000 *h++ = PL_uuemap[0];
2007 S_is_an_int(pTHX_ const char *s, STRLEN l)
2009 SV *result = newSVpvn(s, l);
2010 char *const result_c = SvPV_nolen(result); /* convenience */
2011 char *out = result_c;
2015 PERL_ARGS_ASSERT_IS_AN_INT;
2023 SvREFCNT_dec(result);
2046 SvREFCNT_dec(result);
2052 SvCUR_set(result, out - result_c);
2056 /* pnum must be '\0' terminated */
2058 S_div128(pTHX_ SV *pnum, bool *done)
2061 char * const s = SvPV(pnum, len);
2065 PERL_ARGS_ASSERT_DIV128;
2069 const int i = m * 10 + (*t - '0');
2070 const int r = (i >> 7); /* r < 10 */
2078 SvCUR_set(pnum, (STRLEN) (t - s));
2083 =for apidoc packlist
2085 The engine implementing pack() Perl function.
2091 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
2096 PERL_ARGS_ASSERT_PACKLIST;
2098 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2100 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2101 Also make sure any UTF8 flag is loaded */
2102 SvPV_force_nolen(cat);
2104 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2106 (void)pack_rec( cat, &sym, beglist, endlist );
2109 /* like sv_utf8_upgrade, but also repoint the group start markers */
2111 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2114 const char *from_ptr, *from_start, *from_end, **marks, **m;
2115 char *to_start, *to_ptr;
2117 if (SvUTF8(sv)) return;
2119 from_start = SvPVX_const(sv);
2120 from_end = from_start + SvCUR(sv);
2121 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2122 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2123 if (from_ptr == from_end) {
2124 /* Simple case: no character needs to be changed */
2129 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2130 Newx(to_start, len, char);
2131 Copy(from_start, to_start, from_ptr-from_start, char);
2132 to_ptr = to_start + (from_ptr-from_start);
2134 Newx(marks, sym_ptr->level+2, const char *);
2135 for (group=sym_ptr; group; group = group->previous)
2136 marks[group->level] = from_start + group->strbeg;
2137 marks[sym_ptr->level+1] = from_end+1;
2138 for (m = marks; *m < from_ptr; m++)
2139 *m = to_start + (*m-from_start);
2141 for (;from_ptr < from_end; from_ptr++) {
2142 while (*m == from_ptr) *m++ = to_ptr;
2143 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2147 while (*m == from_ptr) *m++ = to_ptr;
2148 if (m != marks + sym_ptr->level+1) {
2151 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2152 "level=%d", m, marks, sym_ptr->level);
2154 for (group=sym_ptr; group; group = group->previous)
2155 group->strbeg = marks[group->level] - to_start;
2160 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2161 from_start -= SvIVX(sv);
2164 SvFLAGS(sv) &= ~SVf_OOK;
2167 Safefree(from_start);
2168 SvPV_set(sv, to_start);
2169 SvCUR_set(sv, to_ptr - to_start);
2174 /* Exponential string grower. Makes string extension effectively O(n)
2175 needed says how many extra bytes we need (not counting the final '\0')
2176 Only grows the string if there is an actual lack of space
2179 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2180 const STRLEN cur = SvCUR(sv);
2181 const STRLEN len = SvLEN(sv);
2184 PERL_ARGS_ASSERT_SV_EXP_GROW;
2186 if (len - cur > needed) return SvPVX(sv);
2187 extend = needed > len ? needed : len;
2188 return SvGROW(sv, len+extend+1);
2193 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2196 tempsym_t lookahead;
2197 I32 items = endlist - beglist;
2198 bool found = next_symbol(symptr);
2199 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2200 bool warn_utf8 = ckWARN(WARN_UTF8);
2202 PERL_ARGS_ASSERT_PACK_REC;
2204 if (symptr->level == 0 && found && symptr->code == 'U') {
2205 marked_upgrade(aTHX_ cat, symptr);
2206 symptr->flags |= FLAG_DO_UTF8;
2209 symptr->strbeg = SvCUR(cat);
2215 SV *lengthcode = NULL;
2216 I32 datumtype = symptr->code;
2217 howlen_t howlen = symptr->howlen;
2218 char *start = SvPVX(cat);
2219 char *cur = start + SvCUR(cat);
2221 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2225 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2229 /* e_no_len and e_number */
2230 len = symptr->length;
2235 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2237 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2238 /* We can process this letter. */
2239 STRLEN size = props & PACK_SIZE_MASK;
2240 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2244 /* Look ahead for next symbol. Do we have code/code? */
2245 lookahead = *symptr;
2246 found = next_symbol(&lookahead);
2247 if (symptr->flags & FLAG_SLASH) {
2249 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2250 if (strchr("aAZ", lookahead.code)) {
2251 if (lookahead.howlen == e_number) count = lookahead.length;
2254 count = sv_len_utf8(*beglist);
2257 if (lookahead.code == 'Z') count++;
2260 if (lookahead.howlen == e_number && lookahead.length < items)
2261 count = lookahead.length;
2264 lookahead.howlen = e_number;
2265 lookahead.length = count;
2266 lengthcode = sv_2mortal(newSViv(count));
2269 /* Code inside the switch must take care to properly update
2270 cat (CUR length and '\0' termination) if it updated *cur and
2271 doesn't simply leave using break */
2272 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2274 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2275 (int) TYPE_NO_MODIFIERS(datumtype));
2277 Perl_croak(aTHX_ "'%%' may not be used in pack");
2280 case '.' | TYPE_IS_SHRIEKING:
2282 if (howlen == e_star) from = start;
2283 else if (len == 0) from = cur;
2285 tempsym_t *group = symptr;
2287 while (--len && group) group = group->previous;
2288 from = group ? start + group->strbeg : start;
2291 len = SvIV(fromstr);
2293 case '@' | TYPE_IS_SHRIEKING:
2295 from = start + symptr->strbeg;
2297 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2299 while (len && from < cur) {
2300 from += UTF8SKIP(from);
2304 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2306 /* Here we know from == cur */
2308 GROWING(0, cat, start, cur, len);
2309 Zero(cur, len, char);
2311 } else if (from < cur) {
2314 } else goto no_change;
2322 if (len > 0) goto grow;
2323 if (len == 0) goto no_change;
2330 tempsym_t savsym = *symptr;
2331 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2332 symptr->flags |= group_modifiers;
2333 symptr->patend = savsym.grpend;
2335 symptr->previous = &lookahead;
2338 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2339 else symptr->flags &= ~FLAG_PARSE_UTF8;
2340 was_utf8 = SvUTF8(cat);
2341 symptr->patptr = savsym.grpbeg;
2342 beglist = pack_rec(cat, symptr, beglist, endlist);
2343 if (SvUTF8(cat) != was_utf8)
2344 /* This had better be an upgrade while in utf8==0 mode */
2347 if (savsym.howlen == e_star && beglist == endlist)
2348 break; /* No way to continue */
2350 items = endlist - beglist;
2351 lookahead.flags = symptr->flags & ~group_modifiers;
2354 case 'X' | TYPE_IS_SHRIEKING:
2355 if (!len) /* Avoid division by 0 */
2362 hop += UTF8SKIP(hop);
2369 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2373 len = (cur-start) % len;
2377 if (len < 1) goto no_change;
2381 Perl_croak(aTHX_ "'%c' outside of string in pack",
2382 (int) TYPE_NO_MODIFIERS(datumtype));
2383 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2385 Perl_croak(aTHX_ "'%c' outside of string in pack",
2386 (int) TYPE_NO_MODIFIERS(datumtype));
2392 if (cur - start < len)
2393 Perl_croak(aTHX_ "'%c' outside of string in pack",
2394 (int) TYPE_NO_MODIFIERS(datumtype));
2397 if (cur < start+symptr->strbeg) {
2398 /* Make sure group starts don't point into the void */
2400 const STRLEN length = cur-start;
2401 for (group = symptr;
2402 group && length < group->strbeg;
2403 group = group->previous) group->strbeg = length;
2404 lookahead.strbeg = length;
2407 case 'x' | TYPE_IS_SHRIEKING: {
2409 if (!len) /* Avoid division by 0 */
2411 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2412 else ai32 = (cur - start) % len;
2413 if (ai32 == 0) goto no_change;
2425 aptr = SvPV_const(fromstr, fromlen);
2426 if (DO_UTF8(fromstr)) {
2427 const char *end, *s;
2429 if (!utf8 && !SvUTF8(cat)) {
2430 marked_upgrade(aTHX_ cat, symptr);
2431 lookahead.flags |= FLAG_DO_UTF8;
2432 lookahead.strbeg = symptr->strbeg;
2435 cur = start + SvCUR(cat);
2437 if (howlen == e_star) {
2438 if (utf8) goto string_copy;
2442 end = aptr + fromlen;
2443 fromlen = datumtype == 'Z' ? len-1 : len;
2444 while ((I32) fromlen > 0 && s < end) {
2449 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2452 if (datumtype == 'Z') len++;
2458 fromlen = len - fromlen;
2459 if (datumtype == 'Z') fromlen--;
2460 if (howlen == e_star) {
2462 if (datumtype == 'Z') len++;
2464 GROWING(0, cat, start, cur, len);
2465 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2466 datumtype | TYPE_IS_PACK))
2467 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2468 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2469 (int)datumtype, aptr, end, cur, (UV)fromlen);
2473 if (howlen == e_star) {
2475 if (datumtype == 'Z') len++;
2477 if (len <= (I32) fromlen) {
2479 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2481 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2483 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2484 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2486 while (fromlen > 0) {
2487 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2493 if (howlen == e_star) {
2495 if (datumtype == 'Z') len++;
2497 if (len <= (I32) fromlen) {
2499 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2501 GROWING(0, cat, start, cur, len);
2502 Copy(aptr, cur, fromlen, char);
2506 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2513 const char *str, *end;
2520 str = SvPV_const(fromstr, fromlen);
2521 end = str + fromlen;
2522 if (DO_UTF8(fromstr)) {
2524 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2526 utf8_source = FALSE;
2527 utf8_flags = 0; /* Unused, but keep compilers happy */
2529 if (howlen == e_star) len = fromlen;
2530 field_len = (len+7)/8;
2531 GROWING(utf8, cat, start, cur, field_len);
2532 if (len > (I32)fromlen) len = fromlen;
2535 if (datumtype == 'B')
2539 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2541 } else bits |= *str++ & 1;
2542 if (l & 7) bits <<= 1;
2544 PUSH_BYTE(utf8, cur, bits);
2549 /* datumtype == 'b' */
2553 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2554 if (val & 1) bits |= 0x80;
2555 } else if (*str++ & 1)
2557 if (l & 7) bits >>= 1;
2559 PUSH_BYTE(utf8, cur, bits);
2565 if (datumtype == 'B')
2566 bits <<= 7 - (l & 7);
2568 bits >>= 7 - (l & 7);
2569 PUSH_BYTE(utf8, cur, bits);
2572 /* Determine how many chars are left in the requested field */
2574 if (howlen == e_star) field_len = 0;
2575 else field_len -= l;
2576 Zero(cur, field_len, char);
2582 const char *str, *end;
2589 str = SvPV_const(fromstr, fromlen);
2590 end = str + fromlen;
2591 if (DO_UTF8(fromstr)) {
2593 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2595 utf8_source = FALSE;
2596 utf8_flags = 0; /* Unused, but keep compilers happy */
2598 if (howlen == e_star) len = fromlen;
2599 field_len = (len+1)/2;
2600 GROWING(utf8, cat, start, cur, field_len);
2601 if (!utf8 && len > (I32)fromlen) len = fromlen;
2604 if (datumtype == 'H')
2608 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2609 if (val < 256 && isALPHA(val))
2610 bits |= (val + 9) & 0xf;
2613 } else if (isALPHA(*str))
2614 bits |= (*str++ + 9) & 0xf;
2616 bits |= *str++ & 0xf;
2617 if (l & 1) bits <<= 4;
2619 PUSH_BYTE(utf8, cur, bits);
2627 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2628 if (val < 256 && isALPHA(val))
2629 bits |= ((val + 9) & 0xf) << 4;
2631 bits |= (val & 0xf) << 4;
2632 } else if (isALPHA(*str))
2633 bits |= ((*str++ + 9) & 0xf) << 4;
2635 bits |= (*str++ & 0xf) << 4;
2636 if (l & 1) bits >>= 4;
2638 PUSH_BYTE(utf8, cur, bits);
2644 PUSH_BYTE(utf8, cur, bits);
2647 /* Determine how many chars are left in the requested field */
2649 if (howlen == e_star) field_len = 0;
2650 else field_len -= l;
2651 Zero(cur, field_len, char);
2659 aiv = SvIV(fromstr);
2660 if ((-128 > aiv || aiv > 127))
2661 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2662 "Character in 'c' format wrapped in pack");
2663 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2668 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2674 aiv = SvIV(fromstr);
2675 if ((0 > aiv || aiv > 0xff))
2676 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2677 "Character in 'C' format wrapped in pack");
2678 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2683 U8 in_bytes = (U8)IN_BYTES;
2685 end = start+SvLEN(cat)-1;
2686 if (utf8) end -= UTF8_MAXLEN-1;
2690 auv = SvUV(fromstr);
2691 if (in_bytes) auv = auv % 0x100;
2696 SvCUR_set(cat, cur - start);
2698 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2699 end = start+SvLEN(cat)-UTF8_MAXLEN;
2701 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2704 0 : UNICODE_ALLOW_ANY);
2709 SvCUR_set(cat, cur - start);
2710 marked_upgrade(aTHX_ cat, symptr);
2711 lookahead.flags |= FLAG_DO_UTF8;
2712 lookahead.strbeg = symptr->strbeg;
2715 cur = start + SvCUR(cat);
2716 end = start+SvLEN(cat)-UTF8_MAXLEN;
2719 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2720 "Character in 'W' format wrapped in pack");
2725 SvCUR_set(cat, cur - start);
2726 GROWING(0, cat, start, cur, len+1);
2727 end = start+SvLEN(cat)-1;
2729 *(U8 *) cur++ = (U8)auv;
2738 if (!(symptr->flags & FLAG_DO_UTF8)) {
2739 marked_upgrade(aTHX_ cat, symptr);
2740 lookahead.flags |= FLAG_DO_UTF8;
2741 lookahead.strbeg = symptr->strbeg;
2747 end = start+SvLEN(cat);
2748 if (!utf8) end -= UTF8_MAXLEN;
2752 auv = SvUV(fromstr);
2754 U8 buffer[UTF8_MAXLEN], *endb;
2755 endb = uvuni_to_utf8_flags(buffer, auv,
2757 0 : UNICODE_ALLOW_ANY);
2758 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2760 SvCUR_set(cat, cur - start);
2761 GROWING(0, cat, start, cur,
2762 len+(endb-buffer)*UTF8_EXPAND);
2763 end = start+SvLEN(cat);
2765 cur = bytes_to_uni(buffer, endb-buffer, cur);
2769 SvCUR_set(cat, cur - start);
2770 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2771 end = start+SvLEN(cat)-UTF8_MAXLEN;
2773 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
2775 0 : UNICODE_ALLOW_ANY);
2780 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2786 anv = SvNV(fromstr);
2787 # if defined(VMS) && !defined(_IEEE_FP)
2788 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2789 * on Alpha; fake it if we don't have them.
2793 else if (anv < -FLT_MAX)
2795 else afloat = (float)anv;
2797 afloat = (float)anv;
2799 DO_BO_PACK_N(afloat, float);
2800 PUSH_VAR(utf8, cur, afloat);
2808 anv = SvNV(fromstr);
2809 # if defined(VMS) && !defined(_IEEE_FP)
2810 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2811 * on Alpha; fake it if we don't have them.
2815 else if (anv < -DBL_MAX)
2817 else adouble = (double)anv;
2819 adouble = (double)anv;
2821 DO_BO_PACK_N(adouble, double);
2822 PUSH_VAR(utf8, cur, adouble);
2827 Zero(&anv, 1, NV); /* can be long double with unused bits */
2831 /* to work round a gcc/x86 bug; don't use SvNV */
2832 anv.nv = sv_2nv(fromstr);
2834 anv.nv = SvNV(fromstr);
2836 DO_BO_PACK_N(anv, NV);
2837 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes));
2841 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2844 /* long doubles can have unused bits, which may be nonzero */
2845 Zero(&aldouble, 1, long double);
2849 /* to work round a gcc/x86 bug; don't use SvNV */
2850 aldouble.ld = (long double)sv_2nv(fromstr);
2852 aldouble.ld = (long double)SvNV(fromstr);
2854 DO_BO_PACK_N(aldouble, long double);
2855 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes));
2860 case 'n' | TYPE_IS_SHRIEKING:
2865 ai16 = (I16)SvIV(fromstr);
2867 ai16 = PerlSock_htons(ai16);
2869 PUSH16(utf8, cur, &ai16);
2872 case 'v' | TYPE_IS_SHRIEKING:
2877 ai16 = (I16)SvIV(fromstr);
2879 PUSH16(utf8, cur, &ai16);
2882 case 'S' | TYPE_IS_SHRIEKING:
2883 #if SHORTSIZE != SIZE16
2885 unsigned short aushort;
2887 aushort = SvUV(fromstr);
2888 DO_BO_PACK(aushort, s);
2889 PUSH_VAR(utf8, cur, aushort);
2899 au16 = (U16)SvUV(fromstr);
2900 DO_BO_PACK(au16, 16);
2901 PUSH16(utf8, cur, &au16);
2904 case 's' | TYPE_IS_SHRIEKING:
2905 #if SHORTSIZE != SIZE16
2909 ashort = SvIV(fromstr);
2910 DO_BO_PACK(ashort, s);
2911 PUSH_VAR(utf8, cur, ashort);
2921 ai16 = (I16)SvIV(fromstr);
2922 DO_BO_PACK(ai16, 16);
2923 PUSH16(utf8, cur, &ai16);
2927 case 'I' | TYPE_IS_SHRIEKING:
2931 auint = SvUV(fromstr);
2932 DO_BO_PACK(auint, i);
2933 PUSH_VAR(utf8, cur, auint);
2940 aiv = SvIV(fromstr);
2941 #if IVSIZE == INTSIZE
2943 #elif IVSIZE == LONGSIZE
2945 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
2946 DO_BO_PACK(aiv, 64);
2948 Perl_croak(aTHX_ "'j' not supported on this platform");
2950 PUSH_VAR(utf8, cur, aiv);
2957 auv = SvUV(fromstr);
2958 #if UVSIZE == INTSIZE
2960 #elif UVSIZE == LONGSIZE
2962 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
2963 DO_BO_PACK(auv, 64);
2965 Perl_croak(aTHX_ "'J' not supported on this platform");
2967 PUSH_VAR(utf8, cur, auv);
2974 anv = SvNV(fromstr);
2978 SvCUR_set(cat, cur - start);
2979 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
2982 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2983 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2984 any negative IVs will have already been got by the croak()
2985 above. IOK is untrue for fractions, so we test them
2986 against UV_MAX_P1. */
2987 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2988 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
2989 char *in = buf + sizeof(buf);
2990 UV auv = SvUV(fromstr);
2993 *--in = (char)((auv & 0x7f) | 0x80);
2996 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2997 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2998 in, (buf + sizeof(buf)) - in);
2999 } else if (SvPOKp(fromstr))
3001 else if (SvNOKp(fromstr)) {
3002 /* 10**NV_MAX_10_EXP is the largest power of 10
3003 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
3004 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3005 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3006 And with that many bytes only Inf can overflow.
3007 Some C compilers are strict about integral constant
3008 expressions so we conservatively divide by a slightly
3009 smaller integer instead of multiplying by the exact
3010 floating-point value.
3012 #ifdef NV_MAX_10_EXP
3013 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3014 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3016 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3017 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3019 char *in = buf + sizeof(buf);
3021 anv = Perl_floor(anv);
3023 const NV next = Perl_floor(anv / 128);
3024 if (in <= buf) /* this cannot happen ;-) */
3025 Perl_croak(aTHX_ "Cannot compress integer in pack");
3026 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3029 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3030 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3031 in, (buf + sizeof(buf)) - in);
3040 /* Copy string and check for compliance */
3041 from = SvPV_const(fromstr, len);
3042 if ((norm = is_an_int(from, len)) == NULL)
3043 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3045 Newx(result, len, char);
3048 while (!done) *--in = div128(norm, &done) | 0x80;
3049 result[len - 1] &= 0x7F; /* clear continue bit */
3050 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3051 in, (result + len) - in);
3053 SvREFCNT_dec(norm); /* free norm */
3058 case 'i' | TYPE_IS_SHRIEKING:
3062 aint = SvIV(fromstr);
3063 DO_BO_PACK(aint, i);
3064 PUSH_VAR(utf8, cur, aint);
3067 case 'N' | TYPE_IS_SHRIEKING:
3072 au32 = SvUV(fromstr);
3074 au32 = PerlSock_htonl(au32);
3076 PUSH32(utf8, cur, &au32);
3079 case 'V' | TYPE_IS_SHRIEKING:
3084 au32 = SvUV(fromstr);
3086 PUSH32(utf8, cur, &au32);
3089 case 'L' | TYPE_IS_SHRIEKING:
3090 #if LONGSIZE != SIZE32
3092 unsigned long aulong;
3094 aulong = SvUV(fromstr);
3095 DO_BO_PACK(aulong, l);
3096 PUSH_VAR(utf8, cur, aulong);
3106 au32 = SvUV(fromstr);
3107 DO_BO_PACK(au32, 32);
3108 PUSH32(utf8, cur, &au32);
3111 case 'l' | TYPE_IS_SHRIEKING:
3112 #if LONGSIZE != SIZE32
3116 along = SvIV(fromstr);
3117 DO_BO_PACK(along, l);
3118 PUSH_VAR(utf8, cur, along);
3128 ai32 = SvIV(fromstr);
3129 DO_BO_PACK(ai32, 32);
3130 PUSH32(utf8, cur, &ai32);
3138 auquad = (Uquad_t) SvUV(fromstr);
3139 DO_BO_PACK(auquad, 64);
3140 PUSH_VAR(utf8, cur, auquad);
3147 aquad = (Quad_t)SvIV(fromstr);
3148 DO_BO_PACK(aquad, 64);
3149 PUSH_VAR(utf8, cur, aquad);
3152 #endif /* HAS_QUAD */
3154 len = 1; /* assume SV is correct length */
3155 GROWING(utf8, cat, start, cur, sizeof(char *));
3162 SvGETMAGIC(fromstr);
3163 if (!SvOK(fromstr)) aptr = NULL;
3165 /* XXX better yet, could spirit away the string to
3166 * a safe spot and hang on to it until the result
3167 * of pack() (and all copies of the result) are
3170 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3171 !SvREADONLY(fromstr)))) {
3172 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3173 "Attempt to pack pointer to temporary value");
3175 if (SvPOK(fromstr) || SvNIOK(fromstr))
3176 aptr = SvPV_nomg_const_nolen(fromstr);
3178 aptr = SvPV_force_flags_nolen(fromstr, 0);
3180 DO_BO_PACK_PC(aptr);
3181 PUSH_VAR(utf8, cur, aptr);
3185 const char *aptr, *aend;
3189 if (len <= 2) len = 45;
3190 else len = len / 3 * 3;
3192 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3193 "Field too wide in 'u' format in pack");
3196 aptr = SvPV_const(fromstr, fromlen);
3197 from_utf8 = DO_UTF8(fromstr);
3199 aend = aptr + fromlen;
3200 fromlen = sv_len_utf8_nomg(fromstr);
3201 } else aend = NULL; /* Unused, but keep compilers happy */
3202 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3203 while (fromlen > 0) {
3206 U8 hunk[1+63/3*4+1];
3208 if ((I32)fromlen > len)
3214 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3215 'u' | TYPE_IS_PACK)) {
3217 SvCUR_set(cat, cur - start);
3218 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3219 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3220 aptr, aend, buffer, (long) todo);
3222 end = doencodes(hunk, buffer, todo);
3224 end = doencodes(hunk, aptr, todo);
3227 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3234 SvCUR_set(cat, cur - start);
3236 *symptr = lookahead;
3245 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3248 SV *pat_sv = *++MARK;
3249 const char *pat = SvPV_const(pat_sv, fromlen);
3250 const char *patend = pat + fromlen;
3256 packlist(cat, pat, patend, MARK, SP + 1);
3266 * c-indentation-style: bsd
3268 * indent-tabs-mode: nil
3271 * ex: set ts=8 sts=4 sw=4 et: