3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
32 #define PERL_IN_PP_PACK_C
35 /* Types used by pack/unpack */
37 e_no_len, /* no length */
38 e_number, /* number, [] */
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
57 #define TEMPSYM_INIT(symptr, p, e, f) \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
81 U8 bytes[sizeof(long double)];
88 /* Maximum number of bytes to which a byte can grow due to upgrade */
92 * Offset for integer pack/unpack.
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
112 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
115 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
116 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
117 # define OFF16(p) ((char*)(p))
118 # define OFF32(p) ((char*)(p))
120 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
121 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
124 ++++ bad cray byte order
128 # define OFF16(p) ((char *) (p))
129 # define OFF32(p) ((char *) (p))
132 /* Only to be used inside a loop (see the break) */
133 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
135 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
137 Copy(s, OFF16(p), SIZE16, char); \
142 /* Only to be used inside a loop (see the break) */
143 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
145 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
147 Copy(s, OFF32(p), SIZE32, char); \
152 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
153 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
155 /* Only to be used inside a loop (see the break) */
156 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype) \
159 if (!uni_to_bytes(aTHX_ &s, strend, \
160 (char *) (buf), len, datumtype)) break; \
162 Copy(s, (char *) (buf), len, char); \
167 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
168 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype)
170 #define PUSH_VAR(utf8, aptr, var) \
171 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
173 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
174 #define MAX_SUB_TEMPLATE_LEVEL 100
176 /* flags (note that type modifiers can also be used as flags!) */
177 #define FLAG_WAS_UTF8 0x40
178 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
179 #define FLAG_UNPACK_ONLY_ONE 0x10
180 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
181 #define FLAG_SLASH 0x04
182 #define FLAG_COMMA 0x02
183 #define FLAG_PACK 0x01
186 S_mul128(pTHX_ SV *sv, U8 m)
189 char *s = SvPV(sv, len);
192 PERL_ARGS_ASSERT_MUL128;
194 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
195 SV * const tmpNew = newSVpvs("0000000000");
197 sv_catsv(tmpNew, sv);
198 SvREFCNT_dec(sv); /* free old sv */
203 while (!*t) /* trailing '\0'? */
206 const U32 i = ((*t - '0') << 7) + m;
207 *(t--) = '0' + (char)(i % 10);
213 /* Explosives and implosives. */
215 #if 'I' == 73 && 'J' == 74
216 /* On an ASCII/ISO kind of system */
217 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
220 Some other sort of character set - use memchr() so we don't match
223 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
227 #define TYPE_IS_SHRIEKING 0x100
228 #define TYPE_IS_BIG_ENDIAN 0x200
229 #define TYPE_IS_LITTLE_ENDIAN 0x400
230 #define TYPE_IS_PACK 0x800
231 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
232 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
233 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
235 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
236 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
238 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
240 # define DO_BO_UNPACK(var, type) \
242 switch (TYPE_ENDIANNESS(datumtype)) { \
243 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
244 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
249 # define DO_BO_PACK(var, type) \
251 switch (TYPE_ENDIANNESS(datumtype)) { \
252 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
253 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
258 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
260 switch (TYPE_ENDIANNESS(datumtype)) { \
261 case TYPE_IS_BIG_ENDIAN: \
262 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
264 case TYPE_IS_LITTLE_ENDIAN: \
265 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
272 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
274 switch (TYPE_ENDIANNESS(datumtype)) { \
275 case TYPE_IS_BIG_ENDIAN: \
276 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
278 case TYPE_IS_LITTLE_ENDIAN: \
279 var = (post_cast *) my_htole ## type ((pre_cast) var); \
286 # define BO_CANT_DOIT(action, type) \
288 switch (TYPE_ENDIANNESS(datumtype)) { \
289 case TYPE_IS_BIG_ENDIAN: \
290 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
291 "platform", #action, #type); \
293 case TYPE_IS_LITTLE_ENDIAN: \
294 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
295 "platform", #action, #type); \
302 # if PTRSIZE == INTSIZE
303 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
304 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
305 # elif PTRSIZE == LONGSIZE
306 # if LONGSIZE < IVSIZE && IVSIZE == 8
307 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, 64, IV, char)
308 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, 64, IV, char)
310 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
311 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
313 # elif PTRSIZE == IVSIZE
314 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
315 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
317 # define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
318 # define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
321 # if defined(my_htolen) && defined(my_letohn) && \
322 defined(my_htoben) && defined(my_betohn)
323 # define DO_BO_UNPACK_N(var, type) \
325 switch (TYPE_ENDIANNESS(datumtype)) { \
326 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
327 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
332 # define DO_BO_PACK_N(var, type) \
334 switch (TYPE_ENDIANNESS(datumtype)) { \
335 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
336 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
341 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
342 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
345 #define PACK_SIZE_CANNOT_CSUM 0x80
346 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
347 #define PACK_SIZE_MASK 0x3F
349 #include "packsizetables.c"
352 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
355 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
356 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
357 /* We try to process malformed UTF-8 as much as possible (preferably with
358 warnings), but these two mean we make no progress in the string and
359 might enter an infinite loop */
360 if (retlen == (STRLEN) -1 || retlen == 0)
361 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
362 (int) TYPE_NO_MODIFIERS(datumtype));
364 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
365 "Character in '%c' format wrapped in unpack",
366 (int) TYPE_NO_MODIFIERS(datumtype));
373 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
374 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
378 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
382 const char *from = *s;
384 const U32 flags = ckWARN(WARN_UTF8) ?
385 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
386 for (;buf_len > 0; buf_len--) {
387 if (from >= end) return FALSE;
388 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
389 if (retlen == (STRLEN) -1 || retlen == 0) {
390 from += UTF8SKIP(from);
392 } else from += retlen;
397 *(U8 *)buf++ = (U8)val;
399 /* We have enough characters for the buffer. Did we have problems ? */
402 /* Rewalk the string fragment while warning */
404 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
405 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
406 if (ptr >= end) break;
407 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
409 if (from > end) from = end;
412 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
413 WARN_PACK : WARN_UNPACK),
414 "Character(s) in '%c' format wrapped in %s",
415 (int) TYPE_NO_MODIFIERS(datumtype),
416 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
423 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
427 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
428 if (val >= 0x100 || !ISUUCHAR(val) ||
429 retlen == (STRLEN) -1 || retlen == 0) {
433 *out = PL_uudmap[val] & 077;
439 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
440 const U8 * const end = start + len;
442 PERL_ARGS_ASSERT_BYTES_TO_UNI;
444 while (start < end) {
445 const UV uv = NATIVE_TO_ASCII(*start);
446 if (UNI_IS_INVARIANT(uv))
447 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
449 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
450 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
457 #define PUSH_BYTES(utf8, cur, buf, len) \
460 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
462 Copy(buf, cur, len, char); \
467 #define GROWING(utf8, cat, start, cur, in_len) \
469 STRLEN glen = (in_len); \
470 if (utf8) glen *= UTF8_EXPAND; \
471 if ((cur) + glen >= (start) + SvLEN(cat)) { \
472 (start) = sv_exp_grow(cat, glen); \
473 (cur) = (start) + SvCUR(cat); \
477 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
479 const STRLEN glen = (in_len); \
481 if (utf8) gl *= UTF8_EXPAND; \
482 if ((cur) + gl >= (start) + SvLEN(cat)) { \
484 SvCUR_set((cat), (cur) - (start)); \
485 (start) = sv_exp_grow(cat, gl); \
486 (cur) = (start) + SvCUR(cat); \
488 PUSH_BYTES(utf8, cur, buf, glen); \
491 #define PUSH_BYTE(utf8, s, byte) \
494 const U8 au8 = (byte); \
495 (s) = bytes_to_uni(&au8, 1, (s)); \
496 } else *(U8 *)(s)++ = (byte); \
499 /* Only to be used inside a loop (see the break) */
500 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
503 if (str >= end) break; \
504 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
505 if (retlen == (STRLEN) -1 || retlen == 0) { \
507 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
512 static const char *_action( const tempsym_t* symptr )
514 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
517 /* Returns the sizeof() struct described by pat */
519 S_measure_struct(pTHX_ tempsym_t* symptr)
523 PERL_ARGS_ASSERT_MEASURE_STRUCT;
525 while (next_symbol(symptr)) {
529 switch (symptr->howlen) {
531 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
535 /* e_no_len and e_number */
536 len = symptr->length;
540 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
543 /* endianness doesn't influence the size of a type */
544 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
546 Perl_croak(aTHX_ "Invalid type '%c' in %s",
547 (int)TYPE_NO_MODIFIERS(symptr->code),
549 case '.' | TYPE_IS_SHRIEKING:
550 case '@' | TYPE_IS_SHRIEKING:
554 case 'U': /* XXXX Is it correct? */
557 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
558 (int) TYPE_NO_MODIFIERS(symptr->code),
565 tempsym_t savsym = *symptr;
566 symptr->patptr = savsym.grpbeg;
567 symptr->patend = savsym.grpend;
568 /* XXXX Theoretically, we need to measure many times at
569 different positions, since the subexpression may contain
570 alignment commands, but be not of aligned length.
571 Need to detect this and croak(). */
572 size = measure_struct(symptr);
576 case 'X' | TYPE_IS_SHRIEKING:
577 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
579 if (!len) /* Avoid division by 0 */
581 len = total % len; /* Assumed: the start is aligned. */
586 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
588 case 'x' | TYPE_IS_SHRIEKING:
589 if (!len) /* Avoid division by 0 */
591 star = total % len; /* Assumed: the start is aligned. */
592 if (star) /* Other portable ways? */
616 size = sizeof(char*);
626 /* locate matching closing parenthesis or bracket
627 * returns char pointer to char after match, or NULL
630 S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
632 PERL_ARGS_ASSERT_GROUP_END;
634 while (patptr < patend) {
635 const char c = *patptr++;
642 while (patptr < patend && *patptr != '\n')
646 patptr = group_end(patptr, patend, ')') + 1;
648 patptr = group_end(patptr, patend, ']') + 1;
650 Perl_croak(aTHX_ "No group ending character '%c' found in template",
656 /* Convert unsigned decimal number to binary.
657 * Expects a pointer to the first digit and address of length variable
658 * Advances char pointer to 1st non-digit char and returns number
661 S_get_num(pTHX_ const char *patptr, I32 *lenptr )
663 I32 len = *patptr++ - '0';
665 PERL_ARGS_ASSERT_GET_NUM;
667 while (isDIGIT(*patptr)) {
668 if (len >= 0x7FFFFFFF/10)
669 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
670 len = (len * 10) + (*patptr++ - '0');
676 /* The marvellous template parsing routine: Using state stored in *symptr,
677 * locates next template code and count
680 S_next_symbol(pTHX_ tempsym_t* symptr )
682 const char* patptr = symptr->patptr;
683 const char* const patend = symptr->patend;
685 PERL_ARGS_ASSERT_NEXT_SYMBOL;
687 symptr->flags &= ~FLAG_SLASH;
689 while (patptr < patend) {
690 if (isSPACE(*patptr))
692 else if (*patptr == '#') {
694 while (patptr < patend && *patptr != '\n')
699 /* We should have found a template code */
700 I32 code = *patptr++ & 0xFF;
701 U32 inherited_modifiers = 0;
703 if (code == ','){ /* grandfather in commas but with a warning */
704 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
705 symptr->flags |= FLAG_COMMA;
706 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
707 "Invalid type ',' in %s", _action( symptr ) );
712 /* for '(', skip to ')' */
714 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
715 Perl_croak(aTHX_ "()-group starts with a count in %s",
717 symptr->grpbeg = patptr;
718 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
719 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
720 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
724 /* look for group modifiers to inherit */
725 if (TYPE_ENDIANNESS(symptr->flags)) {
726 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
727 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
730 /* look for modifiers */
731 while (patptr < patend) {
736 modifier = TYPE_IS_SHRIEKING;
737 allowed = "sSiIlLxXnNvV@.";
740 modifier = TYPE_IS_BIG_ENDIAN;
741 allowed = ENDIANNESS_ALLOWED_TYPES;
744 modifier = TYPE_IS_LITTLE_ENDIAN;
745 allowed = ENDIANNESS_ALLOWED_TYPES;
756 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
757 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
758 allowed, _action( symptr ) );
760 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
761 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
762 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
763 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
764 TYPE_ENDIANNESS_MASK)
765 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
766 *patptr, _action( symptr ) );
768 if ((code & modifier)) {
769 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
770 "Duplicate modifier '%c' after '%c' in %s",
771 *patptr, (int) TYPE_NO_MODIFIERS(code),
779 /* inherit modifiers */
780 code |= inherited_modifiers;
782 /* look for count and/or / */
783 if (patptr < patend) {
784 if (isDIGIT(*patptr)) {
785 patptr = get_num( patptr, &symptr->length );
786 symptr->howlen = e_number;
788 } else if (*patptr == '*') {
790 symptr->howlen = e_star;
792 } else if (*patptr == '[') {
793 const char* lenptr = ++patptr;
794 symptr->howlen = e_number;
795 patptr = group_end( patptr, patend, ']' ) + 1;
796 /* what kind of [] is it? */
797 if (isDIGIT(*lenptr)) {
798 lenptr = get_num( lenptr, &symptr->length );
800 Perl_croak(aTHX_ "Malformed integer in [] in %s",
803 tempsym_t savsym = *symptr;
804 symptr->patend = patptr-1;
805 symptr->patptr = lenptr;
806 savsym.length = measure_struct(symptr);
810 symptr->howlen = e_no_len;
815 while (patptr < patend) {
816 if (isSPACE(*patptr))
818 else if (*patptr == '#') {
820 while (patptr < patend && *patptr != '\n')
825 if (*patptr == '/') {
826 symptr->flags |= FLAG_SLASH;
828 if (patptr < patend &&
829 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
830 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
837 /* at end - no count, no / */
838 symptr->howlen = e_no_len;
843 symptr->patptr = patptr;
847 symptr->patptr = patptr;
852 There is no way to cleanly handle the case where we should process the
853 string per byte in its upgraded form while it's really in downgraded form
854 (e.g. estimates like strend-s as an upper bound for the number of
855 characters left wouldn't work). So if we foresee the need of this
856 (pattern starts with U or contains U0), we want to work on the encoded
857 version of the string. Users are advised to upgrade their pack string
858 themselves if they need to do a lot of unpacks like this on it
861 need_utf8(const char *pat, const char *patend)
865 PERL_ARGS_ASSERT_NEED_UTF8;
867 while (pat < patend) {
870 pat = (const char *) memchr(pat, '\n', patend-pat);
871 if (!pat) return FALSE;
872 } else if (pat[0] == 'U') {
873 if (first || pat[1] == '0') return TRUE;
874 } else first = FALSE;
881 first_symbol(const char *pat, const char *patend) {
882 PERL_ARGS_ASSERT_FIRST_SYMBOL;
884 while (pat < patend) {
885 if (pat[0] != '#') return pat[0];
887 pat = (const char *) memchr(pat, '\n', patend-pat);
895 =for apidoc unpackstring
897 The engine implementing the unpack() Perl function.
899 Using the template pat..patend, this function unpacks the string
900 s..strend into a number of mortal SVs, which it pushes onto the perl
901 argument (@_) stack (so you will need to issue a C<PUTBACK> before and
902 C<SPAGAIN> after the call to this function). It returns the number of
905 The strend and patend pointers should point to the byte following the last
906 character of each string.
908 Although this function returns its values on the perl argument stack, it
909 doesn't take any parameters from that stack (and thus in particular
910 there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
916 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
920 PERL_ARGS_ASSERT_UNPACKSTRING;
922 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
923 else if (need_utf8(pat, patend)) {
924 /* We probably should try to avoid this in case a scalar context call
925 wouldn't get to the "U0" */
926 STRLEN len = strend - s;
927 s = (char *) bytes_to_utf8((U8 *) s, &len);
930 flags |= FLAG_DO_UTF8;
933 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
934 flags |= FLAG_PARSE_UTF8;
936 TEMPSYM_INIT(&sym, pat, patend, flags);
938 return unpack_rec(&sym, s, s, strend, NULL );
942 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
946 const I32 start_sp_offset = SP - PL_stack_base;
951 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
953 bool explicit_length;
954 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
955 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
957 PERL_ARGS_ASSERT_UNPACK_REC;
959 symptr->strbeg = s - strbeg;
961 while (next_symbol(symptr)) {
964 I32 datumtype = symptr->code;
965 /* do first one only unless in list context
966 / is implemented by unpacking the count, then popping it from the
967 stack, so must check that we're not in the middle of a / */
969 && (SP - PL_stack_base == start_sp_offset + 1)
970 && (datumtype != '/') ) /* XXX can this be omitted */
973 switch (howlen = symptr->howlen) {
975 len = strend - strbeg; /* long enough */
978 /* e_no_len and e_number */
979 len = symptr->length;
983 explicit_length = TRUE;
985 beyond = s >= strend;
987 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
989 /* props nonzero means we can process this letter. */
990 const long size = props & PACK_SIZE_MASK;
991 const long howmany = (strend - s) / size;
995 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
996 if (len && unpack_only_one) len = 1;
1002 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1004 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1007 if (howlen == e_no_len)
1008 len = 16; /* len is not specified */
1016 tempsym_t savsym = *symptr;
1017 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1018 symptr->flags |= group_modifiers;
1019 symptr->patend = savsym.grpend;
1020 symptr->previous = &savsym;
1023 if (len && unpack_only_one) len = 1;
1025 symptr->patptr = savsym.grpbeg;
1026 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1027 else symptr->flags &= ~FLAG_PARSE_UTF8;
1028 unpack_rec(symptr, s, strbeg, strend, &s);
1029 if (s == strend && savsym.howlen == e_star)
1030 break; /* No way to continue */
1033 savsym.flags = symptr->flags & ~group_modifiers;
1037 case '.' | TYPE_IS_SHRIEKING:
1041 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1042 if (howlen == e_star) from = strbeg;
1043 else if (len <= 0) from = s;
1045 tempsym_t *group = symptr;
1047 while (--len && group) group = group->previous;
1048 from = group ? strbeg + group->strbeg : strbeg;
1051 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1052 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1056 case '@' | TYPE_IS_SHRIEKING:
1058 s = strbeg + symptr->strbeg;
1059 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1063 Perl_croak(aTHX_ "'@' outside of string in unpack");
1068 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1071 Perl_croak(aTHX_ "'@' outside of string in unpack");
1075 case 'X' | TYPE_IS_SHRIEKING:
1076 if (!len) /* Avoid division by 0 */
1079 const char *hop, *last;
1081 hop = last = strbeg;
1083 hop += UTF8SKIP(hop);
1090 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1094 len = (s - strbeg) % len;
1100 Perl_croak(aTHX_ "'X' outside of string in unpack");
1101 while (--s, UTF8_IS_CONTINUATION(*s)) {
1103 Perl_croak(aTHX_ "'X' outside of string in unpack");
1108 if (len > s - strbeg)
1109 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1113 case 'x' | TYPE_IS_SHRIEKING: {
1115 if (!len) /* Avoid division by 0 */
1117 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1118 else ai32 = (s - strbeg) % len;
1119 if (ai32 == 0) break;
1127 Perl_croak(aTHX_ "'x' outside of string in unpack");
1132 if (len > strend - s)
1133 Perl_croak(aTHX_ "'x' outside of string in unpack");
1138 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1144 /* Preliminary length estimate is assumed done in 'W' */
1145 if (len > strend - s) len = strend - s;
1151 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1152 if (hop >= strend) {
1154 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1159 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1161 } else if (len > strend - s)
1164 if (datumtype == 'Z') {
1165 /* 'Z' strips stuff after first null */
1166 const char *ptr, *end;
1168 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1169 sv = newSVpvn(s, ptr-s);
1170 if (howlen == e_star) /* exact for 'Z*' */
1171 len = ptr-s + (ptr != strend ? 1 : 0);
1172 } else if (datumtype == 'A') {
1173 /* 'A' strips both nulls and spaces */
1175 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1176 for (ptr = s+len-1; ptr >= s; ptr--)
1177 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1178 !isSPACE_utf8(ptr)) break;
1179 if (ptr >= s) ptr += UTF8SKIP(ptr);
1182 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1184 for (ptr = s+len-1; ptr >= s; ptr--)
1185 if (*ptr != 0 && !isSPACE(*ptr)) break;
1188 sv = newSVpvn(s, ptr-s);
1189 } else sv = newSVpvn(s, len);
1193 /* Undo any upgrade done due to need_utf8() */
1194 if (!(symptr->flags & FLAG_WAS_UTF8))
1195 sv_utf8_downgrade(sv, 0);
1203 if (howlen == e_star || len > (strend - s) * 8)
1204 len = (strend - s) * 8;
1207 while (len >= 8 && s < strend) {
1208 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1213 cuv += PL_bitcount[*(U8 *)s++];
1216 if (len && s < strend) {
1218 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1219 if (datumtype == 'b')
1221 if (bits & 1) cuv++;
1226 if (bits & 0x80) cuv++;
1233 sv = sv_2mortal(newSV(len ? len : 1));
1236 if (datumtype == 'b') {
1238 const I32 ai32 = len;
1239 for (len = 0; len < ai32; len++) {
1240 if (len & 7) bits >>= 1;
1242 if (s >= strend) break;
1243 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1244 } else bits = *(U8 *) s++;
1245 *str++ = bits & 1 ? '1' : '0';
1249 const I32 ai32 = len;
1250 for (len = 0; len < ai32; len++) {
1251 if (len & 7) bits <<= 1;
1253 if (s >= strend) break;
1254 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1255 } else bits = *(U8 *) s++;
1256 *str++ = bits & 0x80 ? '1' : '0';
1260 SvCUR_set(sv, str - SvPVX_const(sv));
1267 /* Preliminary length estimate, acceptable for utf8 too */
1268 if (howlen == e_star || len > (strend - s) * 2)
1269 len = (strend - s) * 2;
1271 sv = sv_2mortal(newSV(len ? len : 1));
1275 if (datumtype == 'h') {
1278 for (len = 0; len < ai32; len++) {
1279 if (len & 1) bits >>= 4;
1281 if (s >= strend) break;
1282 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1283 } else bits = * (U8 *) s++;
1285 *str++ = PL_hexdigit[bits & 15];
1289 const I32 ai32 = len;
1290 for (len = 0; len < ai32; len++) {
1291 if (len & 1) bits <<= 4;
1293 if (s >= strend) break;
1294 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1295 } else bits = *(U8 *) s++;
1297 *str++ = PL_hexdigit[(bits >> 4) & 15];
1302 SvCUR_set(sv, str - SvPVX_const(sv));
1309 if (explicit_length)
1310 /* Switch to "character" mode */
1311 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1316 while (len-- > 0 && s < strend) {
1321 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1322 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1323 if (retlen == (STRLEN) -1 || retlen == 0)
1324 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1328 aint = *(U8 *)(s)++;
1329 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1333 else if (checksum > bits_in_uv)
1334 cdouble += (NV)aint;
1342 while (len-- > 0 && s < strend) {
1344 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1345 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1346 if (retlen == (STRLEN) -1 || retlen == 0)
1347 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1351 else if (checksum > bits_in_uv)
1352 cdouble += (NV) val;
1356 } else if (!checksum)
1358 const U8 ch = *(U8 *) s++;
1361 else if (checksum > bits_in_uv)
1362 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1364 while (len-- > 0) cuv += *(U8 *) s++;
1368 if (explicit_length && howlen != e_star) {
1369 /* Switch to "bytes in UTF-8" mode */
1370 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1372 /* Should be impossible due to the need_utf8() test */
1373 Perl_croak(aTHX_ "U0 mode on a byte string");
1377 if (len > strend - s) len = strend - s;
1379 if (len && unpack_only_one) len = 1;
1383 while (len-- > 0 && s < strend) {
1387 U8 result[UTF8_MAXLEN];
1388 const char *ptr = s;
1390 /* Bug: warns about bad utf8 even if we are short on bytes
1391 and will break out of the loop */
1392 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1395 len = UTF8SKIP(result);
1396 if (!uni_to_bytes(aTHX_ &ptr, strend,
1397 (char *) &result[1], len-1, 'U')) break;
1398 auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT);
1401 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
1402 if (retlen == (STRLEN) -1 || retlen == 0)
1403 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1408 else if (checksum > bits_in_uv)
1409 cdouble += (NV) auv;
1414 case 's' | TYPE_IS_SHRIEKING:
1415 #if SHORTSIZE != SIZE16
1418 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1419 DO_BO_UNPACK(ashort, s);
1422 else if (checksum > bits_in_uv)
1423 cdouble += (NV)ashort;
1435 #if U16SIZE > SIZE16
1438 SHIFT16(utf8, s, strend, &ai16, datumtype);
1439 DO_BO_UNPACK(ai16, 16);
1440 #if U16SIZE > SIZE16
1446 else if (checksum > bits_in_uv)
1447 cdouble += (NV)ai16;
1452 case 'S' | TYPE_IS_SHRIEKING:
1453 #if SHORTSIZE != SIZE16
1455 unsigned short aushort;
1456 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1457 DO_BO_UNPACK(aushort, s);
1460 else if (checksum > bits_in_uv)
1461 cdouble += (NV)aushort;
1474 #if U16SIZE > SIZE16
1477 SHIFT16(utf8, s, strend, &au16, datumtype);
1478 DO_BO_UNPACK(au16, 16);
1479 if (datumtype == 'n')
1480 au16 = PerlSock_ntohs(au16);
1481 if (datumtype == 'v')
1485 else if (checksum > bits_in_uv)
1486 cdouble += (NV) au16;
1491 case 'v' | TYPE_IS_SHRIEKING:
1492 case 'n' | TYPE_IS_SHRIEKING:
1495 # if U16SIZE > SIZE16
1498 SHIFT16(utf8, s, strend, &ai16, datumtype);
1499 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1500 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1501 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1502 ai16 = (I16) vtohs((U16) ai16);
1505 else if (checksum > bits_in_uv)
1506 cdouble += (NV) ai16;
1512 case 'i' | TYPE_IS_SHRIEKING:
1515 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1516 DO_BO_UNPACK(aint, i);
1519 else if (checksum > bits_in_uv)
1520 cdouble += (NV)aint;
1526 case 'I' | TYPE_IS_SHRIEKING:
1529 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1530 DO_BO_UNPACK(auint, i);
1533 else if (checksum > bits_in_uv)
1534 cdouble += (NV)auint;
1542 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1543 #if IVSIZE == INTSIZE
1544 DO_BO_UNPACK(aiv, i);
1545 #elif IVSIZE == LONGSIZE
1546 DO_BO_UNPACK(aiv, l);
1547 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1548 DO_BO_UNPACK(aiv, 64);
1550 Perl_croak(aTHX_ "'j' not supported on this platform");
1554 else if (checksum > bits_in_uv)
1563 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1564 #if IVSIZE == INTSIZE
1565 DO_BO_UNPACK(auv, i);
1566 #elif IVSIZE == LONGSIZE
1567 DO_BO_UNPACK(auv, l);
1568 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1569 DO_BO_UNPACK(auv, 64);
1571 Perl_croak(aTHX_ "'J' not supported on this platform");
1575 else if (checksum > bits_in_uv)
1581 case 'l' | TYPE_IS_SHRIEKING:
1582 #if LONGSIZE != SIZE32
1585 SHIFT_VAR(utf8, s, strend, along, datumtype);
1586 DO_BO_UNPACK(along, l);
1589 else if (checksum > bits_in_uv)
1590 cdouble += (NV)along;
1601 #if U32SIZE > SIZE32
1604 SHIFT32(utf8, s, strend, &ai32, datumtype);
1605 DO_BO_UNPACK(ai32, 32);
1606 #if U32SIZE > SIZE32
1607 if (ai32 > 2147483647) ai32 -= 4294967296;
1611 else if (checksum > bits_in_uv)
1612 cdouble += (NV)ai32;
1617 case 'L' | TYPE_IS_SHRIEKING:
1618 #if LONGSIZE != SIZE32
1620 unsigned long aulong;
1621 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1622 DO_BO_UNPACK(aulong, l);
1625 else if (checksum > bits_in_uv)
1626 cdouble += (NV)aulong;
1639 #if U32SIZE > SIZE32
1642 SHIFT32(utf8, s, strend, &au32, datumtype);
1643 DO_BO_UNPACK(au32, 32);
1644 if (datumtype == 'N')
1645 au32 = PerlSock_ntohl(au32);
1646 if (datumtype == 'V')
1650 else if (checksum > bits_in_uv)
1651 cdouble += (NV)au32;
1656 case 'V' | TYPE_IS_SHRIEKING:
1657 case 'N' | TYPE_IS_SHRIEKING:
1660 #if U32SIZE > SIZE32
1663 SHIFT32(utf8, s, strend, &ai32, datumtype);
1664 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1665 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1666 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1667 ai32 = (I32)vtohl((U32)ai32);
1670 else if (checksum > bits_in_uv)
1671 cdouble += (NV)ai32;
1679 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1680 DO_BO_UNPACK_PC(aptr);
1681 /* newSVpv generates undef if aptr is NULL */
1682 mPUSHs(newSVpv(aptr, 0));
1690 while (len > 0 && s < strend) {
1692 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1693 auv = (auv << 7) | (ch & 0x7f);
1694 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1702 if (++bytes >= sizeof(UV)) { /* promote to string */
1705 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
1706 while (s < strend) {
1707 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1708 sv = mul128(sv, (U8)(ch & 0x7f));
1714 t = SvPV_nolen_const(sv);
1723 if ((s >= strend) && bytes)
1724 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1728 if (symptr->howlen == e_star)
1729 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1731 if (s + sizeof(char*) <= strend) {
1733 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1734 DO_BO_UNPACK_PC(aptr);
1735 /* newSVpvn generates undef if aptr is NULL */
1736 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1743 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
1744 DO_BO_UNPACK(aquad, 64);
1746 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
1747 newSViv((IV)aquad) : newSVnv((NV)aquad));
1748 else if (checksum > bits_in_uv)
1749 cdouble += (NV)aquad;
1757 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
1758 DO_BO_UNPACK(auquad, 64);
1760 mPUSHs(auquad <= UV_MAX ?
1761 newSVuv((UV)auquad) : newSVnv((NV)auquad));
1762 else if (checksum > bits_in_uv)
1763 cdouble += (NV)auquad;
1768 #endif /* HAS_QUAD */
1769 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1773 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
1774 DO_BO_UNPACK_N(afloat, float);
1784 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
1785 DO_BO_UNPACK_N(adouble, double);
1795 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), datumtype);
1796 DO_BO_UNPACK_N(anv.nv, NV);
1803 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1807 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, sizeof(aldouble.bytes), datumtype);
1808 DO_BO_UNPACK_N(aldouble.ld, long double);
1810 mPUSHn(aldouble.ld);
1812 cdouble += aldouble.ld;
1818 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1819 sv = sv_2mortal(newSV(l));
1820 if (l) SvPOK_on(sv);
1823 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1828 next_uni_uu(aTHX_ &s, strend, &a);
1829 next_uni_uu(aTHX_ &s, strend, &b);
1830 next_uni_uu(aTHX_ &s, strend, &c);
1831 next_uni_uu(aTHX_ &s, strend, &d);
1832 hunk[0] = (char)((a << 2) | (b >> 4));
1833 hunk[1] = (char)((b << 4) | (c >> 2));
1834 hunk[2] = (char)((c << 6) | d);
1836 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1844 /* possible checksum byte */
1845 const char *skip = s+UTF8SKIP(s);
1846 if (skip < strend && *skip == '\n')
1852 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1856 len = PL_uudmap[*(U8*)s++] & 077;
1858 if (s < strend && ISUUCHAR(*s))
1859 a = PL_uudmap[*(U8*)s++] & 077;
1862 if (s < strend && ISUUCHAR(*s))
1863 b = PL_uudmap[*(U8*)s++] & 077;
1866 if (s < strend && ISUUCHAR(*s))
1867 c = PL_uudmap[*(U8*)s++] & 077;
1870 if (s < strend && ISUUCHAR(*s))
1871 d = PL_uudmap[*(U8*)s++] & 077;
1874 hunk[0] = (char)((a << 2) | (b >> 4));
1875 hunk[1] = (char)((b << 4) | (c >> 2));
1876 hunk[2] = (char)((c << 6) | d);
1878 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1883 else /* possible checksum byte */
1884 if (s + 1 < strend && s[1] == '\n')
1894 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1895 (checksum > bits_in_uv &&
1896 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1899 anv = (NV) (1 << (checksum & 15));
1900 while (checksum >= 16) {
1904 while (cdouble < 0.0)
1906 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
1907 sv = newSVnv(cdouble);
1910 if (checksum < bits_in_uv) {
1911 UV mask = ((UV)1 << checksum) - 1;
1920 if (symptr->flags & FLAG_SLASH){
1921 if (SP - PL_stack_base - start_sp_offset <= 0)
1923 if( next_symbol(symptr) ){
1924 if( symptr->howlen == e_number )
1925 Perl_croak(aTHX_ "Count after length/code in unpack" );
1927 /* ...end of char buffer then no decent length available */
1928 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1930 /* take top of stack (hope it's numeric) */
1933 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1936 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1938 datumtype = symptr->code;
1939 explicit_length = FALSE;
1947 return SP - PL_stack_base - start_sp_offset;
1955 I32 gimme = GIMME_V;
1958 const char *pat = SvPV_const(left, llen);
1959 const char *s = SvPV_const(right, rlen);
1960 const char *strend = s + rlen;
1961 const char *patend = pat + llen;
1965 cnt = unpackstring(pat, patend, s, strend,
1966 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
1967 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
1970 if ( !cnt && gimme == G_SCALAR )
1971 PUSHs(&PL_sv_undef);
1976 doencodes(U8 *h, const char *s, I32 len)
1978 *h++ = PL_uuemap[len];
1980 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1981 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1982 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1983 *h++ = PL_uuemap[(077 & (s[2] & 077))];
1988 const char r = (len > 1 ? s[1] : '\0');
1989 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1990 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1991 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1992 *h++ = PL_uuemap[0];
1999 S_is_an_int(pTHX_ const char *s, STRLEN l)
2001 SV *result = newSVpvn(s, l);
2002 char *const result_c = SvPV_nolen(result); /* convenience */
2003 char *out = result_c;
2007 PERL_ARGS_ASSERT_IS_AN_INT;
2015 SvREFCNT_dec(result);
2038 SvREFCNT_dec(result);
2044 SvCUR_set(result, out - result_c);
2048 /* pnum must be '\0' terminated */
2050 S_div128(pTHX_ SV *pnum, bool *done)
2053 char * const s = SvPV(pnum, len);
2057 PERL_ARGS_ASSERT_DIV128;
2061 const int i = m * 10 + (*t - '0');
2062 const int r = (i >> 7); /* r < 10 */
2070 SvCUR_set(pnum, (STRLEN) (t - s));
2075 =for apidoc packlist
2077 The engine implementing pack() Perl function.
2083 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
2088 PERL_ARGS_ASSERT_PACKLIST;
2090 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2092 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2093 Also make sure any UTF8 flag is loaded */
2094 SvPV_force_nolen(cat);
2096 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2098 (void)pack_rec( cat, &sym, beglist, endlist );
2101 /* like sv_utf8_upgrade, but also repoint the group start markers */
2103 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2106 const char *from_ptr, *from_start, *from_end, **marks, **m;
2107 char *to_start, *to_ptr;
2109 if (SvUTF8(sv)) return;
2111 from_start = SvPVX_const(sv);
2112 from_end = from_start + SvCUR(sv);
2113 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2114 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2115 if (from_ptr == from_end) {
2116 /* Simple case: no character needs to be changed */
2121 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2122 Newx(to_start, len, char);
2123 Copy(from_start, to_start, from_ptr-from_start, char);
2124 to_ptr = to_start + (from_ptr-from_start);
2126 Newx(marks, sym_ptr->level+2, const char *);
2127 for (group=sym_ptr; group; group = group->previous)
2128 marks[group->level] = from_start + group->strbeg;
2129 marks[sym_ptr->level+1] = from_end+1;
2130 for (m = marks; *m < from_ptr; m++)
2131 *m = to_start + (*m-from_start);
2133 for (;from_ptr < from_end; from_ptr++) {
2134 while (*m == from_ptr) *m++ = to_ptr;
2135 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2139 while (*m == from_ptr) *m++ = to_ptr;
2140 if (m != marks + sym_ptr->level+1) {
2143 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2144 "level=%d", m, marks, sym_ptr->level);
2146 for (group=sym_ptr; group; group = group->previous)
2147 group->strbeg = marks[group->level] - to_start;
2152 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2153 from_start -= SvIVX(sv);
2156 SvFLAGS(sv) &= ~SVf_OOK;
2159 Safefree(from_start);
2160 SvPV_set(sv, to_start);
2161 SvCUR_set(sv, to_ptr - to_start);
2166 /* Exponential string grower. Makes string extension effectively O(n)
2167 needed says how many extra bytes we need (not counting the final '\0')
2168 Only grows the string if there is an actual lack of space
2171 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2172 const STRLEN cur = SvCUR(sv);
2173 const STRLEN len = SvLEN(sv);
2176 PERL_ARGS_ASSERT_SV_EXP_GROW;
2178 if (len - cur > needed) return SvPVX(sv);
2179 extend = needed > len ? needed : len;
2180 return SvGROW(sv, len+extend+1);
2185 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2188 tempsym_t lookahead;
2189 I32 items = endlist - beglist;
2190 bool found = next_symbol(symptr);
2191 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2192 bool warn_utf8 = ckWARN(WARN_UTF8);
2194 PERL_ARGS_ASSERT_PACK_REC;
2196 if (symptr->level == 0 && found && symptr->code == 'U') {
2197 marked_upgrade(aTHX_ cat, symptr);
2198 symptr->flags |= FLAG_DO_UTF8;
2201 symptr->strbeg = SvCUR(cat);
2207 SV *lengthcode = NULL;
2208 I32 datumtype = symptr->code;
2209 howlen_t howlen = symptr->howlen;
2210 char *start = SvPVX(cat);
2211 char *cur = start + SvCUR(cat);
2213 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2217 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2221 /* e_no_len and e_number */
2222 len = symptr->length;
2227 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2229 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2230 /* We can process this letter. */
2231 STRLEN size = props & PACK_SIZE_MASK;
2232 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2236 /* Look ahead for next symbol. Do we have code/code? */
2237 lookahead = *symptr;
2238 found = next_symbol(&lookahead);
2239 if (symptr->flags & FLAG_SLASH) {
2241 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2242 if (strchr("aAZ", lookahead.code)) {
2243 if (lookahead.howlen == e_number) count = lookahead.length;
2246 count = sv_len_utf8(*beglist);
2249 if (lookahead.code == 'Z') count++;
2252 if (lookahead.howlen == e_number && lookahead.length < items)
2253 count = lookahead.length;
2256 lookahead.howlen = e_number;
2257 lookahead.length = count;
2258 lengthcode = sv_2mortal(newSViv(count));
2261 /* Code inside the switch must take care to properly update
2262 cat (CUR length and '\0' termination) if it updated *cur and
2263 doesn't simply leave using break */
2264 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2266 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2267 (int) TYPE_NO_MODIFIERS(datumtype));
2269 Perl_croak(aTHX_ "'%%' may not be used in pack");
2272 case '.' | TYPE_IS_SHRIEKING:
2274 if (howlen == e_star) from = start;
2275 else if (len == 0) from = cur;
2277 tempsym_t *group = symptr;
2279 while (--len && group) group = group->previous;
2280 from = group ? start + group->strbeg : start;
2283 len = SvIV(fromstr);
2285 case '@' | TYPE_IS_SHRIEKING:
2287 from = start + symptr->strbeg;
2289 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2291 while (len && from < cur) {
2292 from += UTF8SKIP(from);
2296 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2298 /* Here we know from == cur */
2300 GROWING(0, cat, start, cur, len);
2301 Zero(cur, len, char);
2303 } else if (from < cur) {
2306 } else goto no_change;
2314 if (len > 0) goto grow;
2315 if (len == 0) goto no_change;
2322 tempsym_t savsym = *symptr;
2323 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2324 symptr->flags |= group_modifiers;
2325 symptr->patend = savsym.grpend;
2327 symptr->previous = &lookahead;
2330 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2331 else symptr->flags &= ~FLAG_PARSE_UTF8;
2332 was_utf8 = SvUTF8(cat);
2333 symptr->patptr = savsym.grpbeg;
2334 beglist = pack_rec(cat, symptr, beglist, endlist);
2335 if (SvUTF8(cat) != was_utf8)
2336 /* This had better be an upgrade while in utf8==0 mode */
2339 if (savsym.howlen == e_star && beglist == endlist)
2340 break; /* No way to continue */
2342 items = endlist - beglist;
2343 lookahead.flags = symptr->flags & ~group_modifiers;
2346 case 'X' | TYPE_IS_SHRIEKING:
2347 if (!len) /* Avoid division by 0 */
2354 hop += UTF8SKIP(hop);
2361 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2365 len = (cur-start) % len;
2369 if (len < 1) goto no_change;
2373 Perl_croak(aTHX_ "'%c' outside of string in pack",
2374 (int) TYPE_NO_MODIFIERS(datumtype));
2375 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2377 Perl_croak(aTHX_ "'%c' outside of string in pack",
2378 (int) TYPE_NO_MODIFIERS(datumtype));
2384 if (cur - start < len)
2385 Perl_croak(aTHX_ "'%c' outside of string in pack",
2386 (int) TYPE_NO_MODIFIERS(datumtype));
2389 if (cur < start+symptr->strbeg) {
2390 /* Make sure group starts don't point into the void */
2392 const STRLEN length = cur-start;
2393 for (group = symptr;
2394 group && length < group->strbeg;
2395 group = group->previous) group->strbeg = length;
2396 lookahead.strbeg = length;
2399 case 'x' | TYPE_IS_SHRIEKING: {
2401 if (!len) /* Avoid division by 0 */
2403 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2404 else ai32 = (cur - start) % len;
2405 if (ai32 == 0) goto no_change;
2417 aptr = SvPV_const(fromstr, fromlen);
2418 if (DO_UTF8(fromstr)) {
2419 const char *end, *s;
2421 if (!utf8 && !SvUTF8(cat)) {
2422 marked_upgrade(aTHX_ cat, symptr);
2423 lookahead.flags |= FLAG_DO_UTF8;
2424 lookahead.strbeg = symptr->strbeg;
2427 cur = start + SvCUR(cat);
2429 if (howlen == e_star) {
2430 if (utf8) goto string_copy;
2434 end = aptr + fromlen;
2435 fromlen = datumtype == 'Z' ? len-1 : len;
2436 while ((I32) fromlen > 0 && s < end) {
2441 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2444 if (datumtype == 'Z') len++;
2450 fromlen = len - fromlen;
2451 if (datumtype == 'Z') fromlen--;
2452 if (howlen == e_star) {
2454 if (datumtype == 'Z') len++;
2456 GROWING(0, cat, start, cur, len);
2457 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2458 datumtype | TYPE_IS_PACK))
2459 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2460 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2461 (int)datumtype, aptr, end, cur, (UV)fromlen);
2465 if (howlen == e_star) {
2467 if (datumtype == 'Z') len++;
2469 if (len <= (I32) fromlen) {
2471 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2473 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2475 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2476 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2478 while (fromlen > 0) {
2479 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2485 if (howlen == e_star) {
2487 if (datumtype == 'Z') len++;
2489 if (len <= (I32) fromlen) {
2491 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2493 GROWING(0, cat, start, cur, len);
2494 Copy(aptr, cur, fromlen, char);
2498 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2505 const char *str, *end;
2512 str = SvPV_const(fromstr, fromlen);
2513 end = str + fromlen;
2514 if (DO_UTF8(fromstr)) {
2516 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2518 utf8_source = FALSE;
2519 utf8_flags = 0; /* Unused, but keep compilers happy */
2521 if (howlen == e_star) len = fromlen;
2522 field_len = (len+7)/8;
2523 GROWING(utf8, cat, start, cur, field_len);
2524 if (len > (I32)fromlen) len = fromlen;
2527 if (datumtype == 'B')
2531 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2533 } else bits |= *str++ & 1;
2534 if (l & 7) bits <<= 1;
2536 PUSH_BYTE(utf8, cur, bits);
2541 /* datumtype == 'b' */
2545 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2546 if (val & 1) bits |= 0x80;
2547 } else if (*str++ & 1)
2549 if (l & 7) bits >>= 1;
2551 PUSH_BYTE(utf8, cur, bits);
2557 if (datumtype == 'B')
2558 bits <<= 7 - (l & 7);
2560 bits >>= 7 - (l & 7);
2561 PUSH_BYTE(utf8, cur, bits);
2564 /* Determine how many chars are left in the requested field */
2566 if (howlen == e_star) field_len = 0;
2567 else field_len -= l;
2568 Zero(cur, field_len, char);
2574 const char *str, *end;
2581 str = SvPV_const(fromstr, fromlen);
2582 end = str + fromlen;
2583 if (DO_UTF8(fromstr)) {
2585 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2587 utf8_source = FALSE;
2588 utf8_flags = 0; /* Unused, but keep compilers happy */
2590 if (howlen == e_star) len = fromlen;
2591 field_len = (len+1)/2;
2592 GROWING(utf8, cat, start, cur, field_len);
2593 if (!utf8 && len > (I32)fromlen) len = fromlen;
2596 if (datumtype == 'H')
2600 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2601 if (val < 256 && isALPHA(val))
2602 bits |= (val + 9) & 0xf;
2605 } else if (isALPHA(*str))
2606 bits |= (*str++ + 9) & 0xf;
2608 bits |= *str++ & 0xf;
2609 if (l & 1) bits <<= 4;
2611 PUSH_BYTE(utf8, cur, bits);
2619 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2620 if (val < 256 && isALPHA(val))
2621 bits |= ((val + 9) & 0xf) << 4;
2623 bits |= (val & 0xf) << 4;
2624 } else if (isALPHA(*str))
2625 bits |= ((*str++ + 9) & 0xf) << 4;
2627 bits |= (*str++ & 0xf) << 4;
2628 if (l & 1) bits >>= 4;
2630 PUSH_BYTE(utf8, cur, bits);
2636 PUSH_BYTE(utf8, cur, bits);
2639 /* Determine how many chars are left in the requested field */
2641 if (howlen == e_star) field_len = 0;
2642 else field_len -= l;
2643 Zero(cur, field_len, char);
2651 aiv = SvIV(fromstr);
2652 if ((-128 > aiv || aiv > 127))
2653 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2654 "Character in 'c' format wrapped in pack");
2655 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2660 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2666 aiv = SvIV(fromstr);
2667 if ((0 > aiv || aiv > 0xff))
2668 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2669 "Character in 'C' format wrapped in pack");
2670 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2675 U8 in_bytes = (U8)IN_BYTES;
2677 end = start+SvLEN(cat)-1;
2678 if (utf8) end -= UTF8_MAXLEN-1;
2682 auv = SvUV(fromstr);
2683 if (in_bytes) auv = auv % 0x100;
2688 SvCUR_set(cat, cur - start);
2690 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2691 end = start+SvLEN(cat)-UTF8_MAXLEN;
2693 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2696 0 : UNICODE_ALLOW_ANY);
2701 SvCUR_set(cat, cur - start);
2702 marked_upgrade(aTHX_ cat, symptr);
2703 lookahead.flags |= FLAG_DO_UTF8;
2704 lookahead.strbeg = symptr->strbeg;
2707 cur = start + SvCUR(cat);
2708 end = start+SvLEN(cat)-UTF8_MAXLEN;
2711 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2712 "Character in 'W' format wrapped in pack");
2717 SvCUR_set(cat, cur - start);
2718 GROWING(0, cat, start, cur, len+1);
2719 end = start+SvLEN(cat)-1;
2721 *(U8 *) cur++ = (U8)auv;
2730 if (!(symptr->flags & FLAG_DO_UTF8)) {
2731 marked_upgrade(aTHX_ cat, symptr);
2732 lookahead.flags |= FLAG_DO_UTF8;
2733 lookahead.strbeg = symptr->strbeg;
2739 end = start+SvLEN(cat);
2740 if (!utf8) end -= UTF8_MAXLEN;
2744 auv = SvUV(fromstr);
2746 U8 buffer[UTF8_MAXLEN], *endb;
2747 endb = uvuni_to_utf8_flags(buffer, auv,
2749 0 : UNICODE_ALLOW_ANY);
2750 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2752 SvCUR_set(cat, cur - start);
2753 GROWING(0, cat, start, cur,
2754 len+(endb-buffer)*UTF8_EXPAND);
2755 end = start+SvLEN(cat);
2757 cur = bytes_to_uni(buffer, endb-buffer, cur);
2761 SvCUR_set(cat, cur - start);
2762 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2763 end = start+SvLEN(cat)-UTF8_MAXLEN;
2765 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
2767 0 : UNICODE_ALLOW_ANY);
2772 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2778 anv = SvNV(fromstr);
2779 # if defined(VMS) && !defined(_IEEE_FP)
2780 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2781 * on Alpha; fake it if we don't have them.
2785 else if (anv < -FLT_MAX)
2787 else afloat = (float)anv;
2789 afloat = (float)anv;
2791 DO_BO_PACK_N(afloat, float);
2792 PUSH_VAR(utf8, cur, afloat);
2800 anv = SvNV(fromstr);
2801 # if defined(VMS) && !defined(_IEEE_FP)
2802 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2803 * on Alpha; fake it if we don't have them.
2807 else if (anv < -DBL_MAX)
2809 else adouble = (double)anv;
2811 adouble = (double)anv;
2813 DO_BO_PACK_N(adouble, double);
2814 PUSH_VAR(utf8, cur, adouble);
2819 Zero(&anv, 1, NV); /* can be long double with unused bits */
2823 /* to work round a gcc/x86 bug; don't use SvNV */
2824 anv.nv = sv_2nv(fromstr);
2826 anv.nv = SvNV(fromstr);
2828 DO_BO_PACK_N(anv, NV);
2829 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes));
2833 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2836 /* long doubles can have unused bits, which may be nonzero */
2837 Zero(&aldouble, 1, long double);
2841 /* to work round a gcc/x86 bug; don't use SvNV */
2842 aldouble.ld = (long double)sv_2nv(fromstr);
2844 aldouble.ld = (long double)SvNV(fromstr);
2846 DO_BO_PACK_N(aldouble, long double);
2847 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes));
2852 case 'n' | TYPE_IS_SHRIEKING:
2857 ai16 = (I16)SvIV(fromstr);
2858 ai16 = PerlSock_htons(ai16);
2859 PUSH16(utf8, cur, &ai16);
2862 case 'v' | TYPE_IS_SHRIEKING:
2867 ai16 = (I16)SvIV(fromstr);
2869 PUSH16(utf8, cur, &ai16);
2872 case 'S' | TYPE_IS_SHRIEKING:
2873 #if SHORTSIZE != SIZE16
2875 unsigned short aushort;
2877 aushort = SvUV(fromstr);
2878 DO_BO_PACK(aushort, s);
2879 PUSH_VAR(utf8, cur, aushort);
2889 au16 = (U16)SvUV(fromstr);
2890 DO_BO_PACK(au16, 16);
2891 PUSH16(utf8, cur, &au16);
2894 case 's' | TYPE_IS_SHRIEKING:
2895 #if SHORTSIZE != SIZE16
2899 ashort = SvIV(fromstr);
2900 DO_BO_PACK(ashort, s);
2901 PUSH_VAR(utf8, cur, ashort);
2911 ai16 = (I16)SvIV(fromstr);
2912 DO_BO_PACK(ai16, 16);
2913 PUSH16(utf8, cur, &ai16);
2917 case 'I' | TYPE_IS_SHRIEKING:
2921 auint = SvUV(fromstr);
2922 DO_BO_PACK(auint, i);
2923 PUSH_VAR(utf8, cur, auint);
2930 aiv = SvIV(fromstr);
2931 #if IVSIZE == INTSIZE
2933 #elif IVSIZE == LONGSIZE
2935 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
2936 DO_BO_PACK(aiv, 64);
2938 Perl_croak(aTHX_ "'j' not supported on this platform");
2940 PUSH_VAR(utf8, cur, aiv);
2947 auv = SvUV(fromstr);
2948 #if UVSIZE == INTSIZE
2950 #elif UVSIZE == LONGSIZE
2952 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
2953 DO_BO_PACK(auv, 64);
2955 Perl_croak(aTHX_ "'J' not supported on this platform");
2957 PUSH_VAR(utf8, cur, auv);
2964 anv = SvNV(fromstr);
2968 SvCUR_set(cat, cur - start);
2969 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
2972 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2973 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2974 any negative IVs will have already been got by the croak()
2975 above. IOK is untrue for fractions, so we test them
2976 against UV_MAX_P1. */
2977 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2978 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
2979 char *in = buf + sizeof(buf);
2980 UV auv = SvUV(fromstr);
2983 *--in = (char)((auv & 0x7f) | 0x80);
2986 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2987 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2988 in, (buf + sizeof(buf)) - in);
2989 } else if (SvPOKp(fromstr))
2991 else if (SvNOKp(fromstr)) {
2992 /* 10**NV_MAX_10_EXP is the largest power of 10
2993 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
2994 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2995 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2996 And with that many bytes only Inf can overflow.
2997 Some C compilers are strict about integral constant
2998 expressions so we conservatively divide by a slightly
2999 smaller integer instead of multiplying by the exact
3000 floating-point value.
3002 #ifdef NV_MAX_10_EXP
3003 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3004 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3006 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3007 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3009 char *in = buf + sizeof(buf);
3011 anv = Perl_floor(anv);
3013 const NV next = Perl_floor(anv / 128);
3014 if (in <= buf) /* this cannot happen ;-) */
3015 Perl_croak(aTHX_ "Cannot compress integer in pack");
3016 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3019 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3020 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3021 in, (buf + sizeof(buf)) - in);
3030 /* Copy string and check for compliance */
3031 from = SvPV_const(fromstr, len);
3032 if ((norm = is_an_int(from, len)) == NULL)
3033 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3035 Newx(result, len, char);
3038 while (!done) *--in = div128(norm, &done) | 0x80;
3039 result[len - 1] &= 0x7F; /* clear continue bit */
3040 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3041 in, (result + len) - in);
3043 SvREFCNT_dec(norm); /* free norm */
3048 case 'i' | TYPE_IS_SHRIEKING:
3052 aint = SvIV(fromstr);
3053 DO_BO_PACK(aint, i);
3054 PUSH_VAR(utf8, cur, aint);
3057 case 'N' | TYPE_IS_SHRIEKING:
3062 au32 = SvUV(fromstr);
3063 au32 = PerlSock_htonl(au32);
3064 PUSH32(utf8, cur, &au32);
3067 case 'V' | TYPE_IS_SHRIEKING:
3072 au32 = SvUV(fromstr);
3074 PUSH32(utf8, cur, &au32);
3077 case 'L' | TYPE_IS_SHRIEKING:
3078 #if LONGSIZE != SIZE32
3080 unsigned long aulong;
3082 aulong = SvUV(fromstr);
3083 DO_BO_PACK(aulong, l);
3084 PUSH_VAR(utf8, cur, aulong);
3094 au32 = SvUV(fromstr);
3095 DO_BO_PACK(au32, 32);
3096 PUSH32(utf8, cur, &au32);
3099 case 'l' | TYPE_IS_SHRIEKING:
3100 #if LONGSIZE != SIZE32
3104 along = SvIV(fromstr);
3105 DO_BO_PACK(along, l);
3106 PUSH_VAR(utf8, cur, along);
3116 ai32 = SvIV(fromstr);
3117 DO_BO_PACK(ai32, 32);
3118 PUSH32(utf8, cur, &ai32);
3126 auquad = (Uquad_t) SvUV(fromstr);
3127 DO_BO_PACK(auquad, 64);
3128 PUSH_VAR(utf8, cur, auquad);
3135 aquad = (Quad_t)SvIV(fromstr);
3136 DO_BO_PACK(aquad, 64);
3137 PUSH_VAR(utf8, cur, aquad);
3140 #endif /* HAS_QUAD */
3142 len = 1; /* assume SV is correct length */
3143 GROWING(utf8, cat, start, cur, sizeof(char *));
3150 SvGETMAGIC(fromstr);
3151 if (!SvOK(fromstr)) aptr = NULL;
3153 /* XXX better yet, could spirit away the string to
3154 * a safe spot and hang on to it until the result
3155 * of pack() (and all copies of the result) are
3158 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3159 !SvREADONLY(fromstr)))) {
3160 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3161 "Attempt to pack pointer to temporary value");
3163 if (SvPOK(fromstr) || SvNIOK(fromstr))
3164 aptr = SvPV_nomg_const_nolen(fromstr);
3166 aptr = SvPV_force_flags_nolen(fromstr, 0);
3168 DO_BO_PACK_PC(aptr);
3169 PUSH_VAR(utf8, cur, aptr);
3173 const char *aptr, *aend;
3177 if (len <= 2) len = 45;
3178 else len = len / 3 * 3;
3180 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3181 "Field too wide in 'u' format in pack");
3184 aptr = SvPV_const(fromstr, fromlen);
3185 from_utf8 = DO_UTF8(fromstr);
3187 aend = aptr + fromlen;
3188 fromlen = sv_len_utf8_nomg(fromstr);
3189 } else aend = NULL; /* Unused, but keep compilers happy */
3190 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3191 while (fromlen > 0) {
3194 U8 hunk[1+63/3*4+1];
3196 if ((I32)fromlen > len)
3202 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3203 'u' | TYPE_IS_PACK)) {
3205 SvCUR_set(cat, cur - start);
3206 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3207 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3208 aptr, aend, buffer, (long) todo);
3210 end = doencodes(hunk, buffer, todo);
3212 end = doencodes(hunk, aptr, todo);
3215 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3222 SvCUR_set(cat, cur - start);
3224 *symptr = lookahead;
3233 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3236 SV *pat_sv = *++MARK;
3237 const char *pat = SvPV_const(pat_sv, fromlen);
3238 const char *patend = pat + fromlen;
3244 packlist(cat, pat, patend, MARK, SP + 1);
3254 * c-indentation-style: bsd
3256 * indent-tabs-mode: nil
3259 * ex: set ts=8 sts=4 sw=4 et: