3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
32 #define PERL_IN_PP_PACK_C
35 /* Types used by pack/unpack */
37 e_no_len, /* no length */
38 e_number, /* number, [] */
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
57 #define TEMPSYM_INIT(symptr, p, e, f) \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
81 U8 bytes[sizeof(long double)];
88 /* Maximum number of bytes to which a byte can grow due to upgrade */
92 * Offset for integer pack/unpack.
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
112 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
115 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
116 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
117 # define OFF16(p) ((char*)(p))
118 # define OFF32(p) ((char*)(p))
120 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
121 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
124 ++++ bad cray byte order
128 # define OFF16(p) ((char *) (p))
129 # define OFF32(p) ((char *) (p))
132 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
133 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
135 #if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
136 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
137 #elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
138 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
140 # error "Unsupported byteorder"
141 /* Need to add code here to re-instate mixed endian support.
142 NEEDS_SWAP would need to hold a flag indicating which action to
143 take, and S_reverse_copy and the code in uni_to_bytes would need
144 logic adding to deal with any mixed-endian transformations needed.
148 /* Only to be used inside a loop (see the break) */
149 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
152 if (!uni_to_bytes(aTHX_ &s, strend, \
153 (char *) (buf), len, datumtype)) break; \
156 S_reverse_copy(s, (char *) (buf), len); \
158 Copy(s, (char *) (buf), len, char); \
163 #define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
164 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
166 #define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
167 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
169 #define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
170 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
172 #define PUSH_VAR(utf8, aptr, var) \
173 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
175 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
176 #define MAX_SUB_TEMPLATE_LEVEL 100
178 /* flags (note that type modifiers can also be used as flags!) */
179 #define FLAG_WAS_UTF8 0x40
180 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
181 #define FLAG_UNPACK_ONLY_ONE 0x10
182 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
183 #define FLAG_SLASH 0x04
184 #define FLAG_COMMA 0x02
185 #define FLAG_PACK 0x01
188 S_mul128(pTHX_ SV *sv, U8 m)
191 char *s = SvPV(sv, len);
194 PERL_ARGS_ASSERT_MUL128;
196 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
197 SV * const tmpNew = newSVpvs("0000000000");
199 sv_catsv(tmpNew, sv);
200 SvREFCNT_dec(sv); /* free old sv */
205 while (!*t) /* trailing '\0'? */
208 const U32 i = ((*t - '0') << 7) + m;
209 *(t--) = '0' + (char)(i % 10);
215 /* Explosives and implosives. */
217 #if 'I' == 73 && 'J' == 74
218 /* On an ASCII/ISO kind of system */
219 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
222 Some other sort of character set - use memchr() so we don't match
225 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
229 #define TYPE_IS_SHRIEKING 0x100
230 #define TYPE_IS_BIG_ENDIAN 0x200
231 #define TYPE_IS_LITTLE_ENDIAN 0x400
232 #define TYPE_IS_PACK 0x800
233 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
234 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
235 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
237 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
238 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
240 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
242 # define DO_BO_PACK(var) \
245 my_swabn(&var, sizeof(var)); \
249 #define PACK_SIZE_CANNOT_CSUM 0x80
250 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
251 #define PACK_SIZE_MASK 0x3F
253 #include "packsizetables.c"
256 S_reverse_copy(const char *src, char *dest, STRLEN len)
264 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
267 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
268 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
269 /* We try to process malformed UTF-8 as much as possible (preferably with
270 warnings), but these two mean we make no progress in the string and
271 might enter an infinite loop */
272 if (retlen == (STRLEN) -1 || retlen == 0)
273 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
274 (int) TYPE_NO_MODIFIERS(datumtype));
276 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
277 "Character in '%c' format wrapped in unpack",
278 (int) TYPE_NO_MODIFIERS(datumtype));
285 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
286 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
290 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
294 const char *from = *s;
296 const U32 flags = ckWARN(WARN_UTF8) ?
297 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
298 const bool needs_swap = NEEDS_SWAP(datumtype);
303 for (;buf_len > 0; buf_len--) {
304 if (from >= end) return FALSE;
305 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
306 if (retlen == (STRLEN) -1 || retlen == 0) {
307 from += UTF8SKIP(from);
309 } else from += retlen;
315 *(U8 *)--buf = (U8)val;
317 *(U8 *)buf++ = (U8)val;
319 /* We have enough characters for the buffer. Did we have problems ? */
322 /* Rewalk the string fragment while warning */
324 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
325 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
326 if (ptr >= end) break;
327 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
329 if (from > end) from = end;
332 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
333 WARN_PACK : WARN_UNPACK),
334 "Character(s) in '%c' format wrapped in %s",
335 (int) TYPE_NO_MODIFIERS(datumtype),
336 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
343 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
347 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
348 if (val >= 0x100 || !ISUUCHAR(val) ||
349 retlen == (STRLEN) -1 || retlen == 0) {
353 *out = PL_uudmap[val] & 077;
359 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
360 const U8 * const end = start + len;
362 PERL_ARGS_ASSERT_BYTES_TO_UNI;
364 while (start < end) {
365 const UV uv = NATIVE_TO_ASCII(*start);
366 if (UNI_IS_INVARIANT(uv))
367 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
369 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
370 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
377 #define PUSH_BYTES(utf8, cur, buf, len) \
380 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
382 Copy(buf, cur, len, char); \
387 #define GROWING(utf8, cat, start, cur, in_len) \
389 STRLEN glen = (in_len); \
390 if (utf8) glen *= UTF8_EXPAND; \
391 if ((cur) + glen >= (start) + SvLEN(cat)) { \
392 (start) = sv_exp_grow(cat, glen); \
393 (cur) = (start) + SvCUR(cat); \
397 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
399 const STRLEN glen = (in_len); \
401 if (utf8) gl *= UTF8_EXPAND; \
402 if ((cur) + gl >= (start) + SvLEN(cat)) { \
404 SvCUR_set((cat), (cur) - (start)); \
405 (start) = sv_exp_grow(cat, gl); \
406 (cur) = (start) + SvCUR(cat); \
408 PUSH_BYTES(utf8, cur, buf, glen); \
411 #define PUSH_BYTE(utf8, s, byte) \
414 const U8 au8 = (byte); \
415 (s) = bytes_to_uni(&au8, 1, (s)); \
416 } else *(U8 *)(s)++ = (byte); \
419 /* Only to be used inside a loop (see the break) */
420 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
423 if (str >= end) break; \
424 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
425 if (retlen == (STRLEN) -1 || retlen == 0) { \
427 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
432 static const char *_action( const tempsym_t* symptr )
434 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
437 /* Returns the sizeof() struct described by pat */
439 S_measure_struct(pTHX_ tempsym_t* symptr)
443 PERL_ARGS_ASSERT_MEASURE_STRUCT;
445 while (next_symbol(symptr)) {
449 switch (symptr->howlen) {
451 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
455 /* e_no_len and e_number */
456 len = symptr->length;
460 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
463 /* endianness doesn't influence the size of a type */
464 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
466 Perl_croak(aTHX_ "Invalid type '%c' in %s",
467 (int)TYPE_NO_MODIFIERS(symptr->code),
469 case '.' | TYPE_IS_SHRIEKING:
470 case '@' | TYPE_IS_SHRIEKING:
474 case 'U': /* XXXX Is it correct? */
477 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
478 (int) TYPE_NO_MODIFIERS(symptr->code),
485 tempsym_t savsym = *symptr;
486 symptr->patptr = savsym.grpbeg;
487 symptr->patend = savsym.grpend;
488 /* XXXX Theoretically, we need to measure many times at
489 different positions, since the subexpression may contain
490 alignment commands, but be not of aligned length.
491 Need to detect this and croak(). */
492 size = measure_struct(symptr);
496 case 'X' | TYPE_IS_SHRIEKING:
497 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
499 if (!len) /* Avoid division by 0 */
501 len = total % len; /* Assumed: the start is aligned. */
506 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
508 case 'x' | TYPE_IS_SHRIEKING:
509 if (!len) /* Avoid division by 0 */
511 star = total % len; /* Assumed: the start is aligned. */
512 if (star) /* Other portable ways? */
536 size = sizeof(char*);
546 /* locate matching closing parenthesis or bracket
547 * returns char pointer to char after match, or NULL
550 S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
552 PERL_ARGS_ASSERT_GROUP_END;
554 while (patptr < patend) {
555 const char c = *patptr++;
562 while (patptr < patend && *patptr != '\n')
566 patptr = group_end(patptr, patend, ')') + 1;
568 patptr = group_end(patptr, patend, ']') + 1;
570 Perl_croak(aTHX_ "No group ending character '%c' found in template",
576 /* Convert unsigned decimal number to binary.
577 * Expects a pointer to the first digit and address of length variable
578 * Advances char pointer to 1st non-digit char and returns number
581 S_get_num(pTHX_ const char *patptr, I32 *lenptr )
583 I32 len = *patptr++ - '0';
585 PERL_ARGS_ASSERT_GET_NUM;
587 while (isDIGIT(*patptr)) {
588 if (len >= 0x7FFFFFFF/10)
589 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
590 len = (len * 10) + (*patptr++ - '0');
596 /* The marvellous template parsing routine: Using state stored in *symptr,
597 * locates next template code and count
600 S_next_symbol(pTHX_ tempsym_t* symptr )
602 const char* patptr = symptr->patptr;
603 const char* const patend = symptr->patend;
605 PERL_ARGS_ASSERT_NEXT_SYMBOL;
607 symptr->flags &= ~FLAG_SLASH;
609 while (patptr < patend) {
610 if (isSPACE(*patptr))
612 else if (*patptr == '#') {
614 while (patptr < patend && *patptr != '\n')
619 /* We should have found a template code */
620 I32 code = *patptr++ & 0xFF;
621 U32 inherited_modifiers = 0;
623 if (code == ','){ /* grandfather in commas but with a warning */
624 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
625 symptr->flags |= FLAG_COMMA;
626 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
627 "Invalid type ',' in %s", _action( symptr ) );
632 /* for '(', skip to ')' */
634 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
635 Perl_croak(aTHX_ "()-group starts with a count in %s",
637 symptr->grpbeg = patptr;
638 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
639 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
640 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
644 /* look for group modifiers to inherit */
645 if (TYPE_ENDIANNESS(symptr->flags)) {
646 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
647 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
650 /* look for modifiers */
651 while (patptr < patend) {
656 modifier = TYPE_IS_SHRIEKING;
657 allowed = "sSiIlLxXnNvV@.";
660 modifier = TYPE_IS_BIG_ENDIAN;
661 allowed = ENDIANNESS_ALLOWED_TYPES;
664 modifier = TYPE_IS_LITTLE_ENDIAN;
665 allowed = ENDIANNESS_ALLOWED_TYPES;
676 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
677 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
678 allowed, _action( symptr ) );
680 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
681 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
682 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
683 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
684 TYPE_ENDIANNESS_MASK)
685 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
686 *patptr, _action( symptr ) );
688 if ((code & modifier)) {
689 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
690 "Duplicate modifier '%c' after '%c' in %s",
691 *patptr, (int) TYPE_NO_MODIFIERS(code),
699 /* inherit modifiers */
700 code |= inherited_modifiers;
702 /* look for count and/or / */
703 if (patptr < patend) {
704 if (isDIGIT(*patptr)) {
705 patptr = get_num( patptr, &symptr->length );
706 symptr->howlen = e_number;
708 } else if (*patptr == '*') {
710 symptr->howlen = e_star;
712 } else if (*patptr == '[') {
713 const char* lenptr = ++patptr;
714 symptr->howlen = e_number;
715 patptr = group_end( patptr, patend, ']' ) + 1;
716 /* what kind of [] is it? */
717 if (isDIGIT(*lenptr)) {
718 lenptr = get_num( lenptr, &symptr->length );
720 Perl_croak(aTHX_ "Malformed integer in [] in %s",
723 tempsym_t savsym = *symptr;
724 symptr->patend = patptr-1;
725 symptr->patptr = lenptr;
726 savsym.length = measure_struct(symptr);
730 symptr->howlen = e_no_len;
735 while (patptr < patend) {
736 if (isSPACE(*patptr))
738 else if (*patptr == '#') {
740 while (patptr < patend && *patptr != '\n')
745 if (*patptr == '/') {
746 symptr->flags |= FLAG_SLASH;
748 if (patptr < patend &&
749 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
750 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
757 /* at end - no count, no / */
758 symptr->howlen = e_no_len;
763 symptr->patptr = patptr;
767 symptr->patptr = patptr;
772 There is no way to cleanly handle the case where we should process the
773 string per byte in its upgraded form while it's really in downgraded form
774 (e.g. estimates like strend-s as an upper bound for the number of
775 characters left wouldn't work). So if we foresee the need of this
776 (pattern starts with U or contains U0), we want to work on the encoded
777 version of the string. Users are advised to upgrade their pack string
778 themselves if they need to do a lot of unpacks like this on it
781 need_utf8(const char *pat, const char *patend)
785 PERL_ARGS_ASSERT_NEED_UTF8;
787 while (pat < patend) {
790 pat = (const char *) memchr(pat, '\n', patend-pat);
791 if (!pat) return FALSE;
792 } else if (pat[0] == 'U') {
793 if (first || pat[1] == '0') return TRUE;
794 } else first = FALSE;
801 first_symbol(const char *pat, const char *patend) {
802 PERL_ARGS_ASSERT_FIRST_SYMBOL;
804 while (pat < patend) {
805 if (pat[0] != '#') return pat[0];
807 pat = (const char *) memchr(pat, '\n', patend-pat);
815 =for apidoc unpackstring
817 The engine implementing the unpack() Perl function.
819 Using the template pat..patend, this function unpacks the string
820 s..strend into a number of mortal SVs, which it pushes onto the perl
821 argument (@_) stack (so you will need to issue a C<PUTBACK> before and
822 C<SPAGAIN> after the call to this function). It returns the number of
825 The strend and patend pointers should point to the byte following the last
826 character of each string.
828 Although this function returns its values on the perl argument stack, it
829 doesn't take any parameters from that stack (and thus in particular
830 there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
836 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
840 PERL_ARGS_ASSERT_UNPACKSTRING;
842 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
843 else if (need_utf8(pat, patend)) {
844 /* We probably should try to avoid this in case a scalar context call
845 wouldn't get to the "U0" */
846 STRLEN len = strend - s;
847 s = (char *) bytes_to_utf8((U8 *) s, &len);
850 flags |= FLAG_DO_UTF8;
853 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
854 flags |= FLAG_PARSE_UTF8;
856 TEMPSYM_INIT(&sym, pat, patend, flags);
858 return unpack_rec(&sym, s, s, strend, NULL );
862 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
866 const I32 start_sp_offset = SP - PL_stack_base;
871 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
873 bool explicit_length;
874 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
875 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
877 PERL_ARGS_ASSERT_UNPACK_REC;
879 symptr->strbeg = s - strbeg;
881 while (next_symbol(symptr)) {
884 I32 datumtype = symptr->code;
886 /* do first one only unless in list context
887 / is implemented by unpacking the count, then popping it from the
888 stack, so must check that we're not in the middle of a / */
890 && (SP - PL_stack_base == start_sp_offset + 1)
891 && (datumtype != '/') ) /* XXX can this be omitted */
894 switch (howlen = symptr->howlen) {
896 len = strend - strbeg; /* long enough */
899 /* e_no_len and e_number */
900 len = symptr->length;
904 explicit_length = TRUE;
906 beyond = s >= strend;
908 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
910 /* props nonzero means we can process this letter. */
911 const long size = props & PACK_SIZE_MASK;
912 const long howmany = (strend - s) / size;
916 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
917 if (len && unpack_only_one) len = 1;
923 needs_swap = NEEDS_SWAP(datumtype);
925 switch(TYPE_NO_ENDIANNESS(datumtype)) {
927 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
930 if (howlen == e_no_len)
931 len = 16; /* len is not specified */
939 tempsym_t savsym = *symptr;
940 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
941 symptr->flags |= group_modifiers;
942 symptr->patend = savsym.grpend;
943 symptr->previous = &savsym;
946 if (len && unpack_only_one) len = 1;
948 symptr->patptr = savsym.grpbeg;
949 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
950 else symptr->flags &= ~FLAG_PARSE_UTF8;
951 unpack_rec(symptr, s, strbeg, strend, &s);
952 if (s == strend && savsym.howlen == e_star)
953 break; /* No way to continue */
956 savsym.flags = symptr->flags & ~group_modifiers;
960 case '.' | TYPE_IS_SHRIEKING:
964 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
965 if (howlen == e_star) from = strbeg;
966 else if (len <= 0) from = s;
968 tempsym_t *group = symptr;
970 while (--len && group) group = group->previous;
971 from = group ? strbeg + group->strbeg : strbeg;
974 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
975 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
979 case '@' | TYPE_IS_SHRIEKING:
981 s = strbeg + symptr->strbeg;
982 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
986 Perl_croak(aTHX_ "'@' outside of string in unpack");
991 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
994 Perl_croak(aTHX_ "'@' outside of string in unpack");
998 case 'X' | TYPE_IS_SHRIEKING:
999 if (!len) /* Avoid division by 0 */
1002 const char *hop, *last;
1004 hop = last = strbeg;
1006 hop += UTF8SKIP(hop);
1013 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1017 len = (s - strbeg) % len;
1023 Perl_croak(aTHX_ "'X' outside of string in unpack");
1024 while (--s, UTF8_IS_CONTINUATION(*s)) {
1026 Perl_croak(aTHX_ "'X' outside of string in unpack");
1031 if (len > s - strbeg)
1032 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1036 case 'x' | TYPE_IS_SHRIEKING: {
1038 if (!len) /* Avoid division by 0 */
1040 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1041 else ai32 = (s - strbeg) % len;
1042 if (ai32 == 0) break;
1050 Perl_croak(aTHX_ "'x' outside of string in unpack");
1055 if (len > strend - s)
1056 Perl_croak(aTHX_ "'x' outside of string in unpack");
1061 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1067 /* Preliminary length estimate is assumed done in 'W' */
1068 if (len > strend - s) len = strend - s;
1074 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1075 if (hop >= strend) {
1077 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1082 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1084 } else if (len > strend - s)
1087 if (datumtype == 'Z') {
1088 /* 'Z' strips stuff after first null */
1089 const char *ptr, *end;
1091 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1092 sv = newSVpvn(s, ptr-s);
1093 if (howlen == e_star) /* exact for 'Z*' */
1094 len = ptr-s + (ptr != strend ? 1 : 0);
1095 } else if (datumtype == 'A') {
1096 /* 'A' strips both nulls and spaces */
1098 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1099 for (ptr = s+len-1; ptr >= s; ptr--)
1100 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1101 !isSPACE_utf8(ptr)) break;
1102 if (ptr >= s) ptr += UTF8SKIP(ptr);
1105 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1107 for (ptr = s+len-1; ptr >= s; ptr--)
1108 if (*ptr != 0 && !isSPACE(*ptr)) break;
1111 sv = newSVpvn(s, ptr-s);
1112 } else sv = newSVpvn(s, len);
1116 /* Undo any upgrade done due to need_utf8() */
1117 if (!(symptr->flags & FLAG_WAS_UTF8))
1118 sv_utf8_downgrade(sv, 0);
1126 if (howlen == e_star || len > (strend - s) * 8)
1127 len = (strend - s) * 8;
1130 while (len >= 8 && s < strend) {
1131 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1136 cuv += PL_bitcount[*(U8 *)s++];
1139 if (len && s < strend) {
1141 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1142 if (datumtype == 'b')
1144 if (bits & 1) cuv++;
1149 if (bits & 0x80) cuv++;
1156 sv = sv_2mortal(newSV(len ? len : 1));
1159 if (datumtype == 'b') {
1161 const I32 ai32 = len;
1162 for (len = 0; len < ai32; len++) {
1163 if (len & 7) bits >>= 1;
1165 if (s >= strend) break;
1166 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1167 } else bits = *(U8 *) s++;
1168 *str++ = bits & 1 ? '1' : '0';
1172 const I32 ai32 = len;
1173 for (len = 0; len < ai32; len++) {
1174 if (len & 7) bits <<= 1;
1176 if (s >= strend) break;
1177 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1178 } else bits = *(U8 *) s++;
1179 *str++ = bits & 0x80 ? '1' : '0';
1183 SvCUR_set(sv, str - SvPVX_const(sv));
1190 /* Preliminary length estimate, acceptable for utf8 too */
1191 if (howlen == e_star || len > (strend - s) * 2)
1192 len = (strend - s) * 2;
1194 sv = sv_2mortal(newSV(len ? len : 1));
1198 if (datumtype == 'h') {
1201 for (len = 0; len < ai32; len++) {
1202 if (len & 1) bits >>= 4;
1204 if (s >= strend) break;
1205 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1206 } else bits = * (U8 *) s++;
1208 *str++ = PL_hexdigit[bits & 15];
1212 const I32 ai32 = len;
1213 for (len = 0; len < ai32; len++) {
1214 if (len & 1) bits <<= 4;
1216 if (s >= strend) break;
1217 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1218 } else bits = *(U8 *) s++;
1220 *str++ = PL_hexdigit[(bits >> 4) & 15];
1225 SvCUR_set(sv, str - SvPVX_const(sv));
1232 if (explicit_length)
1233 /* Switch to "character" mode */
1234 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1239 while (len-- > 0 && s < strend) {
1244 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1245 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1246 if (retlen == (STRLEN) -1 || retlen == 0)
1247 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1251 aint = *(U8 *)(s)++;
1252 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1256 else if (checksum > bits_in_uv)
1257 cdouble += (NV)aint;
1265 while (len-- > 0 && s < strend) {
1267 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1268 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1269 if (retlen == (STRLEN) -1 || retlen == 0)
1270 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1274 else if (checksum > bits_in_uv)
1275 cdouble += (NV) val;
1279 } else if (!checksum)
1281 const U8 ch = *(U8 *) s++;
1284 else if (checksum > bits_in_uv)
1285 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1287 while (len-- > 0) cuv += *(U8 *) s++;
1291 if (explicit_length && howlen != e_star) {
1292 /* Switch to "bytes in UTF-8" mode */
1293 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1295 /* Should be impossible due to the need_utf8() test */
1296 Perl_croak(aTHX_ "U0 mode on a byte string");
1300 if (len > strend - s) len = strend - s;
1302 if (len && unpack_only_one) len = 1;
1306 while (len-- > 0 && s < strend) {
1310 U8 result[UTF8_MAXLEN];
1311 const char *ptr = s;
1313 /* Bug: warns about bad utf8 even if we are short on bytes
1314 and will break out of the loop */
1315 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1318 len = UTF8SKIP(result);
1319 if (!uni_to_bytes(aTHX_ &ptr, strend,
1320 (char *) &result[1], len-1, 'U')) break;
1321 auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT);
1324 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
1325 if (retlen == (STRLEN) -1 || retlen == 0)
1326 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1331 else if (checksum > bits_in_uv)
1332 cdouble += (NV) auv;
1337 case 's' | TYPE_IS_SHRIEKING:
1338 #if SHORTSIZE != SIZE16
1341 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
1344 else if (checksum > bits_in_uv)
1345 cdouble += (NV)ashort;
1357 #if U16SIZE > SIZE16
1360 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1361 #if U16SIZE > SIZE16
1367 else if (checksum > bits_in_uv)
1368 cdouble += (NV)ai16;
1373 case 'S' | TYPE_IS_SHRIEKING:
1374 #if SHORTSIZE != SIZE16
1376 unsigned short aushort;
1377 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap,
1381 else if (checksum > bits_in_uv)
1382 cdouble += (NV)aushort;
1395 #if U16SIZE > SIZE16
1398 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
1399 if (datumtype == 'n')
1400 au16 = PerlSock_ntohs(au16);
1401 if (datumtype == 'v')
1405 else if (checksum > bits_in_uv)
1406 cdouble += (NV) au16;
1411 case 'v' | TYPE_IS_SHRIEKING:
1412 case 'n' | TYPE_IS_SHRIEKING:
1415 # if U16SIZE > SIZE16
1418 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1419 /* There should never be any byte-swapping here. */
1420 assert(!TYPE_ENDIANNESS(datumtype));
1421 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1422 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1423 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1424 ai16 = (I16) vtohs((U16) ai16);
1427 else if (checksum > bits_in_uv)
1428 cdouble += (NV) ai16;
1434 case 'i' | TYPE_IS_SHRIEKING:
1437 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
1440 else if (checksum > bits_in_uv)
1441 cdouble += (NV)aint;
1447 case 'I' | TYPE_IS_SHRIEKING:
1450 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
1453 else if (checksum > bits_in_uv)
1454 cdouble += (NV)auint;
1462 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
1465 else if (checksum > bits_in_uv)
1474 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
1477 else if (checksum > bits_in_uv)
1483 case 'l' | TYPE_IS_SHRIEKING:
1484 #if LONGSIZE != SIZE32
1487 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
1490 else if (checksum > bits_in_uv)
1491 cdouble += (NV)along;
1502 #if U32SIZE > SIZE32
1505 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1506 #if U32SIZE > SIZE32
1507 if (ai32 > 2147483647) ai32 -= 4294967296;
1511 else if (checksum > bits_in_uv)
1512 cdouble += (NV)ai32;
1517 case 'L' | TYPE_IS_SHRIEKING:
1518 #if LONGSIZE != SIZE32
1520 unsigned long aulong;
1521 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
1524 else if (checksum > bits_in_uv)
1525 cdouble += (NV)aulong;
1538 #if U32SIZE > SIZE32
1541 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
1542 if (datumtype == 'N')
1543 au32 = PerlSock_ntohl(au32);
1544 if (datumtype == 'V')
1548 else if (checksum > bits_in_uv)
1549 cdouble += (NV)au32;
1554 case 'V' | TYPE_IS_SHRIEKING:
1555 case 'N' | TYPE_IS_SHRIEKING:
1558 #if U32SIZE > SIZE32
1561 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1562 /* There should never be any byte swapping here. */
1563 assert(!TYPE_ENDIANNESS(datumtype));
1564 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1565 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1566 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1567 ai32 = (I32)vtohl((U32)ai32);
1570 else if (checksum > bits_in_uv)
1571 cdouble += (NV)ai32;
1579 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1580 /* newSVpv generates undef if aptr is NULL */
1581 mPUSHs(newSVpv(aptr, 0));
1589 while (len > 0 && s < strend) {
1591 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1592 auv = (auv << 7) | (ch & 0x7f);
1593 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1601 if (++bytes >= sizeof(UV)) { /* promote to string */
1604 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
1605 while (s < strend) {
1606 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1607 sv = mul128(sv, (U8)(ch & 0x7f));
1613 t = SvPV_nolen_const(sv);
1622 if ((s >= strend) && bytes)
1623 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1627 if (symptr->howlen == e_star)
1628 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1630 if (s + sizeof(char*) <= strend) {
1632 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1633 /* newSVpvn generates undef if aptr is NULL */
1634 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1641 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
1643 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
1644 newSViv((IV)aquad) : newSVnv((NV)aquad));
1645 else if (checksum > bits_in_uv)
1646 cdouble += (NV)aquad;
1654 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
1656 mPUSHs(auquad <= UV_MAX ?
1657 newSVuv((UV)auquad) : newSVnv((NV)auquad));
1658 else if (checksum > bits_in_uv)
1659 cdouble += (NV)auquad;
1664 #endif /* HAS_QUAD */
1665 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1669 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
1679 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
1689 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1690 datumtype, needs_swap);
1697 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1701 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1702 sizeof(aldouble.bytes), datumtype, needs_swap);
1704 mPUSHn(aldouble.ld);
1706 cdouble += aldouble.ld;
1712 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1713 sv = sv_2mortal(newSV(l));
1714 if (l) SvPOK_on(sv);
1717 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1722 next_uni_uu(aTHX_ &s, strend, &a);
1723 next_uni_uu(aTHX_ &s, strend, &b);
1724 next_uni_uu(aTHX_ &s, strend, &c);
1725 next_uni_uu(aTHX_ &s, strend, &d);
1726 hunk[0] = (char)((a << 2) | (b >> 4));
1727 hunk[1] = (char)((b << 4) | (c >> 2));
1728 hunk[2] = (char)((c << 6) | d);
1730 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1738 /* possible checksum byte */
1739 const char *skip = s+UTF8SKIP(s);
1740 if (skip < strend && *skip == '\n')
1746 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1750 len = PL_uudmap[*(U8*)s++] & 077;
1752 if (s < strend && ISUUCHAR(*s))
1753 a = PL_uudmap[*(U8*)s++] & 077;
1756 if (s < strend && ISUUCHAR(*s))
1757 b = PL_uudmap[*(U8*)s++] & 077;
1760 if (s < strend && ISUUCHAR(*s))
1761 c = PL_uudmap[*(U8*)s++] & 077;
1764 if (s < strend && ISUUCHAR(*s))
1765 d = PL_uudmap[*(U8*)s++] & 077;
1768 hunk[0] = (char)((a << 2) | (b >> 4));
1769 hunk[1] = (char)((b << 4) | (c >> 2));
1770 hunk[2] = (char)((c << 6) | d);
1772 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1777 else /* possible checksum byte */
1778 if (s + 1 < strend && s[1] == '\n')
1788 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1789 (checksum > bits_in_uv &&
1790 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1793 anv = (NV) (1 << (checksum & 15));
1794 while (checksum >= 16) {
1798 while (cdouble < 0.0)
1800 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
1801 sv = newSVnv(cdouble);
1804 if (checksum < bits_in_uv) {
1805 UV mask = ((UV)1 << checksum) - 1;
1814 if (symptr->flags & FLAG_SLASH){
1815 if (SP - PL_stack_base - start_sp_offset <= 0)
1817 if( next_symbol(symptr) ){
1818 if( symptr->howlen == e_number )
1819 Perl_croak(aTHX_ "Count after length/code in unpack" );
1821 /* ...end of char buffer then no decent length available */
1822 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1824 /* take top of stack (hope it's numeric) */
1827 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1830 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1832 datumtype = symptr->code;
1833 explicit_length = FALSE;
1841 return SP - PL_stack_base - start_sp_offset;
1849 I32 gimme = GIMME_V;
1852 const char *pat = SvPV_const(left, llen);
1853 const char *s = SvPV_const(right, rlen);
1854 const char *strend = s + rlen;
1855 const char *patend = pat + llen;
1859 cnt = unpackstring(pat, patend, s, strend,
1860 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
1861 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
1864 if ( !cnt && gimme == G_SCALAR )
1865 PUSHs(&PL_sv_undef);
1870 doencodes(U8 *h, const char *s, I32 len)
1872 *h++ = PL_uuemap[len];
1874 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1875 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1876 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1877 *h++ = PL_uuemap[(077 & (s[2] & 077))];
1882 const char r = (len > 1 ? s[1] : '\0');
1883 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1884 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1885 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1886 *h++ = PL_uuemap[0];
1893 S_is_an_int(pTHX_ const char *s, STRLEN l)
1895 SV *result = newSVpvn(s, l);
1896 char *const result_c = SvPV_nolen(result); /* convenience */
1897 char *out = result_c;
1901 PERL_ARGS_ASSERT_IS_AN_INT;
1909 SvREFCNT_dec(result);
1932 SvREFCNT_dec(result);
1938 SvCUR_set(result, out - result_c);
1942 /* pnum must be '\0' terminated */
1944 S_div128(pTHX_ SV *pnum, bool *done)
1947 char * const s = SvPV(pnum, len);
1951 PERL_ARGS_ASSERT_DIV128;
1955 const int i = m * 10 + (*t - '0');
1956 const int r = (i >> 7); /* r < 10 */
1964 SvCUR_set(pnum, (STRLEN) (t - s));
1969 =for apidoc packlist
1971 The engine implementing pack() Perl function.
1977 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
1982 PERL_ARGS_ASSERT_PACKLIST;
1984 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
1986 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1987 Also make sure any UTF8 flag is loaded */
1988 SvPV_force_nolen(cat);
1990 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
1992 (void)pack_rec( cat, &sym, beglist, endlist );
1995 /* like sv_utf8_upgrade, but also repoint the group start markers */
1997 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2000 const char *from_ptr, *from_start, *from_end, **marks, **m;
2001 char *to_start, *to_ptr;
2003 if (SvUTF8(sv)) return;
2005 from_start = SvPVX_const(sv);
2006 from_end = from_start + SvCUR(sv);
2007 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2008 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2009 if (from_ptr == from_end) {
2010 /* Simple case: no character needs to be changed */
2015 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2016 Newx(to_start, len, char);
2017 Copy(from_start, to_start, from_ptr-from_start, char);
2018 to_ptr = to_start + (from_ptr-from_start);
2020 Newx(marks, sym_ptr->level+2, const char *);
2021 for (group=sym_ptr; group; group = group->previous)
2022 marks[group->level] = from_start + group->strbeg;
2023 marks[sym_ptr->level+1] = from_end+1;
2024 for (m = marks; *m < from_ptr; m++)
2025 *m = to_start + (*m-from_start);
2027 for (;from_ptr < from_end; from_ptr++) {
2028 while (*m == from_ptr) *m++ = to_ptr;
2029 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2033 while (*m == from_ptr) *m++ = to_ptr;
2034 if (m != marks + sym_ptr->level+1) {
2037 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2038 "level=%d", m, marks, sym_ptr->level);
2040 for (group=sym_ptr; group; group = group->previous)
2041 group->strbeg = marks[group->level] - to_start;
2046 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2047 from_start -= SvIVX(sv);
2050 SvFLAGS(sv) &= ~SVf_OOK;
2053 Safefree(from_start);
2054 SvPV_set(sv, to_start);
2055 SvCUR_set(sv, to_ptr - to_start);
2060 /* Exponential string grower. Makes string extension effectively O(n)
2061 needed says how many extra bytes we need (not counting the final '\0')
2062 Only grows the string if there is an actual lack of space
2065 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2066 const STRLEN cur = SvCUR(sv);
2067 const STRLEN len = SvLEN(sv);
2070 PERL_ARGS_ASSERT_SV_EXP_GROW;
2072 if (len - cur > needed) return SvPVX(sv);
2073 extend = needed > len ? needed : len;
2074 return SvGROW(sv, len+extend+1);
2079 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2082 tempsym_t lookahead;
2083 I32 items = endlist - beglist;
2084 bool found = next_symbol(symptr);
2085 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2086 bool warn_utf8 = ckWARN(WARN_UTF8);
2088 PERL_ARGS_ASSERT_PACK_REC;
2090 if (symptr->level == 0 && found && symptr->code == 'U') {
2091 marked_upgrade(aTHX_ cat, symptr);
2092 symptr->flags |= FLAG_DO_UTF8;
2095 symptr->strbeg = SvCUR(cat);
2101 SV *lengthcode = NULL;
2102 I32 datumtype = symptr->code;
2103 howlen_t howlen = symptr->howlen;
2104 char *start = SvPVX(cat);
2105 char *cur = start + SvCUR(cat);
2108 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2112 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2116 /* e_no_len and e_number */
2117 len = symptr->length;
2122 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2124 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2125 /* We can process this letter. */
2126 STRLEN size = props & PACK_SIZE_MASK;
2127 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2131 /* Look ahead for next symbol. Do we have code/code? */
2132 lookahead = *symptr;
2133 found = next_symbol(&lookahead);
2134 if (symptr->flags & FLAG_SLASH) {
2136 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2137 if (strchr("aAZ", lookahead.code)) {
2138 if (lookahead.howlen == e_number) count = lookahead.length;
2141 count = sv_len_utf8(*beglist);
2144 if (lookahead.code == 'Z') count++;
2147 if (lookahead.howlen == e_number && lookahead.length < items)
2148 count = lookahead.length;
2151 lookahead.howlen = e_number;
2152 lookahead.length = count;
2153 lengthcode = sv_2mortal(newSViv(count));
2156 needs_swap = NEEDS_SWAP(datumtype);
2158 /* Code inside the switch must take care to properly update
2159 cat (CUR length and '\0' termination) if it updated *cur and
2160 doesn't simply leave using break */
2161 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2163 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2164 (int) TYPE_NO_MODIFIERS(datumtype));
2166 Perl_croak(aTHX_ "'%%' may not be used in pack");
2169 case '.' | TYPE_IS_SHRIEKING:
2171 if (howlen == e_star) from = start;
2172 else if (len == 0) from = cur;
2174 tempsym_t *group = symptr;
2176 while (--len && group) group = group->previous;
2177 from = group ? start + group->strbeg : start;
2180 len = SvIV(fromstr);
2182 case '@' | TYPE_IS_SHRIEKING:
2184 from = start + symptr->strbeg;
2186 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2188 while (len && from < cur) {
2189 from += UTF8SKIP(from);
2193 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2195 /* Here we know from == cur */
2197 GROWING(0, cat, start, cur, len);
2198 Zero(cur, len, char);
2200 } else if (from < cur) {
2203 } else goto no_change;
2211 if (len > 0) goto grow;
2212 if (len == 0) goto no_change;
2219 tempsym_t savsym = *symptr;
2220 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2221 symptr->flags |= group_modifiers;
2222 symptr->patend = savsym.grpend;
2224 symptr->previous = &lookahead;
2227 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2228 else symptr->flags &= ~FLAG_PARSE_UTF8;
2229 was_utf8 = SvUTF8(cat);
2230 symptr->patptr = savsym.grpbeg;
2231 beglist = pack_rec(cat, symptr, beglist, endlist);
2232 if (SvUTF8(cat) != was_utf8)
2233 /* This had better be an upgrade while in utf8==0 mode */
2236 if (savsym.howlen == e_star && beglist == endlist)
2237 break; /* No way to continue */
2239 items = endlist - beglist;
2240 lookahead.flags = symptr->flags & ~group_modifiers;
2243 case 'X' | TYPE_IS_SHRIEKING:
2244 if (!len) /* Avoid division by 0 */
2251 hop += UTF8SKIP(hop);
2258 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2262 len = (cur-start) % len;
2266 if (len < 1) goto no_change;
2270 Perl_croak(aTHX_ "'%c' outside of string in pack",
2271 (int) TYPE_NO_MODIFIERS(datumtype));
2272 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2274 Perl_croak(aTHX_ "'%c' outside of string in pack",
2275 (int) TYPE_NO_MODIFIERS(datumtype));
2281 if (cur - start < len)
2282 Perl_croak(aTHX_ "'%c' outside of string in pack",
2283 (int) TYPE_NO_MODIFIERS(datumtype));
2286 if (cur < start+symptr->strbeg) {
2287 /* Make sure group starts don't point into the void */
2289 const STRLEN length = cur-start;
2290 for (group = symptr;
2291 group && length < group->strbeg;
2292 group = group->previous) group->strbeg = length;
2293 lookahead.strbeg = length;
2296 case 'x' | TYPE_IS_SHRIEKING: {
2298 if (!len) /* Avoid division by 0 */
2300 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2301 else ai32 = (cur - start) % len;
2302 if (ai32 == 0) goto no_change;
2314 aptr = SvPV_const(fromstr, fromlen);
2315 if (DO_UTF8(fromstr)) {
2316 const char *end, *s;
2318 if (!utf8 && !SvUTF8(cat)) {
2319 marked_upgrade(aTHX_ cat, symptr);
2320 lookahead.flags |= FLAG_DO_UTF8;
2321 lookahead.strbeg = symptr->strbeg;
2324 cur = start + SvCUR(cat);
2326 if (howlen == e_star) {
2327 if (utf8) goto string_copy;
2331 end = aptr + fromlen;
2332 fromlen = datumtype == 'Z' ? len-1 : len;
2333 while ((I32) fromlen > 0 && s < end) {
2338 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2341 if (datumtype == 'Z') len++;
2347 fromlen = len - fromlen;
2348 if (datumtype == 'Z') fromlen--;
2349 if (howlen == e_star) {
2351 if (datumtype == 'Z') len++;
2353 GROWING(0, cat, start, cur, len);
2354 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2355 datumtype | TYPE_IS_PACK))
2356 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2357 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2358 (int)datumtype, aptr, end, cur, (UV)fromlen);
2362 if (howlen == e_star) {
2364 if (datumtype == 'Z') len++;
2366 if (len <= (I32) fromlen) {
2368 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2370 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2372 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2373 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2375 while (fromlen > 0) {
2376 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2382 if (howlen == e_star) {
2384 if (datumtype == 'Z') len++;
2386 if (len <= (I32) fromlen) {
2388 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2390 GROWING(0, cat, start, cur, len);
2391 Copy(aptr, cur, fromlen, char);
2395 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2402 const char *str, *end;
2409 str = SvPV_const(fromstr, fromlen);
2410 end = str + fromlen;
2411 if (DO_UTF8(fromstr)) {
2413 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2415 utf8_source = FALSE;
2416 utf8_flags = 0; /* Unused, but keep compilers happy */
2418 if (howlen == e_star) len = fromlen;
2419 field_len = (len+7)/8;
2420 GROWING(utf8, cat, start, cur, field_len);
2421 if (len > (I32)fromlen) len = fromlen;
2424 if (datumtype == 'B')
2428 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2430 } else bits |= *str++ & 1;
2431 if (l & 7) bits <<= 1;
2433 PUSH_BYTE(utf8, cur, bits);
2438 /* datumtype == 'b' */
2442 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2443 if (val & 1) bits |= 0x80;
2444 } else if (*str++ & 1)
2446 if (l & 7) bits >>= 1;
2448 PUSH_BYTE(utf8, cur, bits);
2454 if (datumtype == 'B')
2455 bits <<= 7 - (l & 7);
2457 bits >>= 7 - (l & 7);
2458 PUSH_BYTE(utf8, cur, bits);
2461 /* Determine how many chars are left in the requested field */
2463 if (howlen == e_star) field_len = 0;
2464 else field_len -= l;
2465 Zero(cur, field_len, char);
2471 const char *str, *end;
2478 str = SvPV_const(fromstr, fromlen);
2479 end = str + fromlen;
2480 if (DO_UTF8(fromstr)) {
2482 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2484 utf8_source = FALSE;
2485 utf8_flags = 0; /* Unused, but keep compilers happy */
2487 if (howlen == e_star) len = fromlen;
2488 field_len = (len+1)/2;
2489 GROWING(utf8, cat, start, cur, field_len);
2490 if (!utf8 && len > (I32)fromlen) len = fromlen;
2493 if (datumtype == 'H')
2497 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2498 if (val < 256 && isALPHA(val))
2499 bits |= (val + 9) & 0xf;
2502 } else if (isALPHA(*str))
2503 bits |= (*str++ + 9) & 0xf;
2505 bits |= *str++ & 0xf;
2506 if (l & 1) bits <<= 4;
2508 PUSH_BYTE(utf8, cur, bits);
2516 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2517 if (val < 256 && isALPHA(val))
2518 bits |= ((val + 9) & 0xf) << 4;
2520 bits |= (val & 0xf) << 4;
2521 } else if (isALPHA(*str))
2522 bits |= ((*str++ + 9) & 0xf) << 4;
2524 bits |= (*str++ & 0xf) << 4;
2525 if (l & 1) bits >>= 4;
2527 PUSH_BYTE(utf8, cur, bits);
2533 PUSH_BYTE(utf8, cur, bits);
2536 /* Determine how many chars are left in the requested field */
2538 if (howlen == e_star) field_len = 0;
2539 else field_len -= l;
2540 Zero(cur, field_len, char);
2548 aiv = SvIV(fromstr);
2549 if ((-128 > aiv || aiv > 127))
2550 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2551 "Character in 'c' format wrapped in pack");
2552 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2557 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2563 aiv = SvIV(fromstr);
2564 if ((0 > aiv || aiv > 0xff))
2565 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2566 "Character in 'C' format wrapped in pack");
2567 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2572 U8 in_bytes = (U8)IN_BYTES;
2574 end = start+SvLEN(cat)-1;
2575 if (utf8) end -= UTF8_MAXLEN-1;
2579 auv = SvUV(fromstr);
2580 if (in_bytes) auv = auv % 0x100;
2585 SvCUR_set(cat, cur - start);
2587 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2588 end = start+SvLEN(cat)-UTF8_MAXLEN;
2590 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2593 0 : UNICODE_ALLOW_ANY);
2598 SvCUR_set(cat, cur - start);
2599 marked_upgrade(aTHX_ cat, symptr);
2600 lookahead.flags |= FLAG_DO_UTF8;
2601 lookahead.strbeg = symptr->strbeg;
2604 cur = start + SvCUR(cat);
2605 end = start+SvLEN(cat)-UTF8_MAXLEN;
2608 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2609 "Character in 'W' format wrapped in pack");
2614 SvCUR_set(cat, cur - start);
2615 GROWING(0, cat, start, cur, len+1);
2616 end = start+SvLEN(cat)-1;
2618 *(U8 *) cur++ = (U8)auv;
2627 if (!(symptr->flags & FLAG_DO_UTF8)) {
2628 marked_upgrade(aTHX_ cat, symptr);
2629 lookahead.flags |= FLAG_DO_UTF8;
2630 lookahead.strbeg = symptr->strbeg;
2636 end = start+SvLEN(cat);
2637 if (!utf8) end -= UTF8_MAXLEN;
2641 auv = SvUV(fromstr);
2643 U8 buffer[UTF8_MAXLEN], *endb;
2644 endb = uvuni_to_utf8_flags(buffer, auv,
2646 0 : UNICODE_ALLOW_ANY);
2647 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2649 SvCUR_set(cat, cur - start);
2650 GROWING(0, cat, start, cur,
2651 len+(endb-buffer)*UTF8_EXPAND);
2652 end = start+SvLEN(cat);
2654 cur = bytes_to_uni(buffer, endb-buffer, cur);
2658 SvCUR_set(cat, cur - start);
2659 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2660 end = start+SvLEN(cat)-UTF8_MAXLEN;
2662 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
2664 0 : UNICODE_ALLOW_ANY);
2669 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2675 anv = SvNV(fromstr);
2676 # if defined(VMS) && !defined(_IEEE_FP)
2677 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2678 * on Alpha; fake it if we don't have them.
2682 else if (anv < -FLT_MAX)
2684 else afloat = (float)anv;
2686 afloat = (float)anv;
2689 PUSH_VAR(utf8, cur, afloat);
2697 anv = SvNV(fromstr);
2698 # if defined(VMS) && !defined(_IEEE_FP)
2699 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2700 * on Alpha; fake it if we don't have them.
2704 else if (anv < -DBL_MAX)
2706 else adouble = (double)anv;
2708 adouble = (double)anv;
2710 DO_BO_PACK(adouble);
2711 PUSH_VAR(utf8, cur, adouble);
2716 Zero(&anv, 1, NV); /* can be long double with unused bits */
2720 /* to work round a gcc/x86 bug; don't use SvNV */
2721 anv.nv = sv_2nv(fromstr);
2723 anv.nv = SvNV(fromstr);
2726 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes));
2730 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2733 /* long doubles can have unused bits, which may be nonzero */
2734 Zero(&aldouble, 1, long double);
2738 /* to work round a gcc/x86 bug; don't use SvNV */
2739 aldouble.ld = (long double)sv_2nv(fromstr);
2741 aldouble.ld = (long double)SvNV(fromstr);
2743 DO_BO_PACK(aldouble);
2744 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes));
2749 case 'n' | TYPE_IS_SHRIEKING:
2754 ai16 = (I16)SvIV(fromstr);
2755 ai16 = PerlSock_htons(ai16);
2756 PUSH16(utf8, cur, &ai16);
2759 case 'v' | TYPE_IS_SHRIEKING:
2764 ai16 = (I16)SvIV(fromstr);
2766 PUSH16(utf8, cur, &ai16);
2769 case 'S' | TYPE_IS_SHRIEKING:
2770 #if SHORTSIZE != SIZE16
2772 unsigned short aushort;
2774 aushort = SvUV(fromstr);
2775 DO_BO_PACK(aushort);
2776 PUSH_VAR(utf8, cur, aushort);
2786 au16 = (U16)SvUV(fromstr);
2788 PUSH16(utf8, cur, &au16);
2791 case 's' | TYPE_IS_SHRIEKING:
2792 #if SHORTSIZE != SIZE16
2796 ashort = SvIV(fromstr);
2798 PUSH_VAR(utf8, cur, ashort);
2808 ai16 = (I16)SvIV(fromstr);
2810 PUSH16(utf8, cur, &ai16);
2814 case 'I' | TYPE_IS_SHRIEKING:
2818 auint = SvUV(fromstr);
2820 PUSH_VAR(utf8, cur, auint);
2827 aiv = SvIV(fromstr);
2829 PUSH_VAR(utf8, cur, aiv);
2836 auv = SvUV(fromstr);
2838 PUSH_VAR(utf8, cur, auv);
2845 anv = SvNV(fromstr);
2849 SvCUR_set(cat, cur - start);
2850 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
2853 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2854 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2855 any negative IVs will have already been got by the croak()
2856 above. IOK is untrue for fractions, so we test them
2857 against UV_MAX_P1. */
2858 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2859 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
2860 char *in = buf + sizeof(buf);
2861 UV auv = SvUV(fromstr);
2864 *--in = (char)((auv & 0x7f) | 0x80);
2867 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2868 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2869 in, (buf + sizeof(buf)) - in);
2870 } else if (SvPOKp(fromstr))
2872 else if (SvNOKp(fromstr)) {
2873 /* 10**NV_MAX_10_EXP is the largest power of 10
2874 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
2875 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2876 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2877 And with that many bytes only Inf can overflow.
2878 Some C compilers are strict about integral constant
2879 expressions so we conservatively divide by a slightly
2880 smaller integer instead of multiplying by the exact
2881 floating-point value.
2883 #ifdef NV_MAX_10_EXP
2884 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
2885 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
2887 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
2888 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
2890 char *in = buf + sizeof(buf);
2892 anv = Perl_floor(anv);
2894 const NV next = Perl_floor(anv / 128);
2895 if (in <= buf) /* this cannot happen ;-) */
2896 Perl_croak(aTHX_ "Cannot compress integer in pack");
2897 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
2900 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2901 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2902 in, (buf + sizeof(buf)) - in);
2911 /* Copy string and check for compliance */
2912 from = SvPV_const(fromstr, len);
2913 if ((norm = is_an_int(from, len)) == NULL)
2914 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
2916 Newx(result, len, char);
2919 while (!done) *--in = div128(norm, &done) | 0x80;
2920 result[len - 1] &= 0x7F; /* clear continue bit */
2921 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2922 in, (result + len) - in);
2924 SvREFCNT_dec(norm); /* free norm */
2929 case 'i' | TYPE_IS_SHRIEKING:
2933 aint = SvIV(fromstr);
2935 PUSH_VAR(utf8, cur, aint);
2938 case 'N' | TYPE_IS_SHRIEKING:
2943 au32 = SvUV(fromstr);
2944 au32 = PerlSock_htonl(au32);
2945 PUSH32(utf8, cur, &au32);
2948 case 'V' | TYPE_IS_SHRIEKING:
2953 au32 = SvUV(fromstr);
2955 PUSH32(utf8, cur, &au32);
2958 case 'L' | TYPE_IS_SHRIEKING:
2959 #if LONGSIZE != SIZE32
2961 unsigned long aulong;
2963 aulong = SvUV(fromstr);
2965 PUSH_VAR(utf8, cur, aulong);
2975 au32 = SvUV(fromstr);
2977 PUSH32(utf8, cur, &au32);
2980 case 'l' | TYPE_IS_SHRIEKING:
2981 #if LONGSIZE != SIZE32
2985 along = SvIV(fromstr);
2987 PUSH_VAR(utf8, cur, along);
2997 ai32 = SvIV(fromstr);
2999 PUSH32(utf8, cur, &ai32);
3007 auquad = (Uquad_t) SvUV(fromstr);
3009 PUSH_VAR(utf8, cur, auquad);
3016 aquad = (Quad_t)SvIV(fromstr);
3018 PUSH_VAR(utf8, cur, aquad);
3021 #endif /* HAS_QUAD */
3023 len = 1; /* assume SV is correct length */
3024 GROWING(utf8, cat, start, cur, sizeof(char *));
3031 SvGETMAGIC(fromstr);
3032 if (!SvOK(fromstr)) aptr = NULL;
3034 /* XXX better yet, could spirit away the string to
3035 * a safe spot and hang on to it until the result
3036 * of pack() (and all copies of the result) are
3039 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3040 !SvREADONLY(fromstr)))) {
3041 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3042 "Attempt to pack pointer to temporary value");
3044 if (SvPOK(fromstr) || SvNIOK(fromstr))
3045 aptr = SvPV_nomg_const_nolen(fromstr);
3047 aptr = SvPV_force_flags_nolen(fromstr, 0);
3050 PUSH_VAR(utf8, cur, aptr);
3054 const char *aptr, *aend;
3058 if (len <= 2) len = 45;
3059 else len = len / 3 * 3;
3061 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3062 "Field too wide in 'u' format in pack");
3065 aptr = SvPV_const(fromstr, fromlen);
3066 from_utf8 = DO_UTF8(fromstr);
3068 aend = aptr + fromlen;
3069 fromlen = sv_len_utf8_nomg(fromstr);
3070 } else aend = NULL; /* Unused, but keep compilers happy */
3071 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3072 while (fromlen > 0) {
3075 U8 hunk[1+63/3*4+1];
3077 if ((I32)fromlen > len)
3083 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3084 'u' | TYPE_IS_PACK)) {
3086 SvCUR_set(cat, cur - start);
3087 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3088 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3089 aptr, aend, buffer, (long) todo);
3091 end = doencodes(hunk, buffer, todo);
3093 end = doencodes(hunk, aptr, todo);
3096 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3103 SvCUR_set(cat, cur - start);
3105 *symptr = lookahead;
3114 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3117 SV *pat_sv = *++MARK;
3118 const char *pat = SvPV_const(pat_sv, fromlen);
3119 const char *patend = pat + fromlen;
3125 packlist(cat, pat, patend, MARK, SP + 1);
3135 * c-indentation-style: bsd
3137 * indent-tabs-mode: nil
3140 * ex: set ts=8 sts=4 sw=4 et: