3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
32 #define PERL_IN_PP_PACK_C
35 /* Types used by pack/unpack */
37 e_no_len, /* no length */
38 e_number, /* number, [] */
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
57 #define TEMPSYM_INIT(symptr, p, e, f) \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
81 U8 bytes[sizeof(long double)];
88 /* Maximum number of bytes to which a byte can grow due to upgrade */
92 * Offset for integer pack/unpack.
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
112 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
115 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
116 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
117 # define OFF16(p) ((char*)(p))
118 # define OFF32(p) ((char*)(p))
120 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
121 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
124 ++++ bad cray byte order
128 # define OFF16(p) ((char *) (p))
129 # define OFF32(p) ((char *) (p))
132 #define PUSH16(utf8, cur, p, needs_swap) \
133 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
134 #define PUSH32(utf8, cur, p, needs_swap) \
135 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
137 #if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
138 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
139 #elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
140 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
142 # error "Unsupported byteorder"
143 /* Need to add code here to re-instate mixed endian support.
144 NEEDS_SWAP would need to hold a flag indicating which action to
145 take, and S_reverse_copy and the code in uni_to_bytes would need
146 logic adding to deal with any mixed-endian transformations needed.
150 /* Only to be used inside a loop (see the break) */
151 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
153 if (UNLIKELY(utf8)) { \
154 if (!uni_to_bytes(aTHX_ &s, strend, \
155 (char *) (buf), len, datumtype)) break; \
157 if (UNLIKELY(needs_swap)) \
158 S_reverse_copy(s, (char *) (buf), len); \
160 Copy(s, (char *) (buf), len, char); \
165 #define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
166 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
168 #define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
169 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
171 #define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
172 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
174 #define PUSH_VAR(utf8, aptr, var, needs_swap) \
175 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
177 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
178 #define MAX_SUB_TEMPLATE_LEVEL 100
180 /* flags (note that type modifiers can also be used as flags!) */
181 #define FLAG_WAS_UTF8 0x40
182 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
183 #define FLAG_UNPACK_ONLY_ONE 0x10
184 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
185 #define FLAG_SLASH 0x04
186 #define FLAG_COMMA 0x02
187 #define FLAG_PACK 0x01
190 S_mul128(pTHX_ SV *sv, U8 m)
193 char *s = SvPV(sv, len);
196 PERL_ARGS_ASSERT_MUL128;
198 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
199 SV * const tmpNew = newSVpvs("0000000000");
201 sv_catsv(tmpNew, sv);
202 SvREFCNT_dec(sv); /* free old sv */
207 while (!*t) /* trailing '\0'? */
210 const U32 i = ((*t - '0') << 7) + m;
211 *(t--) = '0' + (char)(i % 10);
217 /* Explosives and implosives. */
219 #if 'I' == 73 && 'J' == 74
220 /* On an ASCII/ISO kind of system */
221 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
224 Some other sort of character set - use memchr() so we don't match
227 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
231 #define TYPE_IS_SHRIEKING 0x100
232 #define TYPE_IS_BIG_ENDIAN 0x200
233 #define TYPE_IS_LITTLE_ENDIAN 0x400
234 #define TYPE_IS_PACK 0x800
235 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
236 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
237 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
239 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
240 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
242 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
244 #define PACK_SIZE_CANNOT_CSUM 0x80
245 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
246 #define PACK_SIZE_MASK 0x3F
248 #include "packsizetables.c"
251 S_reverse_copy(const char *src, char *dest, STRLEN len)
259 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
262 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
263 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
264 /* We try to process malformed UTF-8 as much as possible (preferably with
265 warnings), but these two mean we make no progress in the string and
266 might enter an infinite loop */
267 if (retlen == (STRLEN) -1 || retlen == 0)
268 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
269 (int) TYPE_NO_MODIFIERS(datumtype));
271 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
272 "Character in '%c' format wrapped in unpack",
273 (int) TYPE_NO_MODIFIERS(datumtype));
280 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
281 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
285 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
289 const char *from = *s;
291 const U32 flags = ckWARN(WARN_UTF8) ?
292 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
293 const bool needs_swap = NEEDS_SWAP(datumtype);
295 if (UNLIKELY(needs_swap))
298 for (;buf_len > 0; buf_len--) {
299 if (from >= end) return FALSE;
300 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
301 if (retlen == (STRLEN) -1 || retlen == 0) {
302 from += UTF8SKIP(from);
304 } else from += retlen;
309 if (UNLIKELY(needs_swap))
310 *(U8 *)--buf = (U8)val;
312 *(U8 *)buf++ = (U8)val;
314 /* We have enough characters for the buffer. Did we have problems ? */
317 /* Rewalk the string fragment while warning */
319 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
320 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
321 if (ptr >= end) break;
322 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
324 if (from > end) from = end;
327 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
328 WARN_PACK : WARN_UNPACK),
329 "Character(s) in '%c' format wrapped in %s",
330 (int) TYPE_NO_MODIFIERS(datumtype),
331 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
338 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
342 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
343 if (val >= 0x100 || !ISUUCHAR(val) ||
344 retlen == (STRLEN) -1 || retlen == 0) {
348 *out = PL_uudmap[val] & 077;
354 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
355 PERL_ARGS_ASSERT_BYTES_TO_UNI;
357 if (UNLIKELY(needs_swap)) {
358 const U8 *p = start + len;
359 while (p-- > start) {
360 append_utf8_from_native_byte(*p, (U8 **) & dest);
363 const U8 * const end = start + len;
364 while (start < end) {
365 append_utf8_from_native_byte(*start, (U8 **) & dest);
372 #define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
374 if (UNLIKELY(utf8)) \
375 (cur) = S_bytes_to_uni((U8 *) buf, len, (cur), needs_swap); \
377 if (UNLIKELY(needs_swap)) \
378 S_reverse_copy((char *)(buf), cur, len); \
380 Copy(buf, cur, len, char); \
385 #define GROWING(utf8, cat, start, cur, in_len) \
387 STRLEN glen = (in_len); \
388 if (utf8) glen *= UTF8_EXPAND; \
389 if ((cur) + glen >= (start) + SvLEN(cat)) { \
390 (start) = sv_exp_grow(cat, glen); \
391 (cur) = (start) + SvCUR(cat); \
395 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
397 const STRLEN glen = (in_len); \
399 if (utf8) gl *= UTF8_EXPAND; \
400 if ((cur) + gl >= (start) + SvLEN(cat)) { \
402 SvCUR_set((cat), (cur) - (start)); \
403 (start) = sv_exp_grow(cat, gl); \
404 (cur) = (start) + SvCUR(cat); \
406 PUSH_BYTES(utf8, cur, buf, glen, 0); \
409 #define PUSH_BYTE(utf8, s, byte) \
412 const U8 au8 = (byte); \
413 (s) = S_bytes_to_uni(&au8, 1, (s), 0); \
414 } else *(U8 *)(s)++ = (byte); \
417 /* Only to be used inside a loop (see the break) */
418 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
421 if (str >= end) break; \
422 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
423 if (retlen == (STRLEN) -1 || retlen == 0) { \
425 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
430 static const char *_action( const tempsym_t* symptr )
432 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
435 /* Returns the sizeof() struct described by pat */
437 S_measure_struct(pTHX_ tempsym_t* symptr)
441 PERL_ARGS_ASSERT_MEASURE_STRUCT;
443 while (next_symbol(symptr)) {
447 switch (symptr->howlen) {
449 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
453 /* e_no_len and e_number */
454 len = symptr->length;
458 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
461 /* endianness doesn't influence the size of a type */
462 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
464 Perl_croak(aTHX_ "Invalid type '%c' in %s",
465 (int)TYPE_NO_MODIFIERS(symptr->code),
467 case '.' | TYPE_IS_SHRIEKING:
468 case '@' | TYPE_IS_SHRIEKING:
472 case 'U': /* XXXX Is it correct? */
475 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
476 (int) TYPE_NO_MODIFIERS(symptr->code),
483 tempsym_t savsym = *symptr;
484 symptr->patptr = savsym.grpbeg;
485 symptr->patend = savsym.grpend;
486 /* XXXX Theoretically, we need to measure many times at
487 different positions, since the subexpression may contain
488 alignment commands, but be not of aligned length.
489 Need to detect this and croak(). */
490 size = measure_struct(symptr);
494 case 'X' | TYPE_IS_SHRIEKING:
495 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
497 if (!len) /* Avoid division by 0 */
499 len = total % len; /* Assumed: the start is aligned. */
504 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
506 case 'x' | TYPE_IS_SHRIEKING:
507 if (!len) /* Avoid division by 0 */
509 star = total % len; /* Assumed: the start is aligned. */
510 if (star) /* Other portable ways? */
534 size = sizeof(char*);
544 /* locate matching closing parenthesis or bracket
545 * returns char pointer to char after match, or NULL
548 S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
550 PERL_ARGS_ASSERT_GROUP_END;
552 while (patptr < patend) {
553 const char c = *patptr++;
560 while (patptr < patend && *patptr != '\n')
564 patptr = group_end(patptr, patend, ')') + 1;
566 patptr = group_end(patptr, patend, ']') + 1;
568 Perl_croak(aTHX_ "No group ending character '%c' found in template",
570 NOT_REACHED; /* NOTREACHED */
574 /* Convert unsigned decimal number to binary.
575 * Expects a pointer to the first digit and address of length variable
576 * Advances char pointer to 1st non-digit char and returns number
579 S_get_num(pTHX_ const char *patptr, I32 *lenptr )
581 I32 len = *patptr++ - '0';
583 PERL_ARGS_ASSERT_GET_NUM;
585 while (isDIGIT(*patptr)) {
586 if (len >= 0x7FFFFFFF/10)
587 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
588 len = (len * 10) + (*patptr++ - '0');
594 /* The marvellous template parsing routine: Using state stored in *symptr,
595 * locates next template code and count
598 S_next_symbol(pTHX_ tempsym_t* symptr )
600 const char* patptr = symptr->patptr;
601 const char* const patend = symptr->patend;
603 PERL_ARGS_ASSERT_NEXT_SYMBOL;
605 symptr->flags &= ~FLAG_SLASH;
607 while (patptr < patend) {
608 if (isSPACE(*patptr))
610 else if (*patptr == '#') {
612 while (patptr < patend && *patptr != '\n')
617 /* We should have found a template code */
618 I32 code = *patptr++ & 0xFF;
619 U32 inherited_modifiers = 0;
621 if (code == ','){ /* grandfather in commas but with a warning */
622 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
623 symptr->flags |= FLAG_COMMA;
624 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
625 "Invalid type ',' in %s", _action( symptr ) );
630 /* for '(', skip to ')' */
632 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
633 Perl_croak(aTHX_ "()-group starts with a count in %s",
635 symptr->grpbeg = patptr;
636 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
637 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
638 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
642 /* look for group modifiers to inherit */
643 if (TYPE_ENDIANNESS(symptr->flags)) {
644 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
645 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
648 /* look for modifiers */
649 while (patptr < patend) {
654 modifier = TYPE_IS_SHRIEKING;
655 allowed = "sSiIlLxXnNvV@.";
658 modifier = TYPE_IS_BIG_ENDIAN;
659 allowed = ENDIANNESS_ALLOWED_TYPES;
662 modifier = TYPE_IS_LITTLE_ENDIAN;
663 allowed = ENDIANNESS_ALLOWED_TYPES;
674 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
675 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
676 allowed, _action( symptr ) );
678 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
679 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
680 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
681 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
682 TYPE_ENDIANNESS_MASK)
683 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
684 *patptr, _action( symptr ) );
686 if ((code & modifier)) {
687 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
688 "Duplicate modifier '%c' after '%c' in %s",
689 *patptr, (int) TYPE_NO_MODIFIERS(code),
697 /* inherit modifiers */
698 code |= inherited_modifiers;
700 /* look for count and/or / */
701 if (patptr < patend) {
702 if (isDIGIT(*patptr)) {
703 patptr = get_num( patptr, &symptr->length );
704 symptr->howlen = e_number;
706 } else if (*patptr == '*') {
708 symptr->howlen = e_star;
710 } else if (*patptr == '[') {
711 const char* lenptr = ++patptr;
712 symptr->howlen = e_number;
713 patptr = group_end( patptr, patend, ']' ) + 1;
714 /* what kind of [] is it? */
715 if (isDIGIT(*lenptr)) {
716 lenptr = get_num( lenptr, &symptr->length );
718 Perl_croak(aTHX_ "Malformed integer in [] in %s",
721 tempsym_t savsym = *symptr;
722 symptr->patend = patptr-1;
723 symptr->patptr = lenptr;
724 savsym.length = measure_struct(symptr);
728 symptr->howlen = e_no_len;
733 while (patptr < patend) {
734 if (isSPACE(*patptr))
736 else if (*patptr == '#') {
738 while (patptr < patend && *patptr != '\n')
743 if (*patptr == '/') {
744 symptr->flags |= FLAG_SLASH;
746 if (patptr < patend &&
747 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
748 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
755 /* at end - no count, no / */
756 symptr->howlen = e_no_len;
761 symptr->patptr = patptr;
765 symptr->patptr = patptr;
770 There is no way to cleanly handle the case where we should process the
771 string per byte in its upgraded form while it's really in downgraded form
772 (e.g. estimates like strend-s as an upper bound for the number of
773 characters left wouldn't work). So if we foresee the need of this
774 (pattern starts with U or contains U0), we want to work on the encoded
775 version of the string. Users are advised to upgrade their pack string
776 themselves if they need to do a lot of unpacks like this on it
779 need_utf8(const char *pat, const char *patend)
783 PERL_ARGS_ASSERT_NEED_UTF8;
785 while (pat < patend) {
788 pat = (const char *) memchr(pat, '\n', patend-pat);
789 if (!pat) return FALSE;
790 } else if (pat[0] == 'U') {
791 if (first || pat[1] == '0') return TRUE;
792 } else first = FALSE;
799 first_symbol(const char *pat, const char *patend) {
800 PERL_ARGS_ASSERT_FIRST_SYMBOL;
802 while (pat < patend) {
803 if (pat[0] != '#') return pat[0];
805 pat = (const char *) memchr(pat, '\n', patend-pat);
814 =head1 Pack and Unpack
816 =for apidoc unpackstring
818 The engine implementing the unpack() Perl function.
820 Using the template pat..patend, this function unpacks the string
821 s..strend into a number of mortal SVs, which it pushes onto the perl
822 argument (@_) stack (so you will need to issue a C<PUTBACK> before and
823 C<SPAGAIN> after the call to this function). It returns the number of
826 The strend and patend pointers should point to the byte following the last
827 character of each string.
829 Although this function returns its values on the perl argument stack, it
830 doesn't take any parameters from that stack (and thus in particular
831 there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
837 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
841 PERL_ARGS_ASSERT_UNPACKSTRING;
843 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
844 else if (need_utf8(pat, patend)) {
845 /* We probably should try to avoid this in case a scalar context call
846 wouldn't get to the "U0" */
847 STRLEN len = strend - s;
848 s = (char *) bytes_to_utf8((U8 *) s, &len);
851 flags |= FLAG_DO_UTF8;
854 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
855 flags |= FLAG_PARSE_UTF8;
857 TEMPSYM_INIT(&sym, pat, patend, flags);
859 return unpack_rec(&sym, s, s, strend, NULL );
863 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
867 const I32 start_sp_offset = SP - PL_stack_base;
872 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
874 bool explicit_length;
875 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
876 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
878 PERL_ARGS_ASSERT_UNPACK_REC;
880 symptr->strbeg = s - strbeg;
882 while (next_symbol(symptr)) {
885 I32 datumtype = symptr->code;
887 /* do first one only unless in list context
888 / is implemented by unpacking the count, then popping it from the
889 stack, so must check that we're not in the middle of a / */
891 && (SP - PL_stack_base == start_sp_offset + 1)
892 && (datumtype != '/') ) /* XXX can this be omitted */
895 switch (howlen = symptr->howlen) {
897 len = strend - strbeg; /* long enough */
900 /* e_no_len and e_number */
901 len = symptr->length;
905 explicit_length = TRUE;
907 beyond = s >= strend;
909 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
911 /* props nonzero means we can process this letter. */
912 const long size = props & PACK_SIZE_MASK;
913 const long howmany = (strend - s) / size;
917 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
918 if (len && unpack_only_one) len = 1;
924 needs_swap = NEEDS_SWAP(datumtype);
926 switch(TYPE_NO_ENDIANNESS(datumtype)) {
928 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
931 if (howlen == e_no_len)
932 len = 16; /* len is not specified */
940 tempsym_t savsym = *symptr;
941 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
942 symptr->flags |= group_modifiers;
943 symptr->patend = savsym.grpend;
944 symptr->previous = &savsym;
947 if (len && unpack_only_one) len = 1;
949 symptr->patptr = savsym.grpbeg;
950 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
951 else symptr->flags &= ~FLAG_PARSE_UTF8;
952 unpack_rec(symptr, s, strbeg, strend, &s);
953 if (s == strend && savsym.howlen == e_star)
954 break; /* No way to continue */
957 savsym.flags = symptr->flags & ~group_modifiers;
961 case '.' | TYPE_IS_SHRIEKING:
965 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
966 if (howlen == e_star) from = strbeg;
967 else if (len <= 0) from = s;
969 tempsym_t *group = symptr;
971 while (--len && group) group = group->previous;
972 from = group ? strbeg + group->strbeg : strbeg;
975 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
976 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
980 case '@' | TYPE_IS_SHRIEKING:
982 s = strbeg + symptr->strbeg;
983 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
987 Perl_croak(aTHX_ "'@' outside of string in unpack");
992 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
995 Perl_croak(aTHX_ "'@' outside of string in unpack");
999 case 'X' | TYPE_IS_SHRIEKING:
1000 if (!len) /* Avoid division by 0 */
1003 const char *hop, *last;
1005 hop = last = strbeg;
1007 hop += UTF8SKIP(hop);
1014 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1018 len = (s - strbeg) % len;
1024 Perl_croak(aTHX_ "'X' outside of string in unpack");
1025 while (--s, UTF8_IS_CONTINUATION(*s)) {
1027 Perl_croak(aTHX_ "'X' outside of string in unpack");
1032 if (len > s - strbeg)
1033 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1037 case 'x' | TYPE_IS_SHRIEKING: {
1039 if (!len) /* Avoid division by 0 */
1041 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1042 else ai32 = (s - strbeg) % len;
1043 if (ai32 == 0) break;
1051 Perl_croak(aTHX_ "'x' outside of string in unpack");
1056 if (len > strend - s)
1057 Perl_croak(aTHX_ "'x' outside of string in unpack");
1062 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1068 /* Preliminary length estimate is assumed done in 'W' */
1069 if (len > strend - s) len = strend - s;
1075 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1076 if (hop >= strend) {
1078 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1083 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1085 } else if (len > strend - s)
1088 if (datumtype == 'Z') {
1089 /* 'Z' strips stuff after first null */
1090 const char *ptr, *end;
1092 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1093 sv = newSVpvn(s, ptr-s);
1094 if (howlen == e_star) /* exact for 'Z*' */
1095 len = ptr-s + (ptr != strend ? 1 : 0);
1096 } else if (datumtype == 'A') {
1097 /* 'A' strips both nulls and spaces */
1099 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1100 for (ptr = s+len-1; ptr >= s; ptr--)
1101 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1102 !isSPACE_utf8(ptr)) break;
1103 if (ptr >= s) ptr += UTF8SKIP(ptr);
1106 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1108 for (ptr = s+len-1; ptr >= s; ptr--)
1109 if (*ptr != 0 && !isSPACE(*ptr)) break;
1112 sv = newSVpvn(s, ptr-s);
1113 } else sv = newSVpvn(s, len);
1117 /* Undo any upgrade done due to need_utf8() */
1118 if (!(symptr->flags & FLAG_WAS_UTF8))
1119 sv_utf8_downgrade(sv, 0);
1127 if (howlen == e_star || len > (strend - s) * 8)
1128 len = (strend - s) * 8;
1131 while (len >= 8 && s < strend) {
1132 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1137 cuv += PL_bitcount[*(U8 *)s++];
1140 if (len && s < strend) {
1142 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1143 if (datumtype == 'b')
1145 if (bits & 1) cuv++;
1150 if (bits & 0x80) cuv++;
1157 sv = sv_2mortal(newSV(len ? len : 1));
1160 if (datumtype == 'b') {
1162 const I32 ai32 = len;
1163 for (len = 0; len < ai32; len++) {
1164 if (len & 7) bits >>= 1;
1166 if (s >= strend) break;
1167 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1168 } else bits = *(U8 *) s++;
1169 *str++ = bits & 1 ? '1' : '0';
1173 const I32 ai32 = len;
1174 for (len = 0; len < ai32; len++) {
1175 if (len & 7) bits <<= 1;
1177 if (s >= strend) break;
1178 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1179 } else bits = *(U8 *) s++;
1180 *str++ = bits & 0x80 ? '1' : '0';
1184 SvCUR_set(sv, str - SvPVX_const(sv));
1191 /* Preliminary length estimate, acceptable for utf8 too */
1192 if (howlen == e_star || len > (strend - s) * 2)
1193 len = (strend - s) * 2;
1195 sv = sv_2mortal(newSV(len ? len : 1));
1199 if (datumtype == 'h') {
1202 for (len = 0; len < ai32; len++) {
1203 if (len & 1) bits >>= 4;
1205 if (s >= strend) break;
1206 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1207 } else bits = * (U8 *) s++;
1209 *str++ = PL_hexdigit[bits & 15];
1213 const I32 ai32 = len;
1214 for (len = 0; len < ai32; len++) {
1215 if (len & 1) bits <<= 4;
1217 if (s >= strend) break;
1218 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1219 } else bits = *(U8 *) s++;
1221 *str++ = PL_hexdigit[(bits >> 4) & 15];
1226 SvCUR_set(sv, str - SvPVX_const(sv));
1233 if (explicit_length)
1234 /* Switch to "character" mode */
1235 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1240 while (len-- > 0 && s < strend) {
1245 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1246 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1247 if (retlen == (STRLEN) -1 || retlen == 0)
1248 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1252 aint = *(U8 *)(s)++;
1253 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1257 else if (checksum > bits_in_uv)
1258 cdouble += (NV)aint;
1266 while (len-- > 0 && s < strend) {
1268 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1269 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1270 if (retlen == (STRLEN) -1 || retlen == 0)
1271 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1275 else if (checksum > bits_in_uv)
1276 cdouble += (NV) val;
1280 } else if (!checksum)
1282 const U8 ch = *(U8 *) s++;
1285 else if (checksum > bits_in_uv)
1286 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1288 while (len-- > 0) cuv += *(U8 *) s++;
1292 if (explicit_length && howlen != e_star) {
1293 /* Switch to "bytes in UTF-8" mode */
1294 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1296 /* Should be impossible due to the need_utf8() test */
1297 Perl_croak(aTHX_ "U0 mode on a byte string");
1301 if (len > strend - s) len = strend - s;
1303 if (len && unpack_only_one) len = 1;
1307 while (len-- > 0 && s < strend) {
1311 U8 result[UTF8_MAXLEN];
1312 const char *ptr = s;
1314 /* Bug: warns about bad utf8 even if we are short on bytes
1315 and will break out of the loop */
1316 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1319 len = UTF8SKIP(result);
1320 if (!uni_to_bytes(aTHX_ &ptr, strend,
1321 (char *) &result[1], len-1, 'U')) break;
1322 auv = utf8n_to_uvchr(result, len, &retlen, UTF8_ALLOW_DEFAULT);
1325 auv = utf8n_to_uvchr((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
1326 if (retlen == (STRLEN) -1 || retlen == 0)
1327 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1332 else if (checksum > bits_in_uv)
1333 cdouble += (NV) auv;
1338 case 's' | TYPE_IS_SHRIEKING:
1339 #if SHORTSIZE != SIZE16
1342 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
1345 else if (checksum > bits_in_uv)
1346 cdouble += (NV)ashort;
1358 #if U16SIZE > SIZE16
1361 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1362 #if U16SIZE > SIZE16
1368 else if (checksum > bits_in_uv)
1369 cdouble += (NV)ai16;
1374 case 'S' | TYPE_IS_SHRIEKING:
1375 #if SHORTSIZE != SIZE16
1377 unsigned short aushort;
1378 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap,
1382 else if (checksum > bits_in_uv)
1383 cdouble += (NV)aushort;
1396 #if U16SIZE > SIZE16
1399 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
1400 if (datumtype == 'n')
1401 au16 = PerlSock_ntohs(au16);
1402 if (datumtype == 'v')
1406 else if (checksum > bits_in_uv)
1407 cdouble += (NV) au16;
1412 case 'v' | TYPE_IS_SHRIEKING:
1413 case 'n' | TYPE_IS_SHRIEKING:
1416 # if U16SIZE > SIZE16
1419 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1420 /* There should never be any byte-swapping here. */
1421 assert(!TYPE_ENDIANNESS(datumtype));
1422 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1423 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1424 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1425 ai16 = (I16) vtohs((U16) ai16);
1428 else if (checksum > bits_in_uv)
1429 cdouble += (NV) ai16;
1435 case 'i' | TYPE_IS_SHRIEKING:
1438 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
1441 else if (checksum > bits_in_uv)
1442 cdouble += (NV)aint;
1448 case 'I' | TYPE_IS_SHRIEKING:
1451 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
1454 else if (checksum > bits_in_uv)
1455 cdouble += (NV)auint;
1463 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
1466 else if (checksum > bits_in_uv)
1475 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
1478 else if (checksum > bits_in_uv)
1484 case 'l' | TYPE_IS_SHRIEKING:
1485 #if LONGSIZE != SIZE32
1488 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
1491 else if (checksum > bits_in_uv)
1492 cdouble += (NV)along;
1503 #if U32SIZE > SIZE32
1506 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1507 #if U32SIZE > SIZE32
1508 if (ai32 > 2147483647) ai32 -= 4294967296;
1512 else if (checksum > bits_in_uv)
1513 cdouble += (NV)ai32;
1518 case 'L' | TYPE_IS_SHRIEKING:
1519 #if LONGSIZE != SIZE32
1521 unsigned long aulong;
1522 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
1525 else if (checksum > bits_in_uv)
1526 cdouble += (NV)aulong;
1539 #if U32SIZE > SIZE32
1542 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
1543 if (datumtype == 'N')
1544 au32 = PerlSock_ntohl(au32);
1545 if (datumtype == 'V')
1549 else if (checksum > bits_in_uv)
1550 cdouble += (NV)au32;
1555 case 'V' | TYPE_IS_SHRIEKING:
1556 case 'N' | TYPE_IS_SHRIEKING:
1559 #if U32SIZE > SIZE32
1562 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1563 /* There should never be any byte swapping here. */
1564 assert(!TYPE_ENDIANNESS(datumtype));
1565 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1566 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1567 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1568 ai32 = (I32)vtohl((U32)ai32);
1571 else if (checksum > bits_in_uv)
1572 cdouble += (NV)ai32;
1580 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1581 /* newSVpv generates undef if aptr is NULL */
1582 mPUSHs(newSVpv(aptr, 0));
1590 while (len > 0 && s < strend) {
1592 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1593 auv = (auv << 7) | (ch & 0x7f);
1594 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1602 if (++bytes >= sizeof(UV)) { /* promote to string */
1605 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
1606 while (s < strend) {
1607 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1608 sv = mul128(sv, (U8)(ch & 0x7f));
1614 t = SvPV_nolen_const(sv);
1623 if ((s >= strend) && bytes)
1624 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1628 if (symptr->howlen == e_star)
1629 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1631 if (s + sizeof(char*) <= strend) {
1633 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1634 /* newSVpvn generates undef if aptr is NULL */
1635 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1638 #if defined(HAS_QUAD) && IVSIZE >= 8
1642 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
1644 mPUSHs(newSViv((IV)aquad));
1645 else if (checksum > bits_in_uv)
1646 cdouble += (NV)aquad;
1654 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
1656 mPUSHs(newSVuv((UV)auquad));
1657 else if (checksum > bits_in_uv)
1658 cdouble += (NV)auquad;
1664 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1668 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
1678 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
1688 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1689 datumtype, needs_swap);
1696 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1700 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1701 sizeof(aldouble.bytes), datumtype, needs_swap);
1703 mPUSHn(aldouble.ld);
1705 cdouble += aldouble.ld;
1711 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1712 sv = sv_2mortal(newSV(l));
1713 if (l) SvPOK_on(sv);
1716 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1721 next_uni_uu(aTHX_ &s, strend, &a);
1722 next_uni_uu(aTHX_ &s, strend, &b);
1723 next_uni_uu(aTHX_ &s, strend, &c);
1724 next_uni_uu(aTHX_ &s, strend, &d);
1725 hunk[0] = (char)((a << 2) | (b >> 4));
1726 hunk[1] = (char)((b << 4) | (c >> 2));
1727 hunk[2] = (char)((c << 6) | d);
1729 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1737 /* possible checksum byte */
1738 const char *skip = s+UTF8SKIP(s);
1739 if (skip < strend && *skip == '\n')
1745 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1749 len = PL_uudmap[*(U8*)s++] & 077;
1751 if (s < strend && ISUUCHAR(*s))
1752 a = PL_uudmap[*(U8*)s++] & 077;
1755 if (s < strend && ISUUCHAR(*s))
1756 b = PL_uudmap[*(U8*)s++] & 077;
1759 if (s < strend && ISUUCHAR(*s))
1760 c = PL_uudmap[*(U8*)s++] & 077;
1763 if (s < strend && ISUUCHAR(*s))
1764 d = PL_uudmap[*(U8*)s++] & 077;
1767 hunk[0] = (char)((a << 2) | (b >> 4));
1768 hunk[1] = (char)((b << 4) | (c >> 2));
1769 hunk[2] = (char)((c << 6) | d);
1771 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1776 else /* possible checksum byte */
1777 if (s + 1 < strend && s[1] == '\n')
1787 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1788 (checksum > bits_in_uv &&
1789 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1792 anv = (NV) (1 << (checksum & 15));
1793 while (checksum >= 16) {
1797 while (cdouble < 0.0)
1799 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
1800 sv = newSVnv(cdouble);
1803 if (checksum < bits_in_uv) {
1804 UV mask = ((UV)1 << checksum) - 1;
1813 if (symptr->flags & FLAG_SLASH){
1814 if (SP - PL_stack_base - start_sp_offset <= 0)
1816 if( next_symbol(symptr) ){
1817 if( symptr->howlen == e_number )
1818 Perl_croak(aTHX_ "Count after length/code in unpack" );
1820 /* ...end of char buffer then no decent length available */
1821 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1823 /* take top of stack (hope it's numeric) */
1826 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1829 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1831 datumtype = symptr->code;
1832 explicit_length = FALSE;
1840 return SP - PL_stack_base - start_sp_offset;
1848 I32 gimme = GIMME_V;
1851 const char *pat = SvPV_const(left, llen);
1852 const char *s = SvPV_const(right, rlen);
1853 const char *strend = s + rlen;
1854 const char *patend = pat + llen;
1858 cnt = unpackstring(pat, patend, s, strend,
1859 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
1860 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
1863 if ( !cnt && gimme == G_SCALAR )
1864 PUSHs(&PL_sv_undef);
1869 doencodes(U8 *h, const char *s, I32 len)
1871 *h++ = PL_uuemap[len];
1873 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1874 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1875 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1876 *h++ = PL_uuemap[(077 & (s[2] & 077))];
1881 const char r = (len > 1 ? s[1] : '\0');
1882 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1883 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1884 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1885 *h++ = PL_uuemap[0];
1892 S_is_an_int(pTHX_ const char *s, STRLEN l)
1894 SV *result = newSVpvn(s, l);
1895 char *const result_c = SvPV_nolen(result); /* convenience */
1896 char *out = result_c;
1900 PERL_ARGS_ASSERT_IS_AN_INT;
1908 SvREFCNT_dec(result);
1931 SvREFCNT_dec(result);
1937 SvCUR_set(result, out - result_c);
1941 /* pnum must be '\0' terminated */
1943 S_div128(pTHX_ SV *pnum, bool *done)
1946 char * const s = SvPV(pnum, len);
1950 PERL_ARGS_ASSERT_DIV128;
1954 const int i = m * 10 + (*t - '0');
1955 const int r = (i >> 7); /* r < 10 */
1963 SvCUR_set(pnum, (STRLEN) (t - s));
1968 =for apidoc packlist
1970 The engine implementing pack() Perl function.
1976 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
1981 PERL_ARGS_ASSERT_PACKLIST;
1983 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
1985 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1986 Also make sure any UTF8 flag is loaded */
1987 SvPV_force_nolen(cat);
1989 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
1991 (void)pack_rec( cat, &sym, beglist, endlist );
1994 /* like sv_utf8_upgrade, but also repoint the group start markers */
1996 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
1999 const char *from_ptr, *from_start, *from_end, **marks, **m;
2000 char *to_start, *to_ptr;
2002 if (SvUTF8(sv)) return;
2004 from_start = SvPVX_const(sv);
2005 from_end = from_start + SvCUR(sv);
2006 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2007 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
2008 if (from_ptr == from_end) {
2009 /* Simple case: no character needs to be changed */
2014 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2015 Newx(to_start, len, char);
2016 Copy(from_start, to_start, from_ptr-from_start, char);
2017 to_ptr = to_start + (from_ptr-from_start);
2019 Newx(marks, sym_ptr->level+2, const char *);
2020 for (group=sym_ptr; group; group = group->previous)
2021 marks[group->level] = from_start + group->strbeg;
2022 marks[sym_ptr->level+1] = from_end+1;
2023 for (m = marks; *m < from_ptr; m++)
2024 *m = to_start + (*m-from_start);
2026 for (;from_ptr < from_end; from_ptr++) {
2027 while (*m == from_ptr) *m++ = to_ptr;
2028 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2032 while (*m == from_ptr) *m++ = to_ptr;
2033 if (m != marks + sym_ptr->level+1) {
2036 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2037 "level=%d", m, marks, sym_ptr->level);
2039 for (group=sym_ptr; group; group = group->previous)
2040 group->strbeg = marks[group->level] - to_start;
2045 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2046 from_start -= SvIVX(sv);
2049 SvFLAGS(sv) &= ~SVf_OOK;
2052 Safefree(from_start);
2053 SvPV_set(sv, to_start);
2054 SvCUR_set(sv, to_ptr - to_start);
2059 /* Exponential string grower. Makes string extension effectively O(n)
2060 needed says how many extra bytes we need (not counting the final '\0')
2061 Only grows the string if there is an actual lack of space
2064 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2065 const STRLEN cur = SvCUR(sv);
2066 const STRLEN len = SvLEN(sv);
2069 PERL_ARGS_ASSERT_SV_EXP_GROW;
2071 if (len - cur > needed) return SvPVX(sv);
2072 extend = needed > len ? needed : len;
2073 return SvGROW(sv, len+extend+1);
2078 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2081 tempsym_t lookahead;
2082 I32 items = endlist - beglist;
2083 bool found = next_symbol(symptr);
2084 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2085 bool warn_utf8 = ckWARN(WARN_UTF8);
2088 PERL_ARGS_ASSERT_PACK_REC;
2090 if (symptr->level == 0 && found && symptr->code == 'U') {
2091 marked_upgrade(aTHX_ cat, symptr);
2092 symptr->flags |= FLAG_DO_UTF8;
2095 symptr->strbeg = SvCUR(cat);
2101 SV *lengthcode = NULL;
2102 I32 datumtype = symptr->code;
2103 howlen_t howlen = symptr->howlen;
2104 char *start = SvPVX(cat);
2105 char *cur = start + SvCUR(cat);
2108 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2112 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2116 /* e_no_len and e_number */
2117 len = symptr->length;
2122 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2124 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2125 /* We can process this letter. */
2126 STRLEN size = props & PACK_SIZE_MASK;
2127 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2131 /* Look ahead for next symbol. Do we have code/code? */
2132 lookahead = *symptr;
2133 found = next_symbol(&lookahead);
2134 if (symptr->flags & FLAG_SLASH) {
2136 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2137 if (strchr("aAZ", lookahead.code)) {
2138 if (lookahead.howlen == e_number) count = lookahead.length;
2141 count = sv_len_utf8(*beglist);
2144 if (lookahead.code == 'Z') count++;
2147 if (lookahead.howlen == e_number && lookahead.length < items)
2148 count = lookahead.length;
2151 lookahead.howlen = e_number;
2152 lookahead.length = count;
2153 lengthcode = sv_2mortal(newSViv(count));
2156 needs_swap = NEEDS_SWAP(datumtype);
2158 /* Code inside the switch must take care to properly update
2159 cat (CUR length and '\0' termination) if it updated *cur and
2160 doesn't simply leave using break */
2161 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2163 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2164 (int) TYPE_NO_MODIFIERS(datumtype));
2166 Perl_croak(aTHX_ "'%%' may not be used in pack");
2168 case '.' | TYPE_IS_SHRIEKING:
2170 if (howlen == e_star) from = start;
2171 else if (len == 0) from = cur;
2173 tempsym_t *group = symptr;
2175 while (--len && group) group = group->previous;
2176 from = group ? start + group->strbeg : start;
2179 len = SvIV(fromstr);
2181 case '@' | TYPE_IS_SHRIEKING:
2183 from = start + symptr->strbeg;
2185 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2187 while (len && from < cur) {
2188 from += UTF8SKIP(from);
2192 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2194 /* Here we know from == cur */
2196 GROWING(0, cat, start, cur, len);
2197 Zero(cur, len, char);
2199 } else if (from < cur) {
2202 } else goto no_change;
2210 if (len > 0) goto grow;
2211 if (len == 0) goto no_change;
2218 tempsym_t savsym = *symptr;
2219 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2220 symptr->flags |= group_modifiers;
2221 symptr->patend = savsym.grpend;
2223 symptr->previous = &lookahead;
2226 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2227 else symptr->flags &= ~FLAG_PARSE_UTF8;
2228 was_utf8 = SvUTF8(cat);
2229 symptr->patptr = savsym.grpbeg;
2230 beglist = pack_rec(cat, symptr, beglist, endlist);
2231 if (SvUTF8(cat) != was_utf8)
2232 /* This had better be an upgrade while in utf8==0 mode */
2235 if (savsym.howlen == e_star && beglist == endlist)
2236 break; /* No way to continue */
2238 items = endlist - beglist;
2239 lookahead.flags = symptr->flags & ~group_modifiers;
2242 case 'X' | TYPE_IS_SHRIEKING:
2243 if (!len) /* Avoid division by 0 */
2250 hop += UTF8SKIP(hop);
2257 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2261 len = (cur-start) % len;
2265 if (len < 1) goto no_change;
2269 Perl_croak(aTHX_ "'%c' outside of string in pack",
2270 (int) TYPE_NO_MODIFIERS(datumtype));
2271 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2273 Perl_croak(aTHX_ "'%c' outside of string in pack",
2274 (int) TYPE_NO_MODIFIERS(datumtype));
2280 if (cur - start < len)
2281 Perl_croak(aTHX_ "'%c' outside of string in pack",
2282 (int) TYPE_NO_MODIFIERS(datumtype));
2285 if (cur < start+symptr->strbeg) {
2286 /* Make sure group starts don't point into the void */
2288 const STRLEN length = cur-start;
2289 for (group = symptr;
2290 group && length < group->strbeg;
2291 group = group->previous) group->strbeg = length;
2292 lookahead.strbeg = length;
2295 case 'x' | TYPE_IS_SHRIEKING: {
2297 if (!len) /* Avoid division by 0 */
2299 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2300 else ai32 = (cur - start) % len;
2301 if (ai32 == 0) goto no_change;
2313 aptr = SvPV_const(fromstr, fromlen);
2314 if (DO_UTF8(fromstr)) {
2315 const char *end, *s;
2317 if (!utf8 && !SvUTF8(cat)) {
2318 marked_upgrade(aTHX_ cat, symptr);
2319 lookahead.flags |= FLAG_DO_UTF8;
2320 lookahead.strbeg = symptr->strbeg;
2323 cur = start + SvCUR(cat);
2325 if (howlen == e_star) {
2326 if (utf8) goto string_copy;
2330 end = aptr + fromlen;
2331 fromlen = datumtype == 'Z' ? len-1 : len;
2332 while ((I32) fromlen > 0 && s < end) {
2337 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2340 if (datumtype == 'Z') len++;
2346 fromlen = len - fromlen;
2347 if (datumtype == 'Z') fromlen--;
2348 if (howlen == e_star) {
2350 if (datumtype == 'Z') len++;
2352 GROWING(0, cat, start, cur, len);
2353 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2354 datumtype | TYPE_IS_PACK))
2355 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2356 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2357 (int)datumtype, aptr, end, cur, (UV)fromlen);
2361 if (howlen == e_star) {
2363 if (datumtype == 'Z') len++;
2365 if (len <= (I32) fromlen) {
2367 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2369 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2371 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2372 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2374 while (fromlen > 0) {
2375 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2381 if (howlen == e_star) {
2383 if (datumtype == 'Z') len++;
2385 if (len <= (I32) fromlen) {
2387 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2389 GROWING(0, cat, start, cur, len);
2390 Copy(aptr, cur, fromlen, char);
2394 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2401 const char *str, *end;
2408 str = SvPV_const(fromstr, fromlen);
2409 end = str + fromlen;
2410 if (DO_UTF8(fromstr)) {
2412 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2414 utf8_source = FALSE;
2415 utf8_flags = 0; /* Unused, but keep compilers happy */
2417 if (howlen == e_star) len = fromlen;
2418 field_len = (len+7)/8;
2419 GROWING(utf8, cat, start, cur, field_len);
2420 if (len > (I32)fromlen) len = fromlen;
2423 if (datumtype == 'B')
2427 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2429 } else bits |= *str++ & 1;
2430 if (l & 7) bits <<= 1;
2432 PUSH_BYTE(utf8, cur, bits);
2437 /* datumtype == 'b' */
2441 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2442 if (val & 1) bits |= 0x80;
2443 } else if (*str++ & 1)
2445 if (l & 7) bits >>= 1;
2447 PUSH_BYTE(utf8, cur, bits);
2453 if (datumtype == 'B')
2454 bits <<= 7 - (l & 7);
2456 bits >>= 7 - (l & 7);
2457 PUSH_BYTE(utf8, cur, bits);
2460 /* Determine how many chars are left in the requested field */
2462 if (howlen == e_star) field_len = 0;
2463 else field_len -= l;
2464 Zero(cur, field_len, char);
2470 const char *str, *end;
2477 str = SvPV_const(fromstr, fromlen);
2478 end = str + fromlen;
2479 if (DO_UTF8(fromstr)) {
2481 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2483 utf8_source = FALSE;
2484 utf8_flags = 0; /* Unused, but keep compilers happy */
2486 if (howlen == e_star) len = fromlen;
2487 field_len = (len+1)/2;
2488 GROWING(utf8, cat, start, cur, field_len);
2489 if (!utf8 && len > (I32)fromlen) len = fromlen;
2492 if (datumtype == 'H')
2496 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2497 if (val < 256 && isALPHA(val))
2498 bits |= (val + 9) & 0xf;
2501 } else if (isALPHA(*str))
2502 bits |= (*str++ + 9) & 0xf;
2504 bits |= *str++ & 0xf;
2505 if (l & 1) bits <<= 4;
2507 PUSH_BYTE(utf8, cur, bits);
2515 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2516 if (val < 256 && isALPHA(val))
2517 bits |= ((val + 9) & 0xf) << 4;
2519 bits |= (val & 0xf) << 4;
2520 } else if (isALPHA(*str))
2521 bits |= ((*str++ + 9) & 0xf) << 4;
2523 bits |= (*str++ & 0xf) << 4;
2524 if (l & 1) bits >>= 4;
2526 PUSH_BYTE(utf8, cur, bits);
2532 PUSH_BYTE(utf8, cur, bits);
2535 /* Determine how many chars are left in the requested field */
2537 if (howlen == e_star) field_len = 0;
2538 else field_len -= l;
2539 Zero(cur, field_len, char);
2547 aiv = SvIV(fromstr);
2548 if ((-128 > aiv || aiv > 127))
2549 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2550 "Character in 'c' format wrapped in pack");
2551 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2556 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2562 aiv = SvIV(fromstr);
2563 if ((0 > aiv || aiv > 0xff))
2564 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2565 "Character in 'C' format wrapped in pack");
2566 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2571 U8 in_bytes = (U8)IN_BYTES;
2573 end = start+SvLEN(cat)-1;
2574 if (utf8) end -= UTF8_MAXLEN-1;
2578 auv = SvUV(fromstr);
2579 if (in_bytes) auv = auv % 0x100;
2584 SvCUR_set(cat, cur - start);
2586 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2587 end = start+SvLEN(cat)-UTF8_MAXLEN;
2589 cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
2592 0 : UNICODE_ALLOW_ANY);
2597 SvCUR_set(cat, cur - start);
2598 marked_upgrade(aTHX_ cat, symptr);
2599 lookahead.flags |= FLAG_DO_UTF8;
2600 lookahead.strbeg = symptr->strbeg;
2603 cur = start + SvCUR(cat);
2604 end = start+SvLEN(cat)-UTF8_MAXLEN;
2607 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2608 "Character in 'W' format wrapped in pack");
2613 SvCUR_set(cat, cur - start);
2614 GROWING(0, cat, start, cur, len+1);
2615 end = start+SvLEN(cat)-1;
2617 *(U8 *) cur++ = (U8)auv;
2626 if (!(symptr->flags & FLAG_DO_UTF8)) {
2627 marked_upgrade(aTHX_ cat, symptr);
2628 lookahead.flags |= FLAG_DO_UTF8;
2629 lookahead.strbeg = symptr->strbeg;
2635 end = start+SvLEN(cat);
2636 if (!utf8) end -= UTF8_MAXLEN;
2640 auv = SvUV(fromstr);
2642 U8 buffer[UTF8_MAXLEN], *endb;
2643 endb = uvchr_to_utf8_flags(buffer, auv,
2645 0 : UNICODE_ALLOW_ANY);
2646 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2648 SvCUR_set(cat, cur - start);
2649 GROWING(0, cat, start, cur,
2650 len+(endb-buffer)*UTF8_EXPAND);
2651 end = start+SvLEN(cat);
2653 cur = S_bytes_to_uni(buffer, endb-buffer, cur, 0);
2657 SvCUR_set(cat, cur - start);
2658 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2659 end = start+SvLEN(cat)-UTF8_MAXLEN;
2661 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv,
2663 0 : UNICODE_ALLOW_ANY);
2668 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2674 anv = SvNV(fromstr);
2675 # if defined(VMS) && !defined(_IEEE_FP)
2676 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2677 * on Alpha; fake it if we don't have them.
2681 else if (anv < -FLT_MAX)
2683 else afloat = (float)anv;
2685 afloat = (float)anv;
2687 PUSH_VAR(utf8, cur, afloat, needs_swap);
2695 anv = SvNV(fromstr);
2696 # if defined(VMS) && !defined(_IEEE_FP)
2697 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2698 * on Alpha; fake it if we don't have them.
2702 else if (anv < -DBL_MAX)
2704 else adouble = (double)anv;
2706 adouble = (double)anv;
2708 PUSH_VAR(utf8, cur, adouble, needs_swap);
2713 Zero(&anv, 1, NV); /* can be long double with unused bits */
2717 /* to work round a gcc/x86 bug; don't use SvNV */
2718 anv.nv = sv_2nv(fromstr);
2720 anv.nv = SvNV(fromstr);
2722 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
2726 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2729 /* long doubles can have unused bits, which may be nonzero */
2730 Zero(&aldouble, 1, long double);
2734 /* to work round a gcc/x86 bug; don't use SvNV */
2735 aldouble.ld = (long double)sv_2nv(fromstr);
2737 aldouble.ld = (long double)SvNV(fromstr);
2739 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2745 case 'n' | TYPE_IS_SHRIEKING:
2750 ai16 = (I16)SvIV(fromstr);
2751 ai16 = PerlSock_htons(ai16);
2752 PUSH16(utf8, cur, &ai16, FALSE);
2755 case 'v' | TYPE_IS_SHRIEKING:
2760 ai16 = (I16)SvIV(fromstr);
2762 PUSH16(utf8, cur, &ai16, FALSE);
2765 case 'S' | TYPE_IS_SHRIEKING:
2766 #if SHORTSIZE != SIZE16
2768 unsigned short aushort;
2770 aushort = SvUV(fromstr);
2771 PUSH_VAR(utf8, cur, aushort, needs_swap);
2781 au16 = (U16)SvUV(fromstr);
2782 PUSH16(utf8, cur, &au16, needs_swap);
2785 case 's' | TYPE_IS_SHRIEKING:
2786 #if SHORTSIZE != SIZE16
2790 ashort = SvIV(fromstr);
2791 PUSH_VAR(utf8, cur, ashort, needs_swap);
2801 ai16 = (I16)SvIV(fromstr);
2802 PUSH16(utf8, cur, &ai16, needs_swap);
2806 case 'I' | TYPE_IS_SHRIEKING:
2810 auint = SvUV(fromstr);
2811 PUSH_VAR(utf8, cur, auint, needs_swap);
2818 aiv = SvIV(fromstr);
2819 PUSH_VAR(utf8, cur, aiv, needs_swap);
2826 auv = SvUV(fromstr);
2827 PUSH_VAR(utf8, cur, auv, needs_swap);
2834 anv = SvNV(fromstr);
2838 SvCUR_set(cat, cur - start);
2839 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
2842 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2843 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2844 any negative IVs will have already been got by the croak()
2845 above. IOK is untrue for fractions, so we test them
2846 against UV_MAX_P1. */
2847 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2848 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
2849 char *in = buf + sizeof(buf);
2850 UV auv = SvUV(fromstr);
2853 *--in = (char)((auv & 0x7f) | 0x80);
2856 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2857 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2858 in, (buf + sizeof(buf)) - in);
2859 } else if (SvPOKp(fromstr))
2861 else if (SvNOKp(fromstr)) {
2862 /* 10**NV_MAX_10_EXP is the largest power of 10
2863 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
2864 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2865 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2866 And with that many bytes only Inf can overflow.
2867 Some C compilers are strict about integral constant
2868 expressions so we conservatively divide by a slightly
2869 smaller integer instead of multiplying by the exact
2870 floating-point value.
2872 #ifdef NV_MAX_10_EXP
2873 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
2874 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
2876 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
2877 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
2879 char *in = buf + sizeof(buf);
2881 anv = Perl_floor(anv);
2883 const NV next = Perl_floor(anv / 128);
2884 if (in <= buf) /* this cannot happen ;-) */
2885 Perl_croak(aTHX_ "Cannot compress integer in pack");
2886 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
2889 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2890 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2891 in, (buf + sizeof(buf)) - in);
2900 /* Copy string and check for compliance */
2901 from = SvPV_const(fromstr, len);
2902 if ((norm = is_an_int(from, len)) == NULL)
2903 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
2905 Newx(result, len, char);
2908 while (!done) *--in = div128(norm, &done) | 0x80;
2909 result[len - 1] &= 0x7F; /* clear continue bit */
2910 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2911 in, (result + len) - in);
2913 SvREFCNT_dec(norm); /* free norm */
2918 case 'i' | TYPE_IS_SHRIEKING:
2922 aint = SvIV(fromstr);
2923 PUSH_VAR(utf8, cur, aint, needs_swap);
2926 case 'N' | TYPE_IS_SHRIEKING:
2931 au32 = SvUV(fromstr);
2932 au32 = PerlSock_htonl(au32);
2933 PUSH32(utf8, cur, &au32, FALSE);
2936 case 'V' | TYPE_IS_SHRIEKING:
2941 au32 = SvUV(fromstr);
2943 PUSH32(utf8, cur, &au32, FALSE);
2946 case 'L' | TYPE_IS_SHRIEKING:
2947 #if LONGSIZE != SIZE32
2949 unsigned long aulong;
2951 aulong = SvUV(fromstr);
2952 PUSH_VAR(utf8, cur, aulong, needs_swap);
2962 au32 = SvUV(fromstr);
2963 PUSH32(utf8, cur, &au32, needs_swap);
2966 case 'l' | TYPE_IS_SHRIEKING:
2967 #if LONGSIZE != SIZE32
2971 along = SvIV(fromstr);
2972 PUSH_VAR(utf8, cur, along, needs_swap);
2982 ai32 = SvIV(fromstr);
2983 PUSH32(utf8, cur, &ai32, needs_swap);
2986 #if defined(HAS_QUAD) && IVSIZE >= 8
2991 auquad = (Uquad_t) SvUV(fromstr);
2992 PUSH_VAR(utf8, cur, auquad, needs_swap);
2999 aquad = (Quad_t)SvIV(fromstr);
3000 PUSH_VAR(utf8, cur, aquad, needs_swap);
3005 len = 1; /* assume SV is correct length */
3006 GROWING(utf8, cat, start, cur, sizeof(char *));
3013 SvGETMAGIC(fromstr);
3014 if (!SvOK(fromstr)) aptr = NULL;
3016 /* XXX better yet, could spirit away the string to
3017 * a safe spot and hang on to it until the result
3018 * of pack() (and all copies of the result) are
3021 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3022 !SvREADONLY(fromstr)))) {
3023 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3024 "Attempt to pack pointer to temporary value");
3026 if (SvPOK(fromstr) || SvNIOK(fromstr))
3027 aptr = SvPV_nomg_const_nolen(fromstr);
3029 aptr = SvPV_force_flags_nolen(fromstr, 0);
3031 PUSH_VAR(utf8, cur, aptr, needs_swap);
3035 const char *aptr, *aend;
3039 if (len <= 2) len = 45;
3040 else len = len / 3 * 3;
3042 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3043 "Field too wide in 'u' format in pack");
3046 aptr = SvPV_const(fromstr, fromlen);
3047 from_utf8 = DO_UTF8(fromstr);
3049 aend = aptr + fromlen;
3050 fromlen = sv_len_utf8_nomg(fromstr);
3051 } else aend = NULL; /* Unused, but keep compilers happy */
3052 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3053 while (fromlen > 0) {
3056 U8 hunk[1+63/3*4+1];
3058 if ((I32)fromlen > len)
3064 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3065 'u' | TYPE_IS_PACK)) {
3067 SvCUR_set(cat, cur - start);
3068 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3069 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3070 aptr, aend, buffer, (long) todo);
3072 end = doencodes(hunk, buffer, todo);
3074 end = doencodes(hunk, aptr, todo);
3077 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
3084 SvCUR_set(cat, cur - start);
3086 *symptr = lookahead;
3095 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3098 SV *pat_sv = *++MARK;
3099 const char *pat = SvPV_const(pat_sv, fromlen);
3100 const char *patend = pat + fromlen;
3106 packlist(cat, pat, patend, MARK, SP + 1);
3116 * c-indentation-style: bsd
3118 * indent-tabs-mode: nil
3121 * ex: set ts=8 sts=4 sw=4 et: