3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
32 #define PERL_IN_PP_PACK_C
35 /* Types used by pack/unpack */
37 e_no_len, /* no length */
38 e_number, /* number, [] */
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
57 #define TEMPSYM_INIT(symptr, p, e, f) \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
81 U8 bytes[sizeof(long double)];
88 /* Maximum number of bytes to which a byte can grow due to upgrade */
92 * Offset for integer pack/unpack.
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
112 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
115 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
116 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
117 # define OFF16(p) ((char*)(p))
118 # define OFF32(p) ((char*)(p))
120 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
121 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
124 ++++ bad cray byte order
128 # define OFF16(p) ((char *) (p))
129 # define OFF32(p) ((char *) (p))
132 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
133 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
135 #if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
136 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
137 #elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
138 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
140 # error "Unsupported byteorder"
141 /* Need to add code here to re-instate mixed endian support. */
144 /* Only to be used inside a loop (see the break) */
145 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype) \
148 if (!uni_to_bytes(aTHX_ &s, strend, \
149 (char *) (buf), len, datumtype)) break; \
151 Copy(s, (char *) (buf), len, char); \
156 #define SHIFT16(utf8, s, strend, p, datumtype) \
157 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype)
159 #define SHIFT32(utf8, s, strend, p, datumtype) \
160 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype)
162 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
163 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype)
165 #define PUSH_VAR(utf8, aptr, var) \
166 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
168 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
169 #define MAX_SUB_TEMPLATE_LEVEL 100
171 /* flags (note that type modifiers can also be used as flags!) */
172 #define FLAG_WAS_UTF8 0x40
173 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
174 #define FLAG_UNPACK_ONLY_ONE 0x10
175 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
176 #define FLAG_SLASH 0x04
177 #define FLAG_COMMA 0x02
178 #define FLAG_PACK 0x01
181 S_mul128(pTHX_ SV *sv, U8 m)
184 char *s = SvPV(sv, len);
187 PERL_ARGS_ASSERT_MUL128;
189 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
190 SV * const tmpNew = newSVpvs("0000000000");
192 sv_catsv(tmpNew, sv);
193 SvREFCNT_dec(sv); /* free old sv */
198 while (!*t) /* trailing '\0'? */
201 const U32 i = ((*t - '0') << 7) + m;
202 *(t--) = '0' + (char)(i % 10);
208 /* Explosives and implosives. */
210 #if 'I' == 73 && 'J' == 74
211 /* On an ASCII/ISO kind of system */
212 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
215 Some other sort of character set - use memchr() so we don't match
218 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
222 #define TYPE_IS_SHRIEKING 0x100
223 #define TYPE_IS_BIG_ENDIAN 0x200
224 #define TYPE_IS_LITTLE_ENDIAN 0x400
225 #define TYPE_IS_PACK 0x800
226 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
227 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
228 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
230 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
231 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
233 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
235 # define DO_BO_UNPACK(var) \
238 my_swabn(&var, sizeof(var)); \
242 # define DO_BO_PACK(var) \
245 my_swabn(&var, sizeof(var)); \
249 #define PACK_SIZE_CANNOT_CSUM 0x80
250 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
251 #define PACK_SIZE_MASK 0x3F
253 #include "packsizetables.c"
256 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
259 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
260 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
261 /* We try to process malformed UTF-8 as much as possible (preferably with
262 warnings), but these two mean we make no progress in the string and
263 might enter an infinite loop */
264 if (retlen == (STRLEN) -1 || retlen == 0)
265 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
266 (int) TYPE_NO_MODIFIERS(datumtype));
268 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
269 "Character in '%c' format wrapped in unpack",
270 (int) TYPE_NO_MODIFIERS(datumtype));
277 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
278 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
282 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
286 const char *from = *s;
288 const U32 flags = ckWARN(WARN_UTF8) ?
289 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
290 for (;buf_len > 0; buf_len--) {
291 if (from >= end) return FALSE;
292 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
293 if (retlen == (STRLEN) -1 || retlen == 0) {
294 from += UTF8SKIP(from);
296 } else from += retlen;
301 *(U8 *)buf++ = (U8)val;
303 /* We have enough characters for the buffer. Did we have problems ? */
306 /* Rewalk the string fragment while warning */
308 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
309 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
310 if (ptr >= end) break;
311 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
313 if (from > end) from = end;
316 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
317 WARN_PACK : WARN_UNPACK),
318 "Character(s) in '%c' format wrapped in %s",
319 (int) TYPE_NO_MODIFIERS(datumtype),
320 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
327 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
331 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
332 if (val >= 0x100 || !ISUUCHAR(val) ||
333 retlen == (STRLEN) -1 || retlen == 0) {
337 *out = PL_uudmap[val] & 077;
343 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
344 const U8 * const end = start + len;
346 PERL_ARGS_ASSERT_BYTES_TO_UNI;
348 while (start < end) {
349 const UV uv = NATIVE_TO_ASCII(*start);
350 if (UNI_IS_INVARIANT(uv))
351 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
353 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
354 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
361 #define PUSH_BYTES(utf8, cur, buf, len) \
364 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
366 Copy(buf, cur, len, char); \
371 #define GROWING(utf8, cat, start, cur, in_len) \
373 STRLEN glen = (in_len); \
374 if (utf8) glen *= UTF8_EXPAND; \
375 if ((cur) + glen >= (start) + SvLEN(cat)) { \
376 (start) = sv_exp_grow(cat, glen); \
377 (cur) = (start) + SvCUR(cat); \
381 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
383 const STRLEN glen = (in_len); \
385 if (utf8) gl *= UTF8_EXPAND; \
386 if ((cur) + gl >= (start) + SvLEN(cat)) { \
388 SvCUR_set((cat), (cur) - (start)); \
389 (start) = sv_exp_grow(cat, gl); \
390 (cur) = (start) + SvCUR(cat); \
392 PUSH_BYTES(utf8, cur, buf, glen); \
395 #define PUSH_BYTE(utf8, s, byte) \
398 const U8 au8 = (byte); \
399 (s) = bytes_to_uni(&au8, 1, (s)); \
400 } else *(U8 *)(s)++ = (byte); \
403 /* Only to be used inside a loop (see the break) */
404 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
407 if (str >= end) break; \
408 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
409 if (retlen == (STRLEN) -1 || retlen == 0) { \
411 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
416 static const char *_action( const tempsym_t* symptr )
418 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
421 /* Returns the sizeof() struct described by pat */
423 S_measure_struct(pTHX_ tempsym_t* symptr)
427 PERL_ARGS_ASSERT_MEASURE_STRUCT;
429 while (next_symbol(symptr)) {
433 switch (symptr->howlen) {
435 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
439 /* e_no_len and e_number */
440 len = symptr->length;
444 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
447 /* endianness doesn't influence the size of a type */
448 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
450 Perl_croak(aTHX_ "Invalid type '%c' in %s",
451 (int)TYPE_NO_MODIFIERS(symptr->code),
453 case '.' | TYPE_IS_SHRIEKING:
454 case '@' | TYPE_IS_SHRIEKING:
458 case 'U': /* XXXX Is it correct? */
461 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
462 (int) TYPE_NO_MODIFIERS(symptr->code),
469 tempsym_t savsym = *symptr;
470 symptr->patptr = savsym.grpbeg;
471 symptr->patend = savsym.grpend;
472 /* XXXX Theoretically, we need to measure many times at
473 different positions, since the subexpression may contain
474 alignment commands, but be not of aligned length.
475 Need to detect this and croak(). */
476 size = measure_struct(symptr);
480 case 'X' | TYPE_IS_SHRIEKING:
481 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
483 if (!len) /* Avoid division by 0 */
485 len = total % len; /* Assumed: the start is aligned. */
490 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
492 case 'x' | TYPE_IS_SHRIEKING:
493 if (!len) /* Avoid division by 0 */
495 star = total % len; /* Assumed: the start is aligned. */
496 if (star) /* Other portable ways? */
520 size = sizeof(char*);
530 /* locate matching closing parenthesis or bracket
531 * returns char pointer to char after match, or NULL
534 S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
536 PERL_ARGS_ASSERT_GROUP_END;
538 while (patptr < patend) {
539 const char c = *patptr++;
546 while (patptr < patend && *patptr != '\n')
550 patptr = group_end(patptr, patend, ')') + 1;
552 patptr = group_end(patptr, patend, ']') + 1;
554 Perl_croak(aTHX_ "No group ending character '%c' found in template",
560 /* Convert unsigned decimal number to binary.
561 * Expects a pointer to the first digit and address of length variable
562 * Advances char pointer to 1st non-digit char and returns number
565 S_get_num(pTHX_ const char *patptr, I32 *lenptr )
567 I32 len = *patptr++ - '0';
569 PERL_ARGS_ASSERT_GET_NUM;
571 while (isDIGIT(*patptr)) {
572 if (len >= 0x7FFFFFFF/10)
573 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
574 len = (len * 10) + (*patptr++ - '0');
580 /* The marvellous template parsing routine: Using state stored in *symptr,
581 * locates next template code and count
584 S_next_symbol(pTHX_ tempsym_t* symptr )
586 const char* patptr = symptr->patptr;
587 const char* const patend = symptr->patend;
589 PERL_ARGS_ASSERT_NEXT_SYMBOL;
591 symptr->flags &= ~FLAG_SLASH;
593 while (patptr < patend) {
594 if (isSPACE(*patptr))
596 else if (*patptr == '#') {
598 while (patptr < patend && *patptr != '\n')
603 /* We should have found a template code */
604 I32 code = *patptr++ & 0xFF;
605 U32 inherited_modifiers = 0;
607 if (code == ','){ /* grandfather in commas but with a warning */
608 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
609 symptr->flags |= FLAG_COMMA;
610 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
611 "Invalid type ',' in %s", _action( symptr ) );
616 /* for '(', skip to ')' */
618 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
619 Perl_croak(aTHX_ "()-group starts with a count in %s",
621 symptr->grpbeg = patptr;
622 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
623 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
624 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
628 /* look for group modifiers to inherit */
629 if (TYPE_ENDIANNESS(symptr->flags)) {
630 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
631 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
634 /* look for modifiers */
635 while (patptr < patend) {
640 modifier = TYPE_IS_SHRIEKING;
641 allowed = "sSiIlLxXnNvV@.";
644 modifier = TYPE_IS_BIG_ENDIAN;
645 allowed = ENDIANNESS_ALLOWED_TYPES;
648 modifier = TYPE_IS_LITTLE_ENDIAN;
649 allowed = ENDIANNESS_ALLOWED_TYPES;
660 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
661 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
662 allowed, _action( symptr ) );
664 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
665 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
666 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
667 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
668 TYPE_ENDIANNESS_MASK)
669 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
670 *patptr, _action( symptr ) );
672 if ((code & modifier)) {
673 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
674 "Duplicate modifier '%c' after '%c' in %s",
675 *patptr, (int) TYPE_NO_MODIFIERS(code),
683 /* inherit modifiers */
684 code |= inherited_modifiers;
686 /* look for count and/or / */
687 if (patptr < patend) {
688 if (isDIGIT(*patptr)) {
689 patptr = get_num( patptr, &symptr->length );
690 symptr->howlen = e_number;
692 } else if (*patptr == '*') {
694 symptr->howlen = e_star;
696 } else if (*patptr == '[') {
697 const char* lenptr = ++patptr;
698 symptr->howlen = e_number;
699 patptr = group_end( patptr, patend, ']' ) + 1;
700 /* what kind of [] is it? */
701 if (isDIGIT(*lenptr)) {
702 lenptr = get_num( lenptr, &symptr->length );
704 Perl_croak(aTHX_ "Malformed integer in [] in %s",
707 tempsym_t savsym = *symptr;
708 symptr->patend = patptr-1;
709 symptr->patptr = lenptr;
710 savsym.length = measure_struct(symptr);
714 symptr->howlen = e_no_len;
719 while (patptr < patend) {
720 if (isSPACE(*patptr))
722 else if (*patptr == '#') {
724 while (patptr < patend && *patptr != '\n')
729 if (*patptr == '/') {
730 symptr->flags |= FLAG_SLASH;
732 if (patptr < patend &&
733 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
734 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
741 /* at end - no count, no / */
742 symptr->howlen = e_no_len;
747 symptr->patptr = patptr;
751 symptr->patptr = patptr;
756 There is no way to cleanly handle the case where we should process the
757 string per byte in its upgraded form while it's really in downgraded form
758 (e.g. estimates like strend-s as an upper bound for the number of
759 characters left wouldn't work). So if we foresee the need of this
760 (pattern starts with U or contains U0), we want to work on the encoded
761 version of the string. Users are advised to upgrade their pack string
762 themselves if they need to do a lot of unpacks like this on it
765 need_utf8(const char *pat, const char *patend)
769 PERL_ARGS_ASSERT_NEED_UTF8;
771 while (pat < patend) {
774 pat = (const char *) memchr(pat, '\n', patend-pat);
775 if (!pat) return FALSE;
776 } else if (pat[0] == 'U') {
777 if (first || pat[1] == '0') return TRUE;
778 } else first = FALSE;
785 first_symbol(const char *pat, const char *patend) {
786 PERL_ARGS_ASSERT_FIRST_SYMBOL;
788 while (pat < patend) {
789 if (pat[0] != '#') return pat[0];
791 pat = (const char *) memchr(pat, '\n', patend-pat);
799 =for apidoc unpackstring
801 The engine implementing the unpack() Perl function.
803 Using the template pat..patend, this function unpacks the string
804 s..strend into a number of mortal SVs, which it pushes onto the perl
805 argument (@_) stack (so you will need to issue a C<PUTBACK> before and
806 C<SPAGAIN> after the call to this function). It returns the number of
809 The strend and patend pointers should point to the byte following the last
810 character of each string.
812 Although this function returns its values on the perl argument stack, it
813 doesn't take any parameters from that stack (and thus in particular
814 there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
820 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
824 PERL_ARGS_ASSERT_UNPACKSTRING;
826 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
827 else if (need_utf8(pat, patend)) {
828 /* We probably should try to avoid this in case a scalar context call
829 wouldn't get to the "U0" */
830 STRLEN len = strend - s;
831 s = (char *) bytes_to_utf8((U8 *) s, &len);
834 flags |= FLAG_DO_UTF8;
837 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
838 flags |= FLAG_PARSE_UTF8;
840 TEMPSYM_INIT(&sym, pat, patend, flags);
842 return unpack_rec(&sym, s, s, strend, NULL );
846 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
850 const I32 start_sp_offset = SP - PL_stack_base;
855 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
857 bool explicit_length;
858 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
859 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
861 PERL_ARGS_ASSERT_UNPACK_REC;
863 symptr->strbeg = s - strbeg;
865 while (next_symbol(symptr)) {
868 I32 datumtype = symptr->code;
870 /* do first one only unless in list context
871 / is implemented by unpacking the count, then popping it from the
872 stack, so must check that we're not in the middle of a / */
874 && (SP - PL_stack_base == start_sp_offset + 1)
875 && (datumtype != '/') ) /* XXX can this be omitted */
878 switch (howlen = symptr->howlen) {
880 len = strend - strbeg; /* long enough */
883 /* e_no_len and e_number */
884 len = symptr->length;
888 explicit_length = TRUE;
890 beyond = s >= strend;
892 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
894 /* props nonzero means we can process this letter. */
895 const long size = props & PACK_SIZE_MASK;
896 const long howmany = (strend - s) / size;
900 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
901 if (len && unpack_only_one) len = 1;
907 needs_swap = NEEDS_SWAP(datumtype);
909 switch(TYPE_NO_ENDIANNESS(datumtype)) {
911 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
914 if (howlen == e_no_len)
915 len = 16; /* len is not specified */
923 tempsym_t savsym = *symptr;
924 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
925 symptr->flags |= group_modifiers;
926 symptr->patend = savsym.grpend;
927 symptr->previous = &savsym;
930 if (len && unpack_only_one) len = 1;
932 symptr->patptr = savsym.grpbeg;
933 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
934 else symptr->flags &= ~FLAG_PARSE_UTF8;
935 unpack_rec(symptr, s, strbeg, strend, &s);
936 if (s == strend && savsym.howlen == e_star)
937 break; /* No way to continue */
940 savsym.flags = symptr->flags & ~group_modifiers;
944 case '.' | TYPE_IS_SHRIEKING:
948 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
949 if (howlen == e_star) from = strbeg;
950 else if (len <= 0) from = s;
952 tempsym_t *group = symptr;
954 while (--len && group) group = group->previous;
955 from = group ? strbeg + group->strbeg : strbeg;
958 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
959 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
963 case '@' | TYPE_IS_SHRIEKING:
965 s = strbeg + symptr->strbeg;
966 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
970 Perl_croak(aTHX_ "'@' outside of string in unpack");
975 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
978 Perl_croak(aTHX_ "'@' outside of string in unpack");
982 case 'X' | TYPE_IS_SHRIEKING:
983 if (!len) /* Avoid division by 0 */
986 const char *hop, *last;
990 hop += UTF8SKIP(hop);
997 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1001 len = (s - strbeg) % len;
1007 Perl_croak(aTHX_ "'X' outside of string in unpack");
1008 while (--s, UTF8_IS_CONTINUATION(*s)) {
1010 Perl_croak(aTHX_ "'X' outside of string in unpack");
1015 if (len > s - strbeg)
1016 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1020 case 'x' | TYPE_IS_SHRIEKING: {
1022 if (!len) /* Avoid division by 0 */
1024 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1025 else ai32 = (s - strbeg) % len;
1026 if (ai32 == 0) break;
1034 Perl_croak(aTHX_ "'x' outside of string in unpack");
1039 if (len > strend - s)
1040 Perl_croak(aTHX_ "'x' outside of string in unpack");
1045 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1051 /* Preliminary length estimate is assumed done in 'W' */
1052 if (len > strend - s) len = strend - s;
1058 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1059 if (hop >= strend) {
1061 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1066 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1068 } else if (len > strend - s)
1071 if (datumtype == 'Z') {
1072 /* 'Z' strips stuff after first null */
1073 const char *ptr, *end;
1075 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1076 sv = newSVpvn(s, ptr-s);
1077 if (howlen == e_star) /* exact for 'Z*' */
1078 len = ptr-s + (ptr != strend ? 1 : 0);
1079 } else if (datumtype == 'A') {
1080 /* 'A' strips both nulls and spaces */
1082 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1083 for (ptr = s+len-1; ptr >= s; ptr--)
1084 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1085 !isSPACE_utf8(ptr)) break;
1086 if (ptr >= s) ptr += UTF8SKIP(ptr);
1089 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1091 for (ptr = s+len-1; ptr >= s; ptr--)
1092 if (*ptr != 0 && !isSPACE(*ptr)) break;
1095 sv = newSVpvn(s, ptr-s);
1096 } else sv = newSVpvn(s, len);
1100 /* Undo any upgrade done due to need_utf8() */
1101 if (!(symptr->flags & FLAG_WAS_UTF8))
1102 sv_utf8_downgrade(sv, 0);
1110 if (howlen == e_star || len > (strend - s) * 8)
1111 len = (strend - s) * 8;
1114 while (len >= 8 && s < strend) {
1115 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1120 cuv += PL_bitcount[*(U8 *)s++];
1123 if (len && s < strend) {
1125 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1126 if (datumtype == 'b')
1128 if (bits & 1) cuv++;
1133 if (bits & 0x80) cuv++;
1140 sv = sv_2mortal(newSV(len ? len : 1));
1143 if (datumtype == 'b') {
1145 const I32 ai32 = len;
1146 for (len = 0; len < ai32; len++) {
1147 if (len & 7) bits >>= 1;
1149 if (s >= strend) break;
1150 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1151 } else bits = *(U8 *) s++;
1152 *str++ = bits & 1 ? '1' : '0';
1156 const I32 ai32 = len;
1157 for (len = 0; len < ai32; len++) {
1158 if (len & 7) bits <<= 1;
1160 if (s >= strend) break;
1161 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1162 } else bits = *(U8 *) s++;
1163 *str++ = bits & 0x80 ? '1' : '0';
1167 SvCUR_set(sv, str - SvPVX_const(sv));
1174 /* Preliminary length estimate, acceptable for utf8 too */
1175 if (howlen == e_star || len > (strend - s) * 2)
1176 len = (strend - s) * 2;
1178 sv = sv_2mortal(newSV(len ? len : 1));
1182 if (datumtype == 'h') {
1185 for (len = 0; len < ai32; len++) {
1186 if (len & 1) bits >>= 4;
1188 if (s >= strend) break;
1189 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1190 } else bits = * (U8 *) s++;
1192 *str++ = PL_hexdigit[bits & 15];
1196 const I32 ai32 = len;
1197 for (len = 0; len < ai32; len++) {
1198 if (len & 1) bits <<= 4;
1200 if (s >= strend) break;
1201 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1202 } else bits = *(U8 *) s++;
1204 *str++ = PL_hexdigit[(bits >> 4) & 15];
1209 SvCUR_set(sv, str - SvPVX_const(sv));
1216 if (explicit_length)
1217 /* Switch to "character" mode */
1218 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1223 while (len-- > 0 && s < strend) {
1228 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1229 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1230 if (retlen == (STRLEN) -1 || retlen == 0)
1231 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1235 aint = *(U8 *)(s)++;
1236 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1240 else if (checksum > bits_in_uv)
1241 cdouble += (NV)aint;
1249 while (len-- > 0 && s < strend) {
1251 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1252 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1253 if (retlen == (STRLEN) -1 || retlen == 0)
1254 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1258 else if (checksum > bits_in_uv)
1259 cdouble += (NV) val;
1263 } else if (!checksum)
1265 const U8 ch = *(U8 *) s++;
1268 else if (checksum > bits_in_uv)
1269 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1271 while (len-- > 0) cuv += *(U8 *) s++;
1275 if (explicit_length && howlen != e_star) {
1276 /* Switch to "bytes in UTF-8" mode */
1277 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1279 /* Should be impossible due to the need_utf8() test */
1280 Perl_croak(aTHX_ "U0 mode on a byte string");
1284 if (len > strend - s) len = strend - s;
1286 if (len && unpack_only_one) len = 1;
1290 while (len-- > 0 && s < strend) {
1294 U8 result[UTF8_MAXLEN];
1295 const char *ptr = s;
1297 /* Bug: warns about bad utf8 even if we are short on bytes
1298 and will break out of the loop */
1299 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1302 len = UTF8SKIP(result);
1303 if (!uni_to_bytes(aTHX_ &ptr, strend,
1304 (char *) &result[1], len-1, 'U')) break;
1305 auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT);
1308 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
1309 if (retlen == (STRLEN) -1 || retlen == 0)
1310 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1315 else if (checksum > bits_in_uv)
1316 cdouble += (NV) auv;
1321 case 's' | TYPE_IS_SHRIEKING:
1322 #if SHORTSIZE != SIZE16
1325 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1326 DO_BO_UNPACK(ashort);
1329 else if (checksum > bits_in_uv)
1330 cdouble += (NV)ashort;
1342 #if U16SIZE > SIZE16
1345 SHIFT16(utf8, s, strend, &ai16, datumtype);
1347 #if U16SIZE > SIZE16
1353 else if (checksum > bits_in_uv)
1354 cdouble += (NV)ai16;
1359 case 'S' | TYPE_IS_SHRIEKING:
1360 #if SHORTSIZE != SIZE16
1362 unsigned short aushort;
1363 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1364 DO_BO_UNPACK(aushort);
1367 else if (checksum > bits_in_uv)
1368 cdouble += (NV)aushort;
1381 #if U16SIZE > SIZE16
1384 SHIFT16(utf8, s, strend, &au16, datumtype);
1386 if (datumtype == 'n')
1387 au16 = PerlSock_ntohs(au16);
1388 if (datumtype == 'v')
1392 else if (checksum > bits_in_uv)
1393 cdouble += (NV) au16;
1398 case 'v' | TYPE_IS_SHRIEKING:
1399 case 'n' | TYPE_IS_SHRIEKING:
1402 # if U16SIZE > SIZE16
1405 SHIFT16(utf8, s, strend, &ai16, datumtype);
1407 /* There should never be any byte-swapping here. */
1408 assert(!TYPE_ENDIANNESS(datumtype));
1409 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1410 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1411 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1412 ai16 = (I16) vtohs((U16) ai16);
1415 else if (checksum > bits_in_uv)
1416 cdouble += (NV) ai16;
1422 case 'i' | TYPE_IS_SHRIEKING:
1425 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1429 else if (checksum > bits_in_uv)
1430 cdouble += (NV)aint;
1436 case 'I' | TYPE_IS_SHRIEKING:
1439 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1440 DO_BO_UNPACK(auint);
1443 else if (checksum > bits_in_uv)
1444 cdouble += (NV)auint;
1452 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1456 else if (checksum > bits_in_uv)
1465 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1469 else if (checksum > bits_in_uv)
1475 case 'l' | TYPE_IS_SHRIEKING:
1476 #if LONGSIZE != SIZE32
1479 SHIFT_VAR(utf8, s, strend, along, datumtype);
1480 DO_BO_UNPACK(along);
1483 else if (checksum > bits_in_uv)
1484 cdouble += (NV)along;
1495 #if U32SIZE > SIZE32
1498 SHIFT32(utf8, s, strend, &ai32, datumtype);
1500 #if U32SIZE > SIZE32
1501 if (ai32 > 2147483647) ai32 -= 4294967296;
1505 else if (checksum > bits_in_uv)
1506 cdouble += (NV)ai32;
1511 case 'L' | TYPE_IS_SHRIEKING:
1512 #if LONGSIZE != SIZE32
1514 unsigned long aulong;
1515 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1516 DO_BO_UNPACK(aulong);
1519 else if (checksum > bits_in_uv)
1520 cdouble += (NV)aulong;
1533 #if U32SIZE > SIZE32
1536 SHIFT32(utf8, s, strend, &au32, datumtype);
1538 if (datumtype == 'N')
1539 au32 = PerlSock_ntohl(au32);
1540 if (datumtype == 'V')
1544 else if (checksum > bits_in_uv)
1545 cdouble += (NV)au32;
1550 case 'V' | TYPE_IS_SHRIEKING:
1551 case 'N' | TYPE_IS_SHRIEKING:
1554 #if U32SIZE > SIZE32
1557 SHIFT32(utf8, s, strend, &ai32, datumtype);
1559 /* There should never be any byte swapping here. */
1560 assert(!TYPE_ENDIANNESS(datumtype));
1561 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1562 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1563 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1564 ai32 = (I32)vtohl((U32)ai32);
1567 else if (checksum > bits_in_uv)
1568 cdouble += (NV)ai32;
1576 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1578 /* newSVpv generates undef if aptr is NULL */
1579 mPUSHs(newSVpv(aptr, 0));
1587 while (len > 0 && s < strend) {
1589 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1590 auv = (auv << 7) | (ch & 0x7f);
1591 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1599 if (++bytes >= sizeof(UV)) { /* promote to string */
1602 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
1603 while (s < strend) {
1604 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1605 sv = mul128(sv, (U8)(ch & 0x7f));
1611 t = SvPV_nolen_const(sv);
1620 if ((s >= strend) && bytes)
1621 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1625 if (symptr->howlen == e_star)
1626 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1628 if (s + sizeof(char*) <= strend) {
1630 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1632 /* newSVpvn generates undef if aptr is NULL */
1633 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1640 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
1641 DO_BO_UNPACK(aquad);
1643 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
1644 newSViv((IV)aquad) : newSVnv((NV)aquad));
1645 else if (checksum > bits_in_uv)
1646 cdouble += (NV)aquad;
1654 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
1655 DO_BO_UNPACK(auquad);
1657 mPUSHs(auquad <= UV_MAX ?
1658 newSVuv((UV)auquad) : newSVnv((NV)auquad));
1659 else if (checksum > bits_in_uv)
1660 cdouble += (NV)auquad;
1665 #endif /* HAS_QUAD */
1666 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1670 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
1671 DO_BO_UNPACK(afloat);
1681 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
1682 DO_BO_UNPACK(adouble);
1692 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), datumtype);
1693 DO_BO_UNPACK(anv.nv);
1700 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1704 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, sizeof(aldouble.bytes), datumtype);
1705 DO_BO_UNPACK(aldouble.ld);
1707 mPUSHn(aldouble.ld);
1709 cdouble += aldouble.ld;
1715 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1716 sv = sv_2mortal(newSV(l));
1717 if (l) SvPOK_on(sv);
1720 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1725 next_uni_uu(aTHX_ &s, strend, &a);
1726 next_uni_uu(aTHX_ &s, strend, &b);
1727 next_uni_uu(aTHX_ &s, strend, &c);
1728 next_uni_uu(aTHX_ &s, strend, &d);
1729 hunk[0] = (char)((a << 2) | (b >> 4));
1730 hunk[1] = (char)((b << 4) | (c >> 2));
1731 hunk[2] = (char)((c << 6) | d);
1733 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1741 /* possible checksum byte */
1742 const char *skip = s+UTF8SKIP(s);
1743 if (skip < strend && *skip == '\n')
1749 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1753 len = PL_uudmap[*(U8*)s++] & 077;
1755 if (s < strend && ISUUCHAR(*s))
1756 a = PL_uudmap[*(U8*)s++] & 077;
1759 if (s < strend && ISUUCHAR(*s))
1760 b = PL_uudmap[*(U8*)s++] & 077;
1763 if (s < strend && ISUUCHAR(*s))
1764 c = PL_uudmap[*(U8*)s++] & 077;
1767 if (s < strend && ISUUCHAR(*s))
1768 d = PL_uudmap[*(U8*)s++] & 077;
1771 hunk[0] = (char)((a << 2) | (b >> 4));
1772 hunk[1] = (char)((b << 4) | (c >> 2));
1773 hunk[2] = (char)((c << 6) | d);
1775 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1780 else /* possible checksum byte */
1781 if (s + 1 < strend && s[1] == '\n')
1791 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1792 (checksum > bits_in_uv &&
1793 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1796 anv = (NV) (1 << (checksum & 15));
1797 while (checksum >= 16) {
1801 while (cdouble < 0.0)
1803 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
1804 sv = newSVnv(cdouble);
1807 if (checksum < bits_in_uv) {
1808 UV mask = ((UV)1 << checksum) - 1;
1817 if (symptr->flags & FLAG_SLASH){
1818 if (SP - PL_stack_base - start_sp_offset <= 0)
1820 if( next_symbol(symptr) ){
1821 if( symptr->howlen == e_number )
1822 Perl_croak(aTHX_ "Count after length/code in unpack" );
1824 /* ...end of char buffer then no decent length available */
1825 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1827 /* take top of stack (hope it's numeric) */
1830 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1833 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1835 datumtype = symptr->code;
1836 explicit_length = FALSE;
1844 return SP - PL_stack_base - start_sp_offset;
1852 I32 gimme = GIMME_V;
1855 const char *pat = SvPV_const(left, llen);
1856 const char *s = SvPV_const(right, rlen);
1857 const char *strend = s + rlen;
1858 const char *patend = pat + llen;
1862 cnt = unpackstring(pat, patend, s, strend,
1863 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
1864 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
1867 if ( !cnt && gimme == G_SCALAR )
1868 PUSHs(&PL_sv_undef);
1873 doencodes(U8 *h, const char *s, I32 len)
1875 *h++ = PL_uuemap[len];
1877 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1878 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1879 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1880 *h++ = PL_uuemap[(077 & (s[2] & 077))];
1885 const char r = (len > 1 ? s[1] : '\0');
1886 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1887 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1888 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1889 *h++ = PL_uuemap[0];
1896 S_is_an_int(pTHX_ const char *s, STRLEN l)
1898 SV *result = newSVpvn(s, l);
1899 char *const result_c = SvPV_nolen(result); /* convenience */
1900 char *out = result_c;
1904 PERL_ARGS_ASSERT_IS_AN_INT;
1912 SvREFCNT_dec(result);
1935 SvREFCNT_dec(result);
1941 SvCUR_set(result, out - result_c);
1945 /* pnum must be '\0' terminated */
1947 S_div128(pTHX_ SV *pnum, bool *done)
1950 char * const s = SvPV(pnum, len);
1954 PERL_ARGS_ASSERT_DIV128;
1958 const int i = m * 10 + (*t - '0');
1959 const int r = (i >> 7); /* r < 10 */
1967 SvCUR_set(pnum, (STRLEN) (t - s));
1972 =for apidoc packlist
1974 The engine implementing pack() Perl function.
1980 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
1985 PERL_ARGS_ASSERT_PACKLIST;
1987 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
1989 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1990 Also make sure any UTF8 flag is loaded */
1991 SvPV_force_nolen(cat);
1993 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
1995 (void)pack_rec( cat, &sym, beglist, endlist );
1998 /* like sv_utf8_upgrade, but also repoint the group start markers */
2000 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2003 const char *from_ptr, *from_start, *from_end, **marks, **m;
2004 char *to_start, *to_ptr;
2006 if (SvUTF8(sv)) return;
2008 from_start = SvPVX_const(sv);
2009 from_end = from_start + SvCUR(sv);
2010 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2011 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2012 if (from_ptr == from_end) {
2013 /* Simple case: no character needs to be changed */
2018 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2019 Newx(to_start, len, char);
2020 Copy(from_start, to_start, from_ptr-from_start, char);
2021 to_ptr = to_start + (from_ptr-from_start);
2023 Newx(marks, sym_ptr->level+2, const char *);
2024 for (group=sym_ptr; group; group = group->previous)
2025 marks[group->level] = from_start + group->strbeg;
2026 marks[sym_ptr->level+1] = from_end+1;
2027 for (m = marks; *m < from_ptr; m++)
2028 *m = to_start + (*m-from_start);
2030 for (;from_ptr < from_end; from_ptr++) {
2031 while (*m == from_ptr) *m++ = to_ptr;
2032 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2036 while (*m == from_ptr) *m++ = to_ptr;
2037 if (m != marks + sym_ptr->level+1) {
2040 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2041 "level=%d", m, marks, sym_ptr->level);
2043 for (group=sym_ptr; group; group = group->previous)
2044 group->strbeg = marks[group->level] - to_start;
2049 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2050 from_start -= SvIVX(sv);
2053 SvFLAGS(sv) &= ~SVf_OOK;
2056 Safefree(from_start);
2057 SvPV_set(sv, to_start);
2058 SvCUR_set(sv, to_ptr - to_start);
2063 /* Exponential string grower. Makes string extension effectively O(n)
2064 needed says how many extra bytes we need (not counting the final '\0')
2065 Only grows the string if there is an actual lack of space
2068 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2069 const STRLEN cur = SvCUR(sv);
2070 const STRLEN len = SvLEN(sv);
2073 PERL_ARGS_ASSERT_SV_EXP_GROW;
2075 if (len - cur > needed) return SvPVX(sv);
2076 extend = needed > len ? needed : len;
2077 return SvGROW(sv, len+extend+1);
2082 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2085 tempsym_t lookahead;
2086 I32 items = endlist - beglist;
2087 bool found = next_symbol(symptr);
2088 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2089 bool warn_utf8 = ckWARN(WARN_UTF8);
2091 PERL_ARGS_ASSERT_PACK_REC;
2093 if (symptr->level == 0 && found && symptr->code == 'U') {
2094 marked_upgrade(aTHX_ cat, symptr);
2095 symptr->flags |= FLAG_DO_UTF8;
2098 symptr->strbeg = SvCUR(cat);
2104 SV *lengthcode = NULL;
2105 I32 datumtype = symptr->code;
2106 howlen_t howlen = symptr->howlen;
2107 char *start = SvPVX(cat);
2108 char *cur = start + SvCUR(cat);
2111 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2115 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2119 /* e_no_len and e_number */
2120 len = symptr->length;
2125 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2127 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2128 /* We can process this letter. */
2129 STRLEN size = props & PACK_SIZE_MASK;
2130 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2134 /* Look ahead for next symbol. Do we have code/code? */
2135 lookahead = *symptr;
2136 found = next_symbol(&lookahead);
2137 if (symptr->flags & FLAG_SLASH) {
2139 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2140 if (strchr("aAZ", lookahead.code)) {
2141 if (lookahead.howlen == e_number) count = lookahead.length;
2144 count = sv_len_utf8(*beglist);
2147 if (lookahead.code == 'Z') count++;
2150 if (lookahead.howlen == e_number && lookahead.length < items)
2151 count = lookahead.length;
2154 lookahead.howlen = e_number;
2155 lookahead.length = count;
2156 lengthcode = sv_2mortal(newSViv(count));
2159 needs_swap = NEEDS_SWAP(datumtype);
2161 /* Code inside the switch must take care to properly update
2162 cat (CUR length and '\0' termination) if it updated *cur and
2163 doesn't simply leave using break */
2164 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2166 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2167 (int) TYPE_NO_MODIFIERS(datumtype));
2169 Perl_croak(aTHX_ "'%%' may not be used in pack");
2172 case '.' | TYPE_IS_SHRIEKING:
2174 if (howlen == e_star) from = start;
2175 else if (len == 0) from = cur;
2177 tempsym_t *group = symptr;
2179 while (--len && group) group = group->previous;
2180 from = group ? start + group->strbeg : start;
2183 len = SvIV(fromstr);
2185 case '@' | TYPE_IS_SHRIEKING:
2187 from = start + symptr->strbeg;
2189 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2191 while (len && from < cur) {
2192 from += UTF8SKIP(from);
2196 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2198 /* Here we know from == cur */
2200 GROWING(0, cat, start, cur, len);
2201 Zero(cur, len, char);
2203 } else if (from < cur) {
2206 } else goto no_change;
2214 if (len > 0) goto grow;
2215 if (len == 0) goto no_change;
2222 tempsym_t savsym = *symptr;
2223 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2224 symptr->flags |= group_modifiers;
2225 symptr->patend = savsym.grpend;
2227 symptr->previous = &lookahead;
2230 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2231 else symptr->flags &= ~FLAG_PARSE_UTF8;
2232 was_utf8 = SvUTF8(cat);
2233 symptr->patptr = savsym.grpbeg;
2234 beglist = pack_rec(cat, symptr, beglist, endlist);
2235 if (SvUTF8(cat) != was_utf8)
2236 /* This had better be an upgrade while in utf8==0 mode */
2239 if (savsym.howlen == e_star && beglist == endlist)
2240 break; /* No way to continue */
2242 items = endlist - beglist;
2243 lookahead.flags = symptr->flags & ~group_modifiers;
2246 case 'X' | TYPE_IS_SHRIEKING:
2247 if (!len) /* Avoid division by 0 */
2254 hop += UTF8SKIP(hop);
2261 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2265 len = (cur-start) % len;
2269 if (len < 1) goto no_change;
2273 Perl_croak(aTHX_ "'%c' outside of string in pack",
2274 (int) TYPE_NO_MODIFIERS(datumtype));
2275 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2277 Perl_croak(aTHX_ "'%c' outside of string in pack",
2278 (int) TYPE_NO_MODIFIERS(datumtype));
2284 if (cur - start < len)
2285 Perl_croak(aTHX_ "'%c' outside of string in pack",
2286 (int) TYPE_NO_MODIFIERS(datumtype));
2289 if (cur < start+symptr->strbeg) {
2290 /* Make sure group starts don't point into the void */
2292 const STRLEN length = cur-start;
2293 for (group = symptr;
2294 group && length < group->strbeg;
2295 group = group->previous) group->strbeg = length;
2296 lookahead.strbeg = length;
2299 case 'x' | TYPE_IS_SHRIEKING: {
2301 if (!len) /* Avoid division by 0 */
2303 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2304 else ai32 = (cur - start) % len;
2305 if (ai32 == 0) goto no_change;
2317 aptr = SvPV_const(fromstr, fromlen);
2318 if (DO_UTF8(fromstr)) {
2319 const char *end, *s;
2321 if (!utf8 && !SvUTF8(cat)) {
2322 marked_upgrade(aTHX_ cat, symptr);
2323 lookahead.flags |= FLAG_DO_UTF8;
2324 lookahead.strbeg = symptr->strbeg;
2327 cur = start + SvCUR(cat);
2329 if (howlen == e_star) {
2330 if (utf8) goto string_copy;
2334 end = aptr + fromlen;
2335 fromlen = datumtype == 'Z' ? len-1 : len;
2336 while ((I32) fromlen > 0 && s < end) {
2341 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2344 if (datumtype == 'Z') len++;
2350 fromlen = len - fromlen;
2351 if (datumtype == 'Z') fromlen--;
2352 if (howlen == e_star) {
2354 if (datumtype == 'Z') len++;
2356 GROWING(0, cat, start, cur, len);
2357 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2358 datumtype | TYPE_IS_PACK))
2359 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2360 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2361 (int)datumtype, aptr, end, cur, (UV)fromlen);
2365 if (howlen == e_star) {
2367 if (datumtype == 'Z') len++;
2369 if (len <= (I32) fromlen) {
2371 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2373 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2375 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2376 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2378 while (fromlen > 0) {
2379 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2385 if (howlen == e_star) {
2387 if (datumtype == 'Z') len++;
2389 if (len <= (I32) fromlen) {
2391 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2393 GROWING(0, cat, start, cur, len);
2394 Copy(aptr, cur, fromlen, char);
2398 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2405 const char *str, *end;
2412 str = SvPV_const(fromstr, fromlen);
2413 end = str + fromlen;
2414 if (DO_UTF8(fromstr)) {
2416 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2418 utf8_source = FALSE;
2419 utf8_flags = 0; /* Unused, but keep compilers happy */
2421 if (howlen == e_star) len = fromlen;
2422 field_len = (len+7)/8;
2423 GROWING(utf8, cat, start, cur, field_len);
2424 if (len > (I32)fromlen) len = fromlen;
2427 if (datumtype == 'B')
2431 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2433 } else bits |= *str++ & 1;
2434 if (l & 7) bits <<= 1;
2436 PUSH_BYTE(utf8, cur, bits);
2441 /* datumtype == 'b' */
2445 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2446 if (val & 1) bits |= 0x80;
2447 } else if (*str++ & 1)
2449 if (l & 7) bits >>= 1;
2451 PUSH_BYTE(utf8, cur, bits);
2457 if (datumtype == 'B')
2458 bits <<= 7 - (l & 7);
2460 bits >>= 7 - (l & 7);
2461 PUSH_BYTE(utf8, cur, bits);
2464 /* Determine how many chars are left in the requested field */
2466 if (howlen == e_star) field_len = 0;
2467 else field_len -= l;
2468 Zero(cur, field_len, char);
2474 const char *str, *end;
2481 str = SvPV_const(fromstr, fromlen);
2482 end = str + fromlen;
2483 if (DO_UTF8(fromstr)) {
2485 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2487 utf8_source = FALSE;
2488 utf8_flags = 0; /* Unused, but keep compilers happy */
2490 if (howlen == e_star) len = fromlen;
2491 field_len = (len+1)/2;
2492 GROWING(utf8, cat, start, cur, field_len);
2493 if (!utf8 && len > (I32)fromlen) len = fromlen;
2496 if (datumtype == 'H')
2500 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2501 if (val < 256 && isALPHA(val))
2502 bits |= (val + 9) & 0xf;
2505 } else if (isALPHA(*str))
2506 bits |= (*str++ + 9) & 0xf;
2508 bits |= *str++ & 0xf;
2509 if (l & 1) bits <<= 4;
2511 PUSH_BYTE(utf8, cur, bits);
2519 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2520 if (val < 256 && isALPHA(val))
2521 bits |= ((val + 9) & 0xf) << 4;
2523 bits |= (val & 0xf) << 4;
2524 } else if (isALPHA(*str))
2525 bits |= ((*str++ + 9) & 0xf) << 4;
2527 bits |= (*str++ & 0xf) << 4;
2528 if (l & 1) bits >>= 4;
2530 PUSH_BYTE(utf8, cur, bits);
2536 PUSH_BYTE(utf8, cur, bits);
2539 /* Determine how many chars are left in the requested field */
2541 if (howlen == e_star) field_len = 0;
2542 else field_len -= l;
2543 Zero(cur, field_len, char);
2551 aiv = SvIV(fromstr);
2552 if ((-128 > aiv || aiv > 127))
2553 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2554 "Character in 'c' format wrapped in pack");
2555 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2560 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2566 aiv = SvIV(fromstr);
2567 if ((0 > aiv || aiv > 0xff))
2568 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2569 "Character in 'C' format wrapped in pack");
2570 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2575 U8 in_bytes = (U8)IN_BYTES;
2577 end = start+SvLEN(cat)-1;
2578 if (utf8) end -= UTF8_MAXLEN-1;
2582 auv = SvUV(fromstr);
2583 if (in_bytes) auv = auv % 0x100;
2588 SvCUR_set(cat, cur - start);
2590 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2591 end = start+SvLEN(cat)-UTF8_MAXLEN;
2593 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2596 0 : UNICODE_ALLOW_ANY);
2601 SvCUR_set(cat, cur - start);
2602 marked_upgrade(aTHX_ cat, symptr);
2603 lookahead.flags |= FLAG_DO_UTF8;
2604 lookahead.strbeg = symptr->strbeg;
2607 cur = start + SvCUR(cat);
2608 end = start+SvLEN(cat)-UTF8_MAXLEN;
2611 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2612 "Character in 'W' format wrapped in pack");
2617 SvCUR_set(cat, cur - start);
2618 GROWING(0, cat, start, cur, len+1);
2619 end = start+SvLEN(cat)-1;
2621 *(U8 *) cur++ = (U8)auv;
2630 if (!(symptr->flags & FLAG_DO_UTF8)) {
2631 marked_upgrade(aTHX_ cat, symptr);
2632 lookahead.flags |= FLAG_DO_UTF8;
2633 lookahead.strbeg = symptr->strbeg;
2639 end = start+SvLEN(cat);
2640 if (!utf8) end -= UTF8_MAXLEN;
2644 auv = SvUV(fromstr);
2646 U8 buffer[UTF8_MAXLEN], *endb;
2647 endb = uvuni_to_utf8_flags(buffer, auv,
2649 0 : UNICODE_ALLOW_ANY);
2650 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2652 SvCUR_set(cat, cur - start);
2653 GROWING(0, cat, start, cur,
2654 len+(endb-buffer)*UTF8_EXPAND);
2655 end = start+SvLEN(cat);
2657 cur = bytes_to_uni(buffer, endb-buffer, cur);
2661 SvCUR_set(cat, cur - start);
2662 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2663 end = start+SvLEN(cat)-UTF8_MAXLEN;
2665 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
2667 0 : UNICODE_ALLOW_ANY);
2672 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2678 anv = SvNV(fromstr);
2679 # if defined(VMS) && !defined(_IEEE_FP)
2680 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2681 * on Alpha; fake it if we don't have them.
2685 else if (anv < -FLT_MAX)
2687 else afloat = (float)anv;
2689 afloat = (float)anv;
2692 PUSH_VAR(utf8, cur, afloat);
2700 anv = SvNV(fromstr);
2701 # if defined(VMS) && !defined(_IEEE_FP)
2702 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2703 * on Alpha; fake it if we don't have them.
2707 else if (anv < -DBL_MAX)
2709 else adouble = (double)anv;
2711 adouble = (double)anv;
2713 DO_BO_PACK(adouble);
2714 PUSH_VAR(utf8, cur, adouble);
2719 Zero(&anv, 1, NV); /* can be long double with unused bits */
2723 /* to work round a gcc/x86 bug; don't use SvNV */
2724 anv.nv = sv_2nv(fromstr);
2726 anv.nv = SvNV(fromstr);
2729 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes));
2733 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2736 /* long doubles can have unused bits, which may be nonzero */
2737 Zero(&aldouble, 1, long double);
2741 /* to work round a gcc/x86 bug; don't use SvNV */
2742 aldouble.ld = (long double)sv_2nv(fromstr);
2744 aldouble.ld = (long double)SvNV(fromstr);
2746 DO_BO_PACK(aldouble);
2747 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes));
2752 case 'n' | TYPE_IS_SHRIEKING:
2757 ai16 = (I16)SvIV(fromstr);
2758 ai16 = PerlSock_htons(ai16);
2759 PUSH16(utf8, cur, &ai16);
2762 case 'v' | TYPE_IS_SHRIEKING:
2767 ai16 = (I16)SvIV(fromstr);
2769 PUSH16(utf8, cur, &ai16);
2772 case 'S' | TYPE_IS_SHRIEKING:
2773 #if SHORTSIZE != SIZE16
2775 unsigned short aushort;
2777 aushort = SvUV(fromstr);
2778 DO_BO_PACK(aushort);
2779 PUSH_VAR(utf8, cur, aushort);
2789 au16 = (U16)SvUV(fromstr);
2791 PUSH16(utf8, cur, &au16);
2794 case 's' | TYPE_IS_SHRIEKING:
2795 #if SHORTSIZE != SIZE16
2799 ashort = SvIV(fromstr);
2801 PUSH_VAR(utf8, cur, ashort);
2811 ai16 = (I16)SvIV(fromstr);
2813 PUSH16(utf8, cur, &ai16);
2817 case 'I' | TYPE_IS_SHRIEKING:
2821 auint = SvUV(fromstr);
2823 PUSH_VAR(utf8, cur, auint);
2830 aiv = SvIV(fromstr);
2832 PUSH_VAR(utf8, cur, aiv);
2839 auv = SvUV(fromstr);
2841 PUSH_VAR(utf8, cur, auv);
2848 anv = SvNV(fromstr);
2852 SvCUR_set(cat, cur - start);
2853 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
2856 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2857 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2858 any negative IVs will have already been got by the croak()
2859 above. IOK is untrue for fractions, so we test them
2860 against UV_MAX_P1. */
2861 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2862 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
2863 char *in = buf + sizeof(buf);
2864 UV auv = SvUV(fromstr);
2867 *--in = (char)((auv & 0x7f) | 0x80);
2870 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2871 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2872 in, (buf + sizeof(buf)) - in);
2873 } else if (SvPOKp(fromstr))
2875 else if (SvNOKp(fromstr)) {
2876 /* 10**NV_MAX_10_EXP is the largest power of 10
2877 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
2878 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2879 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2880 And with that many bytes only Inf can overflow.
2881 Some C compilers are strict about integral constant
2882 expressions so we conservatively divide by a slightly
2883 smaller integer instead of multiplying by the exact
2884 floating-point value.
2886 #ifdef NV_MAX_10_EXP
2887 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
2888 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
2890 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
2891 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
2893 char *in = buf + sizeof(buf);
2895 anv = Perl_floor(anv);
2897 const NV next = Perl_floor(anv / 128);
2898 if (in <= buf) /* this cannot happen ;-) */
2899 Perl_croak(aTHX_ "Cannot compress integer in pack");
2900 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
2903 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2904 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2905 in, (buf + sizeof(buf)) - in);
2914 /* Copy string and check for compliance */
2915 from = SvPV_const(fromstr, len);
2916 if ((norm = is_an_int(from, len)) == NULL)
2917 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
2919 Newx(result, len, char);
2922 while (!done) *--in = div128(norm, &done) | 0x80;
2923 result[len - 1] &= 0x7F; /* clear continue bit */
2924 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2925 in, (result + len) - in);
2927 SvREFCNT_dec(norm); /* free norm */
2932 case 'i' | TYPE_IS_SHRIEKING:
2936 aint = SvIV(fromstr);
2938 PUSH_VAR(utf8, cur, aint);
2941 case 'N' | TYPE_IS_SHRIEKING:
2946 au32 = SvUV(fromstr);
2947 au32 = PerlSock_htonl(au32);
2948 PUSH32(utf8, cur, &au32);
2951 case 'V' | TYPE_IS_SHRIEKING:
2956 au32 = SvUV(fromstr);
2958 PUSH32(utf8, cur, &au32);
2961 case 'L' | TYPE_IS_SHRIEKING:
2962 #if LONGSIZE != SIZE32
2964 unsigned long aulong;
2966 aulong = SvUV(fromstr);
2968 PUSH_VAR(utf8, cur, aulong);
2978 au32 = SvUV(fromstr);
2980 PUSH32(utf8, cur, &au32);
2983 case 'l' | TYPE_IS_SHRIEKING:
2984 #if LONGSIZE != SIZE32
2988 along = SvIV(fromstr);
2990 PUSH_VAR(utf8, cur, along);
3000 ai32 = SvIV(fromstr);
3002 PUSH32(utf8, cur, &ai32);
3010 auquad = (Uquad_t) SvUV(fromstr);
3012 PUSH_VAR(utf8, cur, auquad);
3019 aquad = (Quad_t)SvIV(fromstr);
3021 PUSH_VAR(utf8, cur, aquad);
3024 #endif /* HAS_QUAD */
3026 len = 1; /* assume SV is correct length */
3027 GROWING(utf8, cat, start, cur, sizeof(char *));
3034 SvGETMAGIC(fromstr);
3035 if (!SvOK(fromstr)) aptr = NULL;
3037 /* XXX better yet, could spirit away the string to
3038 * a safe spot and hang on to it until the result
3039 * of pack() (and all copies of the result) are
3042 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3043 !SvREADONLY(fromstr)))) {
3044 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3045 "Attempt to pack pointer to temporary value");
3047 if (SvPOK(fromstr) || SvNIOK(fromstr))
3048 aptr = SvPV_nomg_const_nolen(fromstr);
3050 aptr = SvPV_force_flags_nolen(fromstr, 0);
3053 PUSH_VAR(utf8, cur, aptr);
3057 const char *aptr, *aend;
3061 if (len <= 2) len = 45;
3062 else len = len / 3 * 3;
3064 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3065 "Field too wide in 'u' format in pack");
3068 aptr = SvPV_const(fromstr, fromlen);
3069 from_utf8 = DO_UTF8(fromstr);
3071 aend = aptr + fromlen;
3072 fromlen = sv_len_utf8_nomg(fromstr);
3073 } else aend = NULL; /* Unused, but keep compilers happy */
3074 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3075 while (fromlen > 0) {
3078 U8 hunk[1+63/3*4+1];
3080 if ((I32)fromlen > len)
3086 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3087 'u' | TYPE_IS_PACK)) {
3089 SvCUR_set(cat, cur - start);
3090 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3091 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3092 aptr, aend, buffer, (long) todo);
3094 end = doencodes(hunk, buffer, todo);
3096 end = doencodes(hunk, aptr, todo);
3099 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3106 SvCUR_set(cat, cur - start);
3108 *symptr = lookahead;
3117 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3120 SV *pat_sv = *++MARK;
3121 const char *pat = SvPV_const(pat_sv, fromlen);
3122 const char *patend = pat + fromlen;
3128 packlist(cat, pat, patend, MARK, SP + 1);
3138 * c-indentation-style: bsd
3140 * indent-tabs-mode: nil
3143 * ex: set ts=8 sts=4 sw=4 et: