3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
32 #define PERL_IN_PP_PACK_C
35 /* Types used by pack/unpack */
37 e_no_len, /* no length */
38 e_number, /* number, [] */
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
57 #define TEMPSYM_INIT(symptr, p, e, f) \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
81 U8 bytes[sizeof(long double)];
88 /* Maximum number of bytes to which a byte can grow due to upgrade */
92 * Offset for integer pack/unpack.
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
112 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
115 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
116 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
117 # define OFF16(p) ((char*)(p))
118 # define OFF32(p) ((char*)(p))
120 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
121 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
124 ++++ bad cray byte order
128 # define OFF16(p) ((char *) (p))
129 # define OFF32(p) ((char *) (p))
132 #define PUSH16(utf8, cur, p, needs_swap) \
133 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
134 #define PUSH32(utf8, cur, p, needs_swap) \
135 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
137 #if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
138 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
139 #elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
140 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
142 # error "Unsupported byteorder"
143 /* Need to add code here to re-instate mixed endian support.
144 NEEDS_SWAP would need to hold a flag indicating which action to
145 take, and S_reverse_copy and the code in uni_to_bytes would need
146 logic adding to deal with any mixed-endian transformations needed.
150 /* Only to be used inside a loop (see the break) */
151 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
153 if (UNLIKELY(utf8)) { \
154 if (!uni_to_bytes(aTHX_ &s, strend, \
155 (char *) (buf), len, datumtype)) break; \
157 if (UNLIKELY(needs_swap)) \
158 S_reverse_copy(s, (char *) (buf), len); \
160 Copy(s, (char *) (buf), len, char); \
165 #define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
166 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
168 #define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
169 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
171 #define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
172 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
174 #define PUSH_VAR(utf8, aptr, var, needs_swap) \
175 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
177 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
178 #define MAX_SUB_TEMPLATE_LEVEL 100
180 /* flags (note that type modifiers can also be used as flags!) */
181 #define FLAG_WAS_UTF8 0x40
182 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
183 #define FLAG_UNPACK_ONLY_ONE 0x10
184 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
185 #define FLAG_SLASH 0x04
186 #define FLAG_COMMA 0x02
187 #define FLAG_PACK 0x01
190 S_mul128(pTHX_ SV *sv, U8 m)
193 char *s = SvPV(sv, len);
196 PERL_ARGS_ASSERT_MUL128;
198 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
199 SV * const tmpNew = newSVpvs("0000000000");
201 sv_catsv(tmpNew, sv);
202 SvREFCNT_dec(sv); /* free old sv */
207 while (!*t) /* trailing '\0'? */
210 const U32 i = ((*t - '0') << 7) + m;
211 *(t--) = '0' + (char)(i % 10);
217 /* Explosives and implosives. */
219 #if 'I' == 73 && 'J' == 74
220 /* On an ASCII/ISO kind of system */
221 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
224 Some other sort of character set - use memchr() so we don't match
227 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
231 #define TYPE_IS_SHRIEKING 0x100
232 #define TYPE_IS_BIG_ENDIAN 0x200
233 #define TYPE_IS_LITTLE_ENDIAN 0x400
234 #define TYPE_IS_PACK 0x800
235 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
236 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
237 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
239 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
240 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
242 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
244 #define PACK_SIZE_CANNOT_CSUM 0x80
245 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
246 #define PACK_SIZE_MASK 0x3F
248 #include "packsizetables.c"
251 S_reverse_copy(const char *src, char *dest, STRLEN len)
259 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
262 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
263 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
264 /* We try to process malformed UTF-8 as much as possible (preferably with
265 warnings), but these two mean we make no progress in the string and
266 might enter an infinite loop */
267 if (retlen == (STRLEN) -1 || retlen == 0)
268 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
269 (int) TYPE_NO_MODIFIERS(datumtype));
271 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
272 "Character in '%c' format wrapped in unpack",
273 (int) TYPE_NO_MODIFIERS(datumtype));
280 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
281 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
285 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
289 const char *from = *s;
291 const U32 flags = ckWARN(WARN_UTF8) ?
292 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
293 const bool needs_swap = NEEDS_SWAP(datumtype);
295 if (UNLIKELY(needs_swap))
298 for (;buf_len > 0; buf_len--) {
299 if (from >= end) return FALSE;
300 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
301 if (retlen == (STRLEN) -1 || retlen == 0) {
302 from += UTF8SKIP(from);
304 } else from += retlen;
309 if (UNLIKELY(needs_swap))
310 *(U8 *)--buf = (U8)val;
312 *(U8 *)buf++ = (U8)val;
314 /* We have enough characters for the buffer. Did we have problems ? */
317 /* Rewalk the string fragment while warning */
319 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
320 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
321 if (ptr >= end) break;
322 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
324 if (from > end) from = end;
327 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
328 WARN_PACK : WARN_UNPACK),
329 "Character(s) in '%c' format wrapped in %s",
330 (int) TYPE_NO_MODIFIERS(datumtype),
331 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
338 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
341 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
342 if (val >= 0x100 || !ISUUCHAR(val) ||
343 retlen == (STRLEN) -1 || retlen == 0) {
347 *out = PL_uudmap[val] & 077;
353 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
354 PERL_ARGS_ASSERT_BYTES_TO_UNI;
356 if (UNLIKELY(needs_swap)) {
357 const U8 *p = start + len;
358 while (p-- > start) {
359 append_utf8_from_native_byte(*p, (U8 **) & dest);
362 const U8 * const end = start + len;
363 while (start < end) {
364 append_utf8_from_native_byte(*start, (U8 **) & dest);
371 #define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
373 if (UNLIKELY(utf8)) \
374 (cur) = S_bytes_to_uni((U8 *) buf, len, (cur), needs_swap); \
376 if (UNLIKELY(needs_swap)) \
377 S_reverse_copy((char *)(buf), cur, len); \
379 Copy(buf, cur, len, char); \
384 #define GROWING(utf8, cat, start, cur, in_len) \
386 STRLEN glen = (in_len); \
387 if (utf8) glen *= UTF8_EXPAND; \
388 if ((cur) + glen >= (start) + SvLEN(cat)) { \
389 (start) = sv_exp_grow(cat, glen); \
390 (cur) = (start) + SvCUR(cat); \
394 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
396 const STRLEN glen = (in_len); \
398 if (utf8) gl *= UTF8_EXPAND; \
399 if ((cur) + gl >= (start) + SvLEN(cat)) { \
401 SvCUR_set((cat), (cur) - (start)); \
402 (start) = sv_exp_grow(cat, gl); \
403 (cur) = (start) + SvCUR(cat); \
405 PUSH_BYTES(utf8, cur, buf, glen, 0); \
408 #define PUSH_BYTE(utf8, s, byte) \
411 const U8 au8 = (byte); \
412 (s) = S_bytes_to_uni(&au8, 1, (s), 0); \
413 } else *(U8 *)(s)++ = (byte); \
416 /* Only to be used inside a loop (see the break) */
417 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
420 if (str >= end) break; \
421 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
422 if (retlen == (STRLEN) -1 || retlen == 0) { \
424 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
429 static const char *_action( const tempsym_t* symptr )
431 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
434 /* Returns the sizeof() struct described by pat */
436 S_measure_struct(pTHX_ tempsym_t* symptr)
440 PERL_ARGS_ASSERT_MEASURE_STRUCT;
442 while (next_symbol(symptr)) {
446 switch (symptr->howlen) {
448 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
452 /* e_no_len and e_number */
453 len = symptr->length;
457 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
460 /* endianness doesn't influence the size of a type */
461 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
463 Perl_croak(aTHX_ "Invalid type '%c' in %s",
464 (int)TYPE_NO_MODIFIERS(symptr->code),
466 case '.' | TYPE_IS_SHRIEKING:
467 case '@' | TYPE_IS_SHRIEKING:
471 case 'U': /* XXXX Is it correct? */
474 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
475 (int) TYPE_NO_MODIFIERS(symptr->code),
482 tempsym_t savsym = *symptr;
483 symptr->patptr = savsym.grpbeg;
484 symptr->patend = savsym.grpend;
485 /* XXXX Theoretically, we need to measure many times at
486 different positions, since the subexpression may contain
487 alignment commands, but be not of aligned length.
488 Need to detect this and croak(). */
489 size = measure_struct(symptr);
493 case 'X' | TYPE_IS_SHRIEKING:
494 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
496 if (!len) /* Avoid division by 0 */
498 len = total % len; /* Assumed: the start is aligned. */
503 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
505 case 'x' | TYPE_IS_SHRIEKING:
506 if (!len) /* Avoid division by 0 */
508 star = total % len; /* Assumed: the start is aligned. */
509 if (star) /* Other portable ways? */
533 size = sizeof(char*);
543 /* locate matching closing parenthesis or bracket
544 * returns char pointer to char after match, or NULL
547 S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
549 PERL_ARGS_ASSERT_GROUP_END;
551 while (patptr < patend) {
552 const char c = *patptr++;
559 while (patptr < patend && *patptr != '\n')
563 patptr = group_end(patptr, patend, ')') + 1;
565 patptr = group_end(patptr, patend, ']') + 1;
567 Perl_croak(aTHX_ "No group ending character '%c' found in template",
569 NOT_REACHED; /* NOTREACHED */
573 /* Convert unsigned decimal number to binary.
574 * Expects a pointer to the first digit and address of length variable
575 * Advances char pointer to 1st non-digit char and returns number
578 S_get_num(pTHX_ const char *patptr, I32 *lenptr )
580 I32 len = *patptr++ - '0';
582 PERL_ARGS_ASSERT_GET_NUM;
584 while (isDIGIT(*patptr)) {
585 if (len >= 0x7FFFFFFF/10)
586 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
587 len = (len * 10) + (*patptr++ - '0');
593 /* The marvellous template parsing routine: Using state stored in *symptr,
594 * locates next template code and count
597 S_next_symbol(pTHX_ tempsym_t* symptr )
599 const char* patptr = symptr->patptr;
600 const char* const patend = symptr->patend;
602 PERL_ARGS_ASSERT_NEXT_SYMBOL;
604 symptr->flags &= ~FLAG_SLASH;
606 while (patptr < patend) {
607 if (isSPACE(*patptr))
609 else if (*patptr == '#') {
611 while (patptr < patend && *patptr != '\n')
616 /* We should have found a template code */
617 I32 code = *patptr++ & 0xFF;
618 U32 inherited_modifiers = 0;
620 if (code == ','){ /* grandfather in commas but with a warning */
621 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
622 symptr->flags |= FLAG_COMMA;
623 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
624 "Invalid type ',' in %s", _action( symptr ) );
629 /* for '(', skip to ')' */
631 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
632 Perl_croak(aTHX_ "()-group starts with a count in %s",
634 symptr->grpbeg = patptr;
635 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
636 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
637 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
641 /* look for group modifiers to inherit */
642 if (TYPE_ENDIANNESS(symptr->flags)) {
643 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
644 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
647 /* look for modifiers */
648 while (patptr < patend) {
653 modifier = TYPE_IS_SHRIEKING;
654 allowed = "sSiIlLxXnNvV@.";
657 modifier = TYPE_IS_BIG_ENDIAN;
658 allowed = ENDIANNESS_ALLOWED_TYPES;
661 modifier = TYPE_IS_LITTLE_ENDIAN;
662 allowed = ENDIANNESS_ALLOWED_TYPES;
673 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
674 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
675 allowed, _action( symptr ) );
677 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
678 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
679 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
680 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
681 TYPE_ENDIANNESS_MASK)
682 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
683 *patptr, _action( symptr ) );
685 if ((code & modifier)) {
686 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
687 "Duplicate modifier '%c' after '%c' in %s",
688 *patptr, (int) TYPE_NO_MODIFIERS(code),
696 /* inherit modifiers */
697 code |= inherited_modifiers;
699 /* look for count and/or / */
700 if (patptr < patend) {
701 if (isDIGIT(*patptr)) {
702 patptr = get_num( patptr, &symptr->length );
703 symptr->howlen = e_number;
705 } else if (*patptr == '*') {
707 symptr->howlen = e_star;
709 } else if (*patptr == '[') {
710 const char* lenptr = ++patptr;
711 symptr->howlen = e_number;
712 patptr = group_end( patptr, patend, ']' ) + 1;
713 /* what kind of [] is it? */
714 if (isDIGIT(*lenptr)) {
715 lenptr = get_num( lenptr, &symptr->length );
717 Perl_croak(aTHX_ "Malformed integer in [] in %s",
720 tempsym_t savsym = *symptr;
721 symptr->patend = patptr-1;
722 symptr->patptr = lenptr;
723 savsym.length = measure_struct(symptr);
727 symptr->howlen = e_no_len;
732 while (patptr < patend) {
733 if (isSPACE(*patptr))
735 else if (*patptr == '#') {
737 while (patptr < patend && *patptr != '\n')
742 if (*patptr == '/') {
743 symptr->flags |= FLAG_SLASH;
745 if (patptr < patend &&
746 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
747 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
754 /* at end - no count, no / */
755 symptr->howlen = e_no_len;
760 symptr->patptr = patptr;
764 symptr->patptr = patptr;
769 There is no way to cleanly handle the case where we should process the
770 string per byte in its upgraded form while it's really in downgraded form
771 (e.g. estimates like strend-s as an upper bound for the number of
772 characters left wouldn't work). So if we foresee the need of this
773 (pattern starts with U or contains U0), we want to work on the encoded
774 version of the string. Users are advised to upgrade their pack string
775 themselves if they need to do a lot of unpacks like this on it
778 need_utf8(const char *pat, const char *patend)
782 PERL_ARGS_ASSERT_NEED_UTF8;
784 while (pat < patend) {
787 pat = (const char *) memchr(pat, '\n', patend-pat);
788 if (!pat) return FALSE;
789 } else if (pat[0] == 'U') {
790 if (first || pat[1] == '0') return TRUE;
791 } else first = FALSE;
798 first_symbol(const char *pat, const char *patend) {
799 PERL_ARGS_ASSERT_FIRST_SYMBOL;
801 while (pat < patend) {
802 if (pat[0] != '#') return pat[0];
804 pat = (const char *) memchr(pat, '\n', patend-pat);
813 =head1 Pack and Unpack
815 =for apidoc unpackstring
817 The engine implementing the unpack() Perl function.
819 Using the template pat..patend, this function unpacks the string
820 s..strend into a number of mortal SVs, which it pushes onto the perl
821 argument (@_) stack (so you will need to issue a C<PUTBACK> before and
822 C<SPAGAIN> after the call to this function). It returns the number of
825 The strend and patend pointers should point to the byte following the last
826 character of each string.
828 Although this function returns its values on the perl argument stack, it
829 doesn't take any parameters from that stack (and thus in particular
830 there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
836 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
840 PERL_ARGS_ASSERT_UNPACKSTRING;
842 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
843 else if (need_utf8(pat, patend)) {
844 /* We probably should try to avoid this in case a scalar context call
845 wouldn't get to the "U0" */
846 STRLEN len = strend - s;
847 s = (char *) bytes_to_utf8((U8 *) s, &len);
850 flags |= FLAG_DO_UTF8;
853 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
854 flags |= FLAG_PARSE_UTF8;
856 TEMPSYM_INIT(&sym, pat, patend, flags);
858 return unpack_rec(&sym, s, s, strend, NULL );
862 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
866 const I32 start_sp_offset = SP - PL_stack_base;
871 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
873 bool explicit_length;
874 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
875 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
877 PERL_ARGS_ASSERT_UNPACK_REC;
879 symptr->strbeg = s - strbeg;
881 while (next_symbol(symptr)) {
884 I32 datumtype = symptr->code;
886 /* do first one only unless in list context
887 / is implemented by unpacking the count, then popping it from the
888 stack, so must check that we're not in the middle of a / */
890 && (SP - PL_stack_base == start_sp_offset + 1)
891 && (datumtype != '/') ) /* XXX can this be omitted */
894 switch (howlen = symptr->howlen) {
896 len = strend - strbeg; /* long enough */
899 /* e_no_len and e_number */
900 len = symptr->length;
904 explicit_length = TRUE;
906 beyond = s >= strend;
908 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
910 /* props nonzero means we can process this letter. */
911 const long size = props & PACK_SIZE_MASK;
912 const long howmany = (strend - s) / size;
916 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
917 if (len && unpack_only_one) len = 1;
923 needs_swap = NEEDS_SWAP(datumtype);
925 switch(TYPE_NO_ENDIANNESS(datumtype)) {
927 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
930 if (howlen == e_no_len)
931 len = 16; /* len is not specified */
939 tempsym_t savsym = *symptr;
940 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
941 symptr->flags |= group_modifiers;
942 symptr->patend = savsym.grpend;
943 symptr->previous = &savsym;
946 if (len && unpack_only_one) len = 1;
948 symptr->patptr = savsym.grpbeg;
949 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
950 else symptr->flags &= ~FLAG_PARSE_UTF8;
951 unpack_rec(symptr, s, strbeg, strend, &s);
952 if (s == strend && savsym.howlen == e_star)
953 break; /* No way to continue */
956 savsym.flags = symptr->flags & ~group_modifiers;
960 case '.' | TYPE_IS_SHRIEKING:
964 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
965 if (howlen == e_star) from = strbeg;
966 else if (len <= 0) from = s;
968 tempsym_t *group = symptr;
970 while (--len && group) group = group->previous;
971 from = group ? strbeg + group->strbeg : strbeg;
974 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
975 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
979 case '@' | TYPE_IS_SHRIEKING:
981 s = strbeg + symptr->strbeg;
982 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
986 Perl_croak(aTHX_ "'@' outside of string in unpack");
991 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
994 Perl_croak(aTHX_ "'@' outside of string in unpack");
998 case 'X' | TYPE_IS_SHRIEKING:
999 if (!len) /* Avoid division by 0 */
1002 const char *hop, *last;
1004 hop = last = strbeg;
1006 hop += UTF8SKIP(hop);
1013 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1017 len = (s - strbeg) % len;
1023 Perl_croak(aTHX_ "'X' outside of string in unpack");
1024 while (--s, UTF8_IS_CONTINUATION(*s)) {
1026 Perl_croak(aTHX_ "'X' outside of string in unpack");
1031 if (len > s - strbeg)
1032 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1036 case 'x' | TYPE_IS_SHRIEKING: {
1038 if (!len) /* Avoid division by 0 */
1040 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1041 else ai32 = (s - strbeg) % len;
1042 if (ai32 == 0) break;
1050 Perl_croak(aTHX_ "'x' outside of string in unpack");
1055 if (len > strend - s)
1056 Perl_croak(aTHX_ "'x' outside of string in unpack");
1061 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1067 /* Preliminary length estimate is assumed done in 'W' */
1068 if (len > strend - s) len = strend - s;
1074 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1075 if (hop >= strend) {
1077 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1082 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1084 } else if (len > strend - s)
1087 if (datumtype == 'Z') {
1088 /* 'Z' strips stuff after first null */
1089 const char *ptr, *end;
1091 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1092 sv = newSVpvn(s, ptr-s);
1093 if (howlen == e_star) /* exact for 'Z*' */
1094 len = ptr-s + (ptr != strend ? 1 : 0);
1095 } else if (datumtype == 'A') {
1096 /* 'A' strips both nulls and spaces */
1098 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1099 for (ptr = s+len-1; ptr >= s; ptr--)
1100 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1101 !isSPACE_utf8(ptr)) break;
1102 if (ptr >= s) ptr += UTF8SKIP(ptr);
1105 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1107 for (ptr = s+len-1; ptr >= s; ptr--)
1108 if (*ptr != 0 && !isSPACE(*ptr)) break;
1111 sv = newSVpvn(s, ptr-s);
1112 } else sv = newSVpvn(s, len);
1116 /* Undo any upgrade done due to need_utf8() */
1117 if (!(symptr->flags & FLAG_WAS_UTF8))
1118 sv_utf8_downgrade(sv, 0);
1126 if (howlen == e_star || len > (strend - s) * 8)
1127 len = (strend - s) * 8;
1130 while (len >= 8 && s < strend) {
1131 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1136 cuv += PL_bitcount[*(U8 *)s++];
1139 if (len && s < strend) {
1141 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1142 if (datumtype == 'b')
1144 if (bits & 1) cuv++;
1149 if (bits & 0x80) cuv++;
1156 sv = sv_2mortal(newSV(len ? len : 1));
1159 if (datumtype == 'b') {
1161 const I32 ai32 = len;
1162 for (len = 0; len < ai32; len++) {
1163 if (len & 7) bits >>= 1;
1165 if (s >= strend) break;
1166 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1167 } else bits = *(U8 *) s++;
1168 *str++ = bits & 1 ? '1' : '0';
1172 const I32 ai32 = len;
1173 for (len = 0; len < ai32; len++) {
1174 if (len & 7) bits <<= 1;
1176 if (s >= strend) break;
1177 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1178 } else bits = *(U8 *) s++;
1179 *str++ = bits & 0x80 ? '1' : '0';
1183 SvCUR_set(sv, str - SvPVX_const(sv));
1190 /* Preliminary length estimate, acceptable for utf8 too */
1191 if (howlen == e_star || len > (strend - s) * 2)
1192 len = (strend - s) * 2;
1194 sv = sv_2mortal(newSV(len ? len : 1));
1198 if (datumtype == 'h') {
1201 for (len = 0; len < ai32; len++) {
1202 if (len & 1) bits >>= 4;
1204 if (s >= strend) break;
1205 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1206 } else bits = * (U8 *) s++;
1208 *str++ = PL_hexdigit[bits & 15];
1212 const I32 ai32 = len;
1213 for (len = 0; len < ai32; len++) {
1214 if (len & 1) bits <<= 4;
1216 if (s >= strend) break;
1217 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1218 } else bits = *(U8 *) s++;
1220 *str++ = PL_hexdigit[(bits >> 4) & 15];
1225 SvCUR_set(sv, str - SvPVX_const(sv));
1232 if (explicit_length)
1233 /* Switch to "character" mode */
1234 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1239 while (len-- > 0 && s < strend) {
1244 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1245 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1246 if (retlen == (STRLEN) -1 || retlen == 0)
1247 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1251 aint = *(U8 *)(s)++;
1252 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1256 else if (checksum > bits_in_uv)
1257 cdouble += (NV)aint;
1265 while (len-- > 0 && s < strend) {
1267 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1268 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1269 if (retlen == (STRLEN) -1 || retlen == 0)
1270 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1274 else if (checksum > bits_in_uv)
1275 cdouble += (NV) val;
1279 } else if (!checksum)
1281 const U8 ch = *(U8 *) s++;
1284 else if (checksum > bits_in_uv)
1285 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1287 while (len-- > 0) cuv += *(U8 *) s++;
1291 if (explicit_length && howlen != e_star) {
1292 /* Switch to "bytes in UTF-8" mode */
1293 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1295 /* Should be impossible due to the need_utf8() test */
1296 Perl_croak(aTHX_ "U0 mode on a byte string");
1300 if (len > strend - s) len = strend - s;
1302 if (len && unpack_only_one) len = 1;
1306 while (len-- > 0 && s < strend) {
1310 U8 result[UTF8_MAXLEN];
1311 const char *ptr = s;
1313 /* Bug: warns about bad utf8 even if we are short on bytes
1314 and will break out of the loop */
1315 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1318 len = UTF8SKIP(result);
1319 if (!uni_to_bytes(aTHX_ &ptr, strend,
1320 (char *) &result[1], len-1, 'U')) break;
1321 auv = NATIVE_TO_UNI(utf8n_to_uvchr(result,
1324 UTF8_ALLOW_DEFAULT));
1327 auv = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s,
1330 UTF8_ALLOW_DEFAULT));
1331 if (retlen == (STRLEN) -1 || retlen == 0)
1332 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1337 else if (checksum > bits_in_uv)
1338 cdouble += (NV) auv;
1343 case 's' | TYPE_IS_SHRIEKING:
1344 #if SHORTSIZE != SIZE16
1347 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
1350 else if (checksum > bits_in_uv)
1351 cdouble += (NV)ashort;
1363 #if U16SIZE > SIZE16
1366 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1367 #if U16SIZE > SIZE16
1373 else if (checksum > bits_in_uv)
1374 cdouble += (NV)ai16;
1379 case 'S' | TYPE_IS_SHRIEKING:
1380 #if SHORTSIZE != SIZE16
1382 unsigned short aushort;
1383 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap,
1387 else if (checksum > bits_in_uv)
1388 cdouble += (NV)aushort;
1401 #if U16SIZE > SIZE16
1404 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
1405 if (datumtype == 'n')
1406 au16 = PerlSock_ntohs(au16);
1407 if (datumtype == 'v')
1411 else if (checksum > bits_in_uv)
1412 cdouble += (NV) au16;
1417 case 'v' | TYPE_IS_SHRIEKING:
1418 case 'n' | TYPE_IS_SHRIEKING:
1421 # if U16SIZE > SIZE16
1424 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1425 /* There should never be any byte-swapping here. */
1426 assert(!TYPE_ENDIANNESS(datumtype));
1427 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1428 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1429 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1430 ai16 = (I16) vtohs((U16) ai16);
1433 else if (checksum > bits_in_uv)
1434 cdouble += (NV) ai16;
1440 case 'i' | TYPE_IS_SHRIEKING:
1443 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
1446 else if (checksum > bits_in_uv)
1447 cdouble += (NV)aint;
1453 case 'I' | TYPE_IS_SHRIEKING:
1456 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
1459 else if (checksum > bits_in_uv)
1460 cdouble += (NV)auint;
1468 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
1471 else if (checksum > bits_in_uv)
1480 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
1483 else if (checksum > bits_in_uv)
1489 case 'l' | TYPE_IS_SHRIEKING:
1490 #if LONGSIZE != SIZE32
1493 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
1496 else if (checksum > bits_in_uv)
1497 cdouble += (NV)along;
1508 #if U32SIZE > SIZE32
1511 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1512 #if U32SIZE > SIZE32
1513 if (ai32 > 2147483647) ai32 -= 4294967296;
1517 else if (checksum > bits_in_uv)
1518 cdouble += (NV)ai32;
1523 case 'L' | TYPE_IS_SHRIEKING:
1524 #if LONGSIZE != SIZE32
1526 unsigned long aulong;
1527 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
1530 else if (checksum > bits_in_uv)
1531 cdouble += (NV)aulong;
1544 #if U32SIZE > SIZE32
1547 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
1548 if (datumtype == 'N')
1549 au32 = PerlSock_ntohl(au32);
1550 if (datumtype == 'V')
1554 else if (checksum > bits_in_uv)
1555 cdouble += (NV)au32;
1560 case 'V' | TYPE_IS_SHRIEKING:
1561 case 'N' | TYPE_IS_SHRIEKING:
1564 #if U32SIZE > SIZE32
1567 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1568 /* There should never be any byte swapping here. */
1569 assert(!TYPE_ENDIANNESS(datumtype));
1570 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1571 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1572 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1573 ai32 = (I32)vtohl((U32)ai32);
1576 else if (checksum > bits_in_uv)
1577 cdouble += (NV)ai32;
1585 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1586 /* newSVpv generates undef if aptr is NULL */
1587 mPUSHs(newSVpv(aptr, 0));
1595 while (len > 0 && s < strend) {
1597 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1598 auv = (auv << 7) | (ch & 0x7f);
1599 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1607 if (++bytes >= sizeof(UV)) { /* promote to string */
1610 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
1611 while (s < strend) {
1612 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1613 sv = mul128(sv, (U8)(ch & 0x7f));
1619 t = SvPV_nolen_const(sv);
1628 if ((s >= strend) && bytes)
1629 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1633 if (symptr->howlen == e_star)
1634 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1636 if (s + sizeof(char*) <= strend) {
1638 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1639 /* newSVpvn generates undef if aptr is NULL */
1640 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1643 #if defined(HAS_QUAD) && IVSIZE >= 8
1647 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
1649 mPUSHs(newSViv((IV)aquad));
1650 else if (checksum > bits_in_uv)
1651 cdouble += (NV)aquad;
1659 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
1661 mPUSHs(newSVuv((UV)auquad));
1662 else if (checksum > bits_in_uv)
1663 cdouble += (NV)auquad;
1669 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1673 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
1683 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
1693 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1694 datumtype, needs_swap);
1701 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1705 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1706 sizeof(aldouble.bytes), datumtype, needs_swap);
1707 /* The most common long double format, the x86 80-bit
1708 * extended precision, has either 2 or 6 unused bytes,
1709 * which may contain garbage, which may contain
1710 * unintentional data. While we do zero the bytes of
1711 * the long double data in pack(), here in unpack() we
1712 * don't, because it's really hard to envision that
1713 * reading the long double off aldouble would be
1714 * affected by the unused bytes.
1716 * Note that trying to unpack 'long doubles' of 'long
1717 * doubles' packed in another system is in the general
1718 * case doomed without having more detail. */
1720 mPUSHn(aldouble.ld);
1722 cdouble += aldouble.ld;
1728 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1729 sv = sv_2mortal(newSV(l));
1730 if (l) SvPOK_on(sv);
1733 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1738 next_uni_uu(aTHX_ &s, strend, &a);
1739 next_uni_uu(aTHX_ &s, strend, &b);
1740 next_uni_uu(aTHX_ &s, strend, &c);
1741 next_uni_uu(aTHX_ &s, strend, &d);
1742 hunk[0] = (char)((a << 2) | (b >> 4));
1743 hunk[1] = (char)((b << 4) | (c >> 2));
1744 hunk[2] = (char)((c << 6) | d);
1746 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1754 /* possible checksum byte */
1755 const char *skip = s+UTF8SKIP(s);
1756 if (skip < strend && *skip == '\n')
1762 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1766 len = PL_uudmap[*(U8*)s++] & 077;
1768 if (s < strend && ISUUCHAR(*s))
1769 a = PL_uudmap[*(U8*)s++] & 077;
1772 if (s < strend && ISUUCHAR(*s))
1773 b = PL_uudmap[*(U8*)s++] & 077;
1776 if (s < strend && ISUUCHAR(*s))
1777 c = PL_uudmap[*(U8*)s++] & 077;
1780 if (s < strend && ISUUCHAR(*s))
1781 d = PL_uudmap[*(U8*)s++] & 077;
1784 hunk[0] = (char)((a << 2) | (b >> 4));
1785 hunk[1] = (char)((b << 4) | (c >> 2));
1786 hunk[2] = (char)((c << 6) | d);
1788 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1793 else /* possible checksum byte */
1794 if (s + 1 < strend && s[1] == '\n')
1801 } /* End of switch */
1804 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1805 (checksum > bits_in_uv &&
1806 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1809 anv = (NV) (1 << (checksum & 15));
1810 while (checksum >= 16) {
1814 while (cdouble < 0.0)
1816 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
1817 sv = newSVnv(cdouble);
1820 if (checksum < bits_in_uv) {
1821 UV mask = ((UV)1 << checksum) - 1;
1830 if (symptr->flags & FLAG_SLASH){
1831 if (SP - PL_stack_base - start_sp_offset <= 0)
1833 if( next_symbol(symptr) ){
1834 if( symptr->howlen == e_number )
1835 Perl_croak(aTHX_ "Count after length/code in unpack" );
1837 /* ...end of char buffer then no decent length available */
1838 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1840 /* take top of stack (hope it's numeric) */
1843 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1846 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1848 datumtype = symptr->code;
1849 explicit_length = FALSE;
1857 return SP - PL_stack_base - start_sp_offset;
1864 I32 gimme = GIMME_V;
1867 const char *pat = SvPV_const(left, llen);
1868 const char *s = SvPV_const(right, rlen);
1869 const char *strend = s + rlen;
1870 const char *patend = pat + llen;
1874 cnt = unpackstring(pat, patend, s, strend,
1875 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
1876 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
1879 if ( !cnt && gimme == G_SCALAR )
1880 PUSHs(&PL_sv_undef);
1885 doencodes(U8 *h, const U8 *s, I32 len)
1887 *h++ = PL_uuemap[len];
1889 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1890 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1891 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1892 *h++ = PL_uuemap[(077 & (s[2] & 077))];
1897 const U8 r = (len > 1 ? s[1] : '\0');
1898 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1899 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1900 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1901 *h++ = PL_uuemap[0];
1908 S_is_an_int(pTHX_ const char *s, STRLEN l)
1910 SV *result = newSVpvn(s, l);
1911 char *const result_c = SvPV_nolen(result); /* convenience */
1912 char *out = result_c;
1916 PERL_ARGS_ASSERT_IS_AN_INT;
1924 SvREFCNT_dec(result);
1947 SvREFCNT_dec(result);
1953 SvCUR_set(result, out - result_c);
1957 /* pnum must be '\0' terminated */
1959 S_div128(pTHX_ SV *pnum, bool *done)
1962 char * const s = SvPV(pnum, len);
1966 PERL_ARGS_ASSERT_DIV128;
1970 const int i = m * 10 + (*t - '0');
1971 const int r = (i >> 7); /* r < 10 */
1979 SvCUR_set(pnum, (STRLEN) (t - s));
1984 =for apidoc packlist
1986 The engine implementing pack() Perl function.
1992 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
1996 PERL_ARGS_ASSERT_PACKLIST;
1998 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2000 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2001 Also make sure any UTF8 flag is loaded */
2002 SvPV_force_nolen(cat);
2004 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2006 (void)pack_rec( cat, &sym, beglist, endlist );
2009 /* like sv_utf8_upgrade, but also repoint the group start markers */
2011 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2014 const char *from_ptr, *from_start, *from_end, **marks, **m;
2015 char *to_start, *to_ptr;
2017 if (SvUTF8(sv)) return;
2019 from_start = SvPVX_const(sv);
2020 from_end = from_start + SvCUR(sv);
2021 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2022 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
2023 if (from_ptr == from_end) {
2024 /* Simple case: no character needs to be changed */
2029 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2030 Newx(to_start, len, char);
2031 Copy(from_start, to_start, from_ptr-from_start, char);
2032 to_ptr = to_start + (from_ptr-from_start);
2034 Newx(marks, sym_ptr->level+2, const char *);
2035 for (group=sym_ptr; group; group = group->previous)
2036 marks[group->level] = from_start + group->strbeg;
2037 marks[sym_ptr->level+1] = from_end+1;
2038 for (m = marks; *m < from_ptr; m++)
2039 *m = to_start + (*m-from_start);
2041 for (;from_ptr < from_end; from_ptr++) {
2042 while (*m == from_ptr) *m++ = to_ptr;
2043 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2047 while (*m == from_ptr) *m++ = to_ptr;
2048 if (m != marks + sym_ptr->level+1) {
2051 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2052 "level=%d", m, marks, sym_ptr->level);
2054 for (group=sym_ptr; group; group = group->previous)
2055 group->strbeg = marks[group->level] - to_start;
2060 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2061 from_start -= SvIVX(sv);
2064 SvFLAGS(sv) &= ~SVf_OOK;
2067 Safefree(from_start);
2068 SvPV_set(sv, to_start);
2069 SvCUR_set(sv, to_ptr - to_start);
2074 /* Exponential string grower. Makes string extension effectively O(n)
2075 needed says how many extra bytes we need (not counting the final '\0')
2076 Only grows the string if there is an actual lack of space
2079 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2080 const STRLEN cur = SvCUR(sv);
2081 const STRLEN len = SvLEN(sv);
2084 PERL_ARGS_ASSERT_SV_EXP_GROW;
2086 if (len - cur > needed) return SvPVX(sv);
2087 extend = needed > len ? needed : len;
2088 return SvGROW(sv, len+extend+1);
2092 S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
2095 if (UNLIKELY(SvAMAGIC(sv)))
2097 if (UNLIKELY(isinfnansv(sv))) {
2098 const I32 c = TYPE_NO_MODIFIERS(datumtype);
2099 const NV nv = SvNV_nomg(sv);
2101 Perl_croak(aTHX_ "Cannot compress %"NVgf" in pack", nv);
2103 Perl_croak(aTHX_ "Cannot pack %"NVgf" with '%c'", nv, (int) c);
2108 #define SvIV_no_inf(sv,d) \
2109 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
2110 #define SvUV_no_inf(sv,d) \
2111 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
2115 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2117 tempsym_t lookahead;
2118 I32 items = endlist - beglist;
2119 bool found = next_symbol(symptr);
2120 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2121 bool warn_utf8 = ckWARN(WARN_UTF8);
2124 PERL_ARGS_ASSERT_PACK_REC;
2126 if (symptr->level == 0 && found && symptr->code == 'U') {
2127 marked_upgrade(aTHX_ cat, symptr);
2128 symptr->flags |= FLAG_DO_UTF8;
2131 symptr->strbeg = SvCUR(cat);
2137 SV *lengthcode = NULL;
2138 I32 datumtype = symptr->code;
2139 howlen_t howlen = symptr->howlen;
2140 char *start = SvPVX(cat);
2141 char *cur = start + SvCUR(cat);
2144 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2145 #define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
2149 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2153 /* e_no_len and e_number */
2154 len = symptr->length;
2159 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2161 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2162 /* We can process this letter. */
2163 STRLEN size = props & PACK_SIZE_MASK;
2164 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2168 /* Look ahead for next symbol. Do we have code/code? */
2169 lookahead = *symptr;
2170 found = next_symbol(&lookahead);
2171 if (symptr->flags & FLAG_SLASH) {
2173 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2174 if (strchr("aAZ", lookahead.code)) {
2175 if (lookahead.howlen == e_number) count = lookahead.length;
2178 count = sv_len_utf8(*beglist);
2181 if (lookahead.code == 'Z') count++;
2184 if (lookahead.howlen == e_number && lookahead.length < items)
2185 count = lookahead.length;
2188 lookahead.howlen = e_number;
2189 lookahead.length = count;
2190 lengthcode = sv_2mortal(newSViv(count));
2193 needs_swap = NEEDS_SWAP(datumtype);
2195 /* Code inside the switch must take care to properly update
2196 cat (CUR length and '\0' termination) if it updated *cur and
2197 doesn't simply leave using break */
2198 switch (TYPE_NO_ENDIANNESS(datumtype)) {
2200 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2201 (int) TYPE_NO_MODIFIERS(datumtype));
2203 Perl_croak(aTHX_ "'%%' may not be used in pack");
2205 case '.' | TYPE_IS_SHRIEKING:
2207 if (howlen == e_star) from = start;
2208 else if (len == 0) from = cur;
2210 tempsym_t *group = symptr;
2212 while (--len && group) group = group->previous;
2213 from = group ? start + group->strbeg : start;
2216 len = SvIV_no_inf(fromstr, datumtype);
2218 case '@' | TYPE_IS_SHRIEKING:
2220 from = start + symptr->strbeg;
2222 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2224 while (len && from < cur) {
2225 from += UTF8SKIP(from);
2229 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2231 /* Here we know from == cur */
2233 GROWING(0, cat, start, cur, len);
2234 Zero(cur, len, char);
2236 } else if (from < cur) {
2239 } else goto no_change;
2247 if (len > 0) goto grow;
2248 if (len == 0) goto no_change;
2255 tempsym_t savsym = *symptr;
2256 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2257 symptr->flags |= group_modifiers;
2258 symptr->patend = savsym.grpend;
2260 symptr->previous = &lookahead;
2263 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2264 else symptr->flags &= ~FLAG_PARSE_UTF8;
2265 was_utf8 = SvUTF8(cat);
2266 symptr->patptr = savsym.grpbeg;
2267 beglist = pack_rec(cat, symptr, beglist, endlist);
2268 if (SvUTF8(cat) != was_utf8)
2269 /* This had better be an upgrade while in utf8==0 mode */
2272 if (savsym.howlen == e_star && beglist == endlist)
2273 break; /* No way to continue */
2275 items = endlist - beglist;
2276 lookahead.flags = symptr->flags & ~group_modifiers;
2279 case 'X' | TYPE_IS_SHRIEKING:
2280 if (!len) /* Avoid division by 0 */
2287 hop += UTF8SKIP(hop);
2294 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2298 len = (cur-start) % len;
2302 if (len < 1) goto no_change;
2306 Perl_croak(aTHX_ "'%c' outside of string in pack",
2307 (int) TYPE_NO_MODIFIERS(datumtype));
2308 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2310 Perl_croak(aTHX_ "'%c' outside of string in pack",
2311 (int) TYPE_NO_MODIFIERS(datumtype));
2317 if (cur - start < len)
2318 Perl_croak(aTHX_ "'%c' outside of string in pack",
2319 (int) TYPE_NO_MODIFIERS(datumtype));
2322 if (cur < start+symptr->strbeg) {
2323 /* Make sure group starts don't point into the void */
2325 const STRLEN length = cur-start;
2326 for (group = symptr;
2327 group && length < group->strbeg;
2328 group = group->previous) group->strbeg = length;
2329 lookahead.strbeg = length;
2332 case 'x' | TYPE_IS_SHRIEKING: {
2334 if (!len) /* Avoid division by 0 */
2336 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2337 else ai32 = (cur - start) % len;
2338 if (ai32 == 0) goto no_change;
2350 aptr = SvPV_const(fromstr, fromlen);
2351 if (DO_UTF8(fromstr)) {
2352 const char *end, *s;
2354 if (!utf8 && !SvUTF8(cat)) {
2355 marked_upgrade(aTHX_ cat, symptr);
2356 lookahead.flags |= FLAG_DO_UTF8;
2357 lookahead.strbeg = symptr->strbeg;
2360 cur = start + SvCUR(cat);
2362 if (howlen == e_star) {
2363 if (utf8) goto string_copy;
2367 end = aptr + fromlen;
2368 fromlen = datumtype == 'Z' ? len-1 : len;
2369 while ((I32) fromlen > 0 && s < end) {
2374 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2377 if (datumtype == 'Z') len++;
2383 fromlen = len - fromlen;
2384 if (datumtype == 'Z') fromlen--;
2385 if (howlen == e_star) {
2387 if (datumtype == 'Z') len++;
2389 GROWING(0, cat, start, cur, len);
2390 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2391 datumtype | TYPE_IS_PACK))
2392 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2393 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2394 (int)datumtype, aptr, end, cur, (UV)fromlen);
2398 if (howlen == e_star) {
2400 if (datumtype == 'Z') len++;
2402 if (len <= (I32) fromlen) {
2404 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2406 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2408 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2409 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2411 while (fromlen > 0) {
2412 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2418 if (howlen == e_star) {
2420 if (datumtype == 'Z') len++;
2422 if (len <= (I32) fromlen) {
2424 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2426 GROWING(0, cat, start, cur, len);
2427 Copy(aptr, cur, fromlen, char);
2431 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2438 const char *str, *end;
2445 str = SvPV_const(fromstr, fromlen);
2446 end = str + fromlen;
2447 if (DO_UTF8(fromstr)) {
2449 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2451 utf8_source = FALSE;
2452 utf8_flags = 0; /* Unused, but keep compilers happy */
2454 if (howlen == e_star) len = fromlen;
2455 field_len = (len+7)/8;
2456 GROWING(utf8, cat, start, cur, field_len);
2457 if (len > (I32)fromlen) len = fromlen;
2460 if (datumtype == 'B')
2464 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2466 } else bits |= *str++ & 1;
2467 if (l & 7) bits <<= 1;
2469 PUSH_BYTE(utf8, cur, bits);
2474 /* datumtype == 'b' */
2478 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2479 if (val & 1) bits |= 0x80;
2480 } else if (*str++ & 1)
2482 if (l & 7) bits >>= 1;
2484 PUSH_BYTE(utf8, cur, bits);
2490 if (datumtype == 'B')
2491 bits <<= 7 - (l & 7);
2493 bits >>= 7 - (l & 7);
2494 PUSH_BYTE(utf8, cur, bits);
2497 /* Determine how many chars are left in the requested field */
2499 if (howlen == e_star) field_len = 0;
2500 else field_len -= l;
2501 Zero(cur, field_len, char);
2507 const char *str, *end;
2514 str = SvPV_const(fromstr, fromlen);
2515 end = str + fromlen;
2516 if (DO_UTF8(fromstr)) {
2518 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2520 utf8_source = FALSE;
2521 utf8_flags = 0; /* Unused, but keep compilers happy */
2523 if (howlen == e_star) len = fromlen;
2524 field_len = (len+1)/2;
2525 GROWING(utf8, cat, start, cur, field_len);
2526 if (!utf8 && len > (I32)fromlen) len = fromlen;
2529 if (datumtype == 'H')
2533 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2534 if (val < 256 && isALPHA(val))
2535 bits |= (val + 9) & 0xf;
2538 } else if (isALPHA(*str))
2539 bits |= (*str++ + 9) & 0xf;
2541 bits |= *str++ & 0xf;
2542 if (l & 1) bits <<= 4;
2544 PUSH_BYTE(utf8, cur, bits);
2552 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2553 if (val < 256 && isALPHA(val))
2554 bits |= ((val + 9) & 0xf) << 4;
2556 bits |= (val & 0xf) << 4;
2557 } else if (isALPHA(*str))
2558 bits |= ((*str++ + 9) & 0xf) << 4;
2560 bits |= (*str++ & 0xf) << 4;
2561 if (l & 1) bits >>= 4;
2563 PUSH_BYTE(utf8, cur, bits);
2569 PUSH_BYTE(utf8, cur, bits);
2572 /* Determine how many chars are left in the requested field */
2574 if (howlen == e_star) field_len = 0;
2575 else field_len -= l;
2576 Zero(cur, field_len, char);
2584 aiv = SvIV_no_inf(fromstr, datumtype);
2585 if ((-128 > aiv || aiv > 127))
2586 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2587 "Character in 'c' format wrapped in pack");
2588 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2593 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2599 aiv = SvIV_no_inf(fromstr, datumtype);
2600 if ((0 > aiv || aiv > 0xff))
2601 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2602 "Character in 'C' format wrapped in pack");
2603 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2608 U8 in_bytes = (U8)IN_BYTES;
2610 end = start+SvLEN(cat)-1;
2611 if (utf8) end -= UTF8_MAXLEN-1;
2615 auv = SvUV_no_inf(fromstr, datumtype);
2616 if (in_bytes) auv = auv % 0x100;
2621 SvCUR_set(cat, cur - start);
2623 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2624 end = start+SvLEN(cat)-UTF8_MAXLEN;
2626 cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
2629 0 : UNICODE_ALLOW_ANY);
2634 SvCUR_set(cat, cur - start);
2635 marked_upgrade(aTHX_ cat, symptr);
2636 lookahead.flags |= FLAG_DO_UTF8;
2637 lookahead.strbeg = symptr->strbeg;
2640 cur = start + SvCUR(cat);
2641 end = start+SvLEN(cat)-UTF8_MAXLEN;
2644 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2645 "Character in 'W' format wrapped in pack");
2650 SvCUR_set(cat, cur - start);
2651 GROWING(0, cat, start, cur, len+1);
2652 end = start+SvLEN(cat)-1;
2654 *(U8 *) cur++ = (U8)auv;
2663 if (!(symptr->flags & FLAG_DO_UTF8)) {
2664 marked_upgrade(aTHX_ cat, symptr);
2665 lookahead.flags |= FLAG_DO_UTF8;
2666 lookahead.strbeg = symptr->strbeg;
2672 end = start+SvLEN(cat);
2673 if (!utf8) end -= UTF8_MAXLEN;
2677 auv = SvUV_no_inf(fromstr, datumtype);
2679 U8 buffer[UTF8_MAXLEN], *endb;
2680 endb = uvchr_to_utf8_flags(buffer, UNI_TO_NATIVE(auv),
2682 0 : UNICODE_ALLOW_ANY);
2683 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2685 SvCUR_set(cat, cur - start);
2686 GROWING(0, cat, start, cur,
2687 len+(endb-buffer)*UTF8_EXPAND);
2688 end = start+SvLEN(cat);
2690 cur = S_bytes_to_uni(buffer, endb-buffer, cur, 0);
2694 SvCUR_set(cat, cur - start);
2695 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2696 end = start+SvLEN(cat)-UTF8_MAXLEN;
2698 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, UNI_TO_NATIVE(auv),
2700 0 : UNICODE_ALLOW_ANY);
2705 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2711 anv = SvNV(fromstr);
2712 # if defined(VMS) && !defined(_IEEE_FP)
2713 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2714 * on Alpha; fake it if we don't have them.
2718 else if (anv < -FLT_MAX)
2720 else afloat = (float)anv;
2722 /* a simple cast to float is undefined if outside
2723 * the range of values that can be represented */
2724 afloat = (float)(anv > FLT_MAX ? NV_INF :
2725 anv < -FLT_MAX ? -NV_INF : anv);
2727 PUSH_VAR(utf8, cur, afloat, needs_swap);
2735 anv = SvNV(fromstr);
2736 # if defined(VMS) && !defined(_IEEE_FP)
2737 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2738 * on Alpha; fake it if we don't have them.
2742 else if (anv < -DBL_MAX)
2744 else adouble = (double)anv;
2746 adouble = (double)anv;
2748 PUSH_VAR(utf8, cur, adouble, needs_swap);
2753 Zero(&anv, 1, NV); /* can be long double with unused bits */
2757 /* to work round a gcc/x86 bug; don't use SvNV */
2758 anv.nv = sv_2nv(fromstr);
2760 anv.nv = SvNV(fromstr);
2762 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
2766 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2769 /* long doubles can have unused bits, which may be nonzero */
2770 Zero(&aldouble, 1, long double);
2774 /* to work round a gcc/x86 bug; don't use SvNV */
2775 aldouble.ld = (long double)sv_2nv(fromstr);
2777 aldouble.ld = (long double)SvNV(fromstr);
2779 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2785 case 'n' | TYPE_IS_SHRIEKING:
2790 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2791 ai16 = PerlSock_htons(ai16);
2792 PUSH16(utf8, cur, &ai16, FALSE);
2795 case 'v' | TYPE_IS_SHRIEKING:
2800 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2802 PUSH16(utf8, cur, &ai16, FALSE);
2805 case 'S' | TYPE_IS_SHRIEKING:
2806 #if SHORTSIZE != SIZE16
2808 unsigned short aushort;
2810 aushort = SvUV_no_inf(fromstr, datumtype);
2811 PUSH_VAR(utf8, cur, aushort, needs_swap);
2821 au16 = (U16)SvUV_no_inf(fromstr, datumtype);
2822 PUSH16(utf8, cur, &au16, needs_swap);
2825 case 's' | TYPE_IS_SHRIEKING:
2826 #if SHORTSIZE != SIZE16
2830 ashort = SvIV_no_inf(fromstr, datumtype);
2831 PUSH_VAR(utf8, cur, ashort, needs_swap);
2841 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2842 PUSH16(utf8, cur, &ai16, needs_swap);
2846 case 'I' | TYPE_IS_SHRIEKING:
2850 auint = SvUV_no_inf(fromstr, datumtype);
2851 PUSH_VAR(utf8, cur, auint, needs_swap);
2858 aiv = SvIV_no_inf(fromstr, datumtype);
2859 PUSH_VAR(utf8, cur, aiv, needs_swap);
2866 auv = SvUV_no_inf(fromstr, datumtype);
2867 PUSH_VAR(utf8, cur, auv, needs_swap);
2874 S_sv_check_infnan(aTHX_ fromstr, datumtype);
2875 anv = SvNV_nomg(fromstr);
2879 SvCUR_set(cat, cur - start);
2880 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
2883 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2884 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2885 any negative IVs will have already been got by the croak()
2886 above. IOK is untrue for fractions, so we test them
2887 against UV_MAX_P1. */
2888 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2889 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
2890 char *in = buf + sizeof(buf);
2891 UV auv = SvUV_nomg(fromstr);
2894 *--in = (char)((auv & 0x7f) | 0x80);
2897 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2898 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2899 in, (buf + sizeof(buf)) - in);
2900 } else if (SvPOKp(fromstr))
2902 else if (SvNOKp(fromstr)) {
2903 /* 10**NV_MAX_10_EXP is the largest power of 10
2904 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
2905 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2906 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2907 And with that many bytes only Inf can overflow.
2908 Some C compilers are strict about integral constant
2909 expressions so we conservatively divide by a slightly
2910 smaller integer instead of multiplying by the exact
2911 floating-point value.
2913 #ifdef NV_MAX_10_EXP
2914 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
2915 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
2917 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
2918 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
2920 char *in = buf + sizeof(buf);
2922 anv = Perl_floor(anv);
2924 const NV next = Perl_floor(anv / 128);
2925 if (in <= buf) /* this cannot happen ;-) */
2926 Perl_croak(aTHX_ "Cannot compress integer in pack");
2927 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
2930 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2931 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2932 in, (buf + sizeof(buf)) - in);
2941 /* Copy string and check for compliance */
2942 from = SvPV_nomg_const(fromstr, len);
2943 if ((norm = is_an_int(from, len)) == NULL)
2944 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
2946 Newx(result, len, char);
2949 while (!done) *--in = div128(norm, &done) | 0x80;
2950 result[len - 1] &= 0x7F; /* clear continue bit */
2951 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2952 in, (result + len) - in);
2954 SvREFCNT_dec(norm); /* free norm */
2959 case 'i' | TYPE_IS_SHRIEKING:
2963 aint = SvIV_no_inf(fromstr, datumtype);
2964 PUSH_VAR(utf8, cur, aint, needs_swap);
2967 case 'N' | TYPE_IS_SHRIEKING:
2972 au32 = SvUV_no_inf(fromstr, datumtype);
2973 au32 = PerlSock_htonl(au32);
2974 PUSH32(utf8, cur, &au32, FALSE);
2977 case 'V' | TYPE_IS_SHRIEKING:
2982 au32 = SvUV_no_inf(fromstr, datumtype);
2984 PUSH32(utf8, cur, &au32, FALSE);
2987 case 'L' | TYPE_IS_SHRIEKING:
2988 #if LONGSIZE != SIZE32
2990 unsigned long aulong;
2992 aulong = SvUV_no_inf(fromstr, datumtype);
2993 PUSH_VAR(utf8, cur, aulong, needs_swap);
3003 au32 = SvUV_no_inf(fromstr, datumtype);
3004 PUSH32(utf8, cur, &au32, needs_swap);
3007 case 'l' | TYPE_IS_SHRIEKING:
3008 #if LONGSIZE != SIZE32
3012 along = SvIV_no_inf(fromstr, datumtype);
3013 PUSH_VAR(utf8, cur, along, needs_swap);
3023 ai32 = SvIV_no_inf(fromstr, datumtype);
3024 PUSH32(utf8, cur, &ai32, needs_swap);
3027 #if defined(HAS_QUAD) && IVSIZE >= 8
3032 auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype);
3033 PUSH_VAR(utf8, cur, auquad, needs_swap);
3040 aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype);
3041 PUSH_VAR(utf8, cur, aquad, needs_swap);
3046 len = 1; /* assume SV is correct length */
3047 GROWING(utf8, cat, start, cur, sizeof(char *));
3054 SvGETMAGIC(fromstr);
3055 if (!SvOK(fromstr)) aptr = NULL;
3057 /* XXX better yet, could spirit away the string to
3058 * a safe spot and hang on to it until the result
3059 * of pack() (and all copies of the result) are
3062 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3063 !SvREADONLY(fromstr)))) {
3064 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3065 "Attempt to pack pointer to temporary value");
3067 if (SvPOK(fromstr) || SvNIOK(fromstr))
3068 aptr = SvPV_nomg_const_nolen(fromstr);
3070 aptr = SvPV_force_flags_nolen(fromstr, 0);
3072 PUSH_VAR(utf8, cur, aptr, needs_swap);
3076 const char *aptr, *aend;
3080 if (len <= 2) len = 45;
3081 else len = len / 3 * 3;
3083 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3084 "Field too wide in 'u' format in pack");
3087 aptr = SvPV_const(fromstr, fromlen);
3088 from_utf8 = DO_UTF8(fromstr);
3090 aend = aptr + fromlen;
3091 fromlen = sv_len_utf8_nomg(fromstr);
3092 } else aend = NULL; /* Unused, but keep compilers happy */
3093 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3094 while (fromlen > 0) {
3097 U8 hunk[1+63/3*4+1];
3099 if ((I32)fromlen > len)
3105 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3106 'u' | TYPE_IS_PACK)) {
3108 SvCUR_set(cat, cur - start);
3109 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3110 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3111 aptr, aend, buffer, (long) todo);
3113 end = doencodes(hunk, (const U8 *)buffer, todo);
3115 end = doencodes(hunk, (const U8 *)aptr, todo);
3118 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
3125 SvCUR_set(cat, cur - start);
3127 *symptr = lookahead;
3136 dSP; dMARK; dORIGMARK; dTARGET;
3139 SV *pat_sv = *++MARK;
3140 const char *pat = SvPV_const(pat_sv, fromlen);
3141 const char *patend = pat + fromlen;
3147 packlist(cat, pat, patend, MARK, SP + 1);
3157 * c-indentation-style: bsd
3159 * indent-tabs-mode: nil
3162 * ex: set ts=8 sts=4 sw=4 et: