3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
32 #define PERL_IN_PP_PACK_C
35 /* Types used by pack/unpack */
37 e_no_len, /* no length */
38 e_number, /* number, [] */
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 U32 flags; /* /=4, comma=2, pack=1 */
49 /* and group modifiers */
50 SSize_t length; /* length/repeat count */
51 howlen_t howlen; /* how length is given */
52 int level; /* () nesting level */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
57 #define TEMPSYM_INIT(symptr, p, e, f) \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
81 U8 bytes[sizeof(long double)];
88 /* Maximum number of bytes to which a byte can grow due to upgrade */
92 * Offset for integer pack/unpack.
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
112 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
115 #if U16SIZE <= SIZE16 && U32SIZE <= SIZE32
116 # define OFF16(p) ((char *) (p))
117 # define OFF32(p) ((char *) (p))
118 #elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
119 # define OFF16(p) ((char*)(p))
120 # define OFF32(p) ((char*)(p))
121 #elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
122 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
123 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
125 # error "bad cray byte order"
128 #define PUSH16(utf8, cur, p, needs_swap) \
129 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
130 #define PUSH32(utf8, cur, p, needs_swap) \
131 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
133 #if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
134 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
135 #elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
136 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
138 # error "Unsupported byteorder"
139 /* Need to add code here to re-instate mixed endian support.
140 NEEDS_SWAP would need to hold a flag indicating which action to
141 take, and S_reverse_copy and the code in S_utf8_to_bytes would need
142 logic adding to deal with any mixed-endian transformations needed.
146 /* Only to be used inside a loop (see the break) */
147 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
149 if (UNLIKELY(utf8)) { \
150 if (!S_utf8_to_bytes(aTHX_ &s, strend, \
151 (char *) (buf), len, datumtype)) break; \
153 if (UNLIKELY(needs_swap)) \
154 S_reverse_copy(s, (char *) (buf), len); \
156 Copy(s, (char *) (buf), len, char); \
161 #define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
162 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
164 #define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
165 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
167 #define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
168 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
170 #define PUSH_VAR(utf8, aptr, var, needs_swap) \
171 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
173 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
174 #define MAX_SUB_TEMPLATE_LEVEL 100
176 /* flags (note that type modifiers can also be used as flags!) */
177 #define FLAG_WAS_UTF8 0x40
178 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
179 #define FLAG_UNPACK_ONLY_ONE 0x10
180 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
181 #define FLAG_SLASH 0x04
182 #define FLAG_COMMA 0x02
183 #define FLAG_PACK 0x01
186 S_mul128(pTHX_ SV *sv, U8 m)
189 char *s = SvPV(sv, len);
192 PERL_ARGS_ASSERT_MUL128;
194 if (! memBEGINs(s, len, "0000")) { /* need to grow sv */
195 SV * const tmpNew = newSVpvs("0000000000");
197 sv_catsv(tmpNew, sv);
198 SvREFCNT_dec(sv); /* free old sv */
203 while (!*t) /* trailing '\0'? */
206 const U32 i = ((*t - '0') << 7) + m;
207 *(t--) = '0' + (char)(i % 10);
213 /* Explosives and implosives. */
215 #define ISUUCHAR(ch) inRANGE(NATIVE_TO_LATIN1(ch), \
216 NATIVE_TO_LATIN1(' '), \
217 NATIVE_TO_LATIN1('a') - 1)
220 #define TYPE_IS_SHRIEKING 0x100
221 #define TYPE_IS_BIG_ENDIAN 0x200
222 #define TYPE_IS_LITTLE_ENDIAN 0x400
223 #define TYPE_IS_PACK 0x800
224 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
225 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
226 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
228 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
229 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
231 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
233 #define PACK_SIZE_CANNOT_CSUM 0x80
234 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
235 #define PACK_SIZE_MASK 0x3F
237 #include "packsizetables.inc"
240 S_reverse_copy(const char *src, char *dest, STRLEN len)
248 utf8_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
256 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
257 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
258 if (retlen == (STRLEN) -1)
260 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
261 (int) TYPE_NO_MODIFIERS(datumtype));
263 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
264 "Character in '%c' format wrapped in unpack",
265 (int) TYPE_NO_MODIFIERS(datumtype));
272 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
273 utf8_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
277 S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, SSize_t buf_len, I32 datumtype)
281 const char *from = *s;
283 const U32 flags = ckWARN(WARN_UTF8) ?
284 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
285 const bool needs_swap = NEEDS_SWAP(datumtype);
287 if (UNLIKELY(needs_swap))
290 for (;buf_len > 0; buf_len--) {
291 if (from >= end) return FALSE;
292 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
293 if (retlen == (STRLEN) -1) {
294 from += UTF8_SAFE_SKIP(from, end);
296 } else from += retlen;
301 if (UNLIKELY(needs_swap))
302 *(U8 *)--buf = (U8)val;
304 *(U8 *)buf++ = (U8)val;
306 /* We have enough characters for the buffer. Did we have problems ? */
309 /* Rewalk the string fragment while warning */
311 const U32 flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
312 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
313 if (ptr >= end) break;
314 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
316 if (from > end) from = end;
319 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
320 WARN_PACK : WARN_UNPACK),
321 "Character(s) in '%c' format wrapped in %s",
322 (int) TYPE_NO_MODIFIERS(datumtype),
323 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
330 S_my_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
331 PERL_ARGS_ASSERT_MY_BYTES_TO_UTF8;
333 if (UNLIKELY(needs_swap)) {
334 const U8 *p = start + len;
335 while (p-- > start) {
336 append_utf8_from_native_byte(*p, (U8 **) & dest);
339 const U8 * const end = start + len;
340 while (start < end) {
341 append_utf8_from_native_byte(*start, (U8 **) & dest);
348 #define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
350 if (UNLIKELY(utf8)) \
351 (cur) = my_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap); \
353 if (UNLIKELY(needs_swap)) \
354 S_reverse_copy((char *)(buf), cur, len); \
356 Copy(buf, cur, len, char); \
361 #define SAFE_UTF8_EXPAND(var) \
363 if ((var) > SSize_t_MAX / UTF8_EXPAND) \
364 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
365 (var) = (var) * UTF8_EXPAND; \
368 #define GROWING2(utf8, cat, start, cur, item_size, item_count) \
370 if (SSize_t_MAX / (item_size) < (item_count)) \
371 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
372 GROWING((utf8), (cat), (start), (cur), (item_size) * (item_count)); \
375 #define GROWING(utf8, cat, start, cur, in_len) \
377 STRLEN glen = (in_len); \
378 STRLEN catcur = (STRLEN)((cur) - (start)); \
379 if (utf8) SAFE_UTF8_EXPAND(glen); \
380 if (SSize_t_MAX - glen < catcur) \
381 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
382 if (catcur + glen >= SvLEN(cat)) { \
383 (start) = sv_exp_grow(cat, glen); \
384 (cur) = (start) + SvCUR(cat); \
388 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
390 const STRLEN glen = (in_len); \
392 if (utf8) SAFE_UTF8_EXPAND(gl); \
393 if ((cur) + gl >= (start) + SvLEN(cat)) { \
395 SvCUR_set((cat), (cur) - (start)); \
396 (start) = sv_exp_grow(cat, gl); \
397 (cur) = (start) + SvCUR(cat); \
399 PUSH_BYTES(utf8, cur, buf, glen, 0); \
402 #define PUSH_BYTE(utf8, s, byte) \
405 const U8 au8 = (byte); \
406 (s) = my_bytes_to_utf8(&au8, 1, (s), 0);\
407 } else *(U8 *)(s)++ = (byte); \
410 /* Only to be used inside a loop (see the break) */
411 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
414 if (str >= end) break; \
415 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
416 if (retlen == (STRLEN) -1) { \
418 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
423 static const char *_action( const tempsym_t* symptr )
425 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
428 /* Returns the sizeof() struct described by pat */
430 S_measure_struct(pTHX_ tempsym_t* symptr)
434 PERL_ARGS_ASSERT_MEASURE_STRUCT;
436 while (next_symbol(symptr)) {
439 switch (symptr->howlen) {
441 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
445 /* e_no_len and e_number */
446 len = symptr->length;
450 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
453 /* endianness doesn't influence the size of a type */
454 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
456 Perl_croak(aTHX_ "Invalid type '%c' in %s",
457 (int)TYPE_NO_MODIFIERS(symptr->code),
459 case '.' | TYPE_IS_SHRIEKING:
460 case '@' | TYPE_IS_SHRIEKING:
464 case 'U': /* XXXX Is it correct? */
467 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
468 (int) TYPE_NO_MODIFIERS(symptr->code),
475 tempsym_t savsym = *symptr;
476 symptr->patptr = savsym.grpbeg;
477 symptr->patend = savsym.grpend;
478 /* XXXX Theoretically, we need to measure many times at
479 different positions, since the subexpression may contain
480 alignment commands, but be not of aligned length.
481 Need to detect this and croak(). */
482 size = measure_struct(symptr);
486 case 'X' | TYPE_IS_SHRIEKING:
487 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
489 if (!len) /* Avoid division by 0 */
491 len = total % len; /* Assumed: the start is aligned. */
496 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
498 case 'x' | TYPE_IS_SHRIEKING:
499 if (!len) /* Avoid division by 0 */
501 star = total % len; /* Assumed: the start is aligned. */
502 if (star) /* Other portable ways? */
526 size = sizeof(char*);
536 /* locate matching closing parenthesis or bracket
537 * returns char pointer to char after match, or NULL
540 S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
542 PERL_ARGS_ASSERT_GROUP_END;
544 while (patptr < patend) {
545 const char c = *patptr++;
552 while (patptr < patend && *patptr != '\n')
556 patptr = group_end(patptr, patend, ')') + 1;
558 patptr = group_end(patptr, patend, ']') + 1;
560 Perl_croak(aTHX_ "No group ending character '%c' found in template",
562 NOT_REACHED; /* NOTREACHED */
566 /* Convert unsigned decimal number to binary.
567 * Expects a pointer to the first digit and address of length variable
568 * Advances char pointer to 1st non-digit char and returns number
571 S_get_num(pTHX_ const char *patptr, SSize_t *lenptr )
573 SSize_t len = *patptr++ - '0';
575 PERL_ARGS_ASSERT_GET_NUM;
577 while (isDIGIT(*patptr)) {
578 SSize_t nlen = (len * 10) + (*patptr++ - '0');
579 if (nlen < 0 || nlen/10 != len)
580 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
587 /* The marvellous template parsing routine: Using state stored in *symptr,
588 * locates next template code and count
591 S_next_symbol(pTHX_ tempsym_t* symptr )
593 const char* patptr = symptr->patptr;
594 const char* const patend = symptr->patend;
596 PERL_ARGS_ASSERT_NEXT_SYMBOL;
598 symptr->flags &= ~FLAG_SLASH;
600 while (patptr < patend) {
601 if (isSPACE(*patptr))
603 else if (*patptr == '#') {
605 while (patptr < patend && *patptr != '\n')
610 /* We should have found a template code */
611 I32 code = *patptr++ & 0xFF;
612 U32 inherited_modifiers = 0;
614 if (code == ','){ /* grandfather in commas but with a warning */
615 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
616 symptr->flags |= FLAG_COMMA;
617 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
618 "Invalid type ',' in %s", _action( symptr ) );
623 /* for '(', skip to ')' */
625 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
626 Perl_croak(aTHX_ "()-group starts with a count in %s",
628 symptr->grpbeg = patptr;
629 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
630 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
631 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
635 /* look for group modifiers to inherit */
636 if (TYPE_ENDIANNESS(symptr->flags)) {
637 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
638 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
641 /* look for modifiers */
642 while (patptr < patend) {
647 modifier = TYPE_IS_SHRIEKING;
648 allowed = "sSiIlLxXnNvV@.";
651 modifier = TYPE_IS_BIG_ENDIAN;
652 allowed = ENDIANNESS_ALLOWED_TYPES;
655 modifier = TYPE_IS_LITTLE_ENDIAN;
656 allowed = ENDIANNESS_ALLOWED_TYPES;
667 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
668 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
669 allowed, _action( symptr ) );
671 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
672 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
673 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
674 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
675 TYPE_ENDIANNESS_MASK)
676 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
677 *patptr, _action( symptr ) );
679 if ((code & modifier)) {
680 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
681 "Duplicate modifier '%c' after '%c' in %s",
682 *patptr, (int) TYPE_NO_MODIFIERS(code),
690 /* inherit modifiers */
691 code |= inherited_modifiers;
693 /* look for count and/or / */
694 if (patptr < patend) {
695 if (isDIGIT(*patptr)) {
696 patptr = get_num( patptr, &symptr->length );
697 symptr->howlen = e_number;
699 } else if (*patptr == '*') {
701 symptr->howlen = e_star;
703 } else if (*patptr == '[') {
704 const char* lenptr = ++patptr;
705 symptr->howlen = e_number;
706 patptr = group_end( patptr, patend, ']' ) + 1;
707 /* what kind of [] is it? */
708 if (isDIGIT(*lenptr)) {
709 lenptr = get_num( lenptr, &symptr->length );
711 Perl_croak(aTHX_ "Malformed integer in [] in %s",
714 tempsym_t savsym = *symptr;
715 symptr->patend = patptr-1;
716 symptr->patptr = lenptr;
717 savsym.length = measure_struct(symptr);
721 symptr->howlen = e_no_len;
726 while (patptr < patend) {
727 if (isSPACE(*patptr))
729 else if (*patptr == '#') {
731 while (patptr < patend && *patptr != '\n')
736 if (*patptr == '/') {
737 symptr->flags |= FLAG_SLASH;
739 if (patptr < patend &&
740 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
741 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
748 /* at end - no count, no / */
749 symptr->howlen = e_no_len;
754 symptr->patptr = patptr;
758 symptr->patptr = patptr;
763 There is no way to cleanly handle the case where we should process the
764 string per byte in its upgraded form while it's really in downgraded form
765 (e.g. estimates like strend-s as an upper bound for the number of
766 characters left wouldn't work). So if we foresee the need of this
767 (pattern starts with U or contains U0), we want to work on the encoded
768 version of the string. Users are advised to upgrade their pack string
769 themselves if they need to do a lot of unpacks like this on it
772 need_utf8(const char *pat, const char *patend)
776 PERL_ARGS_ASSERT_NEED_UTF8;
778 while (pat < patend) {
781 pat = (const char *) memchr(pat, '\n', patend-pat);
782 if (!pat) return FALSE;
783 } else if (pat[0] == 'U') {
784 if (first || pat[1] == '0') return TRUE;
785 } else first = FALSE;
792 first_symbol(const char *pat, const char *patend) {
793 PERL_ARGS_ASSERT_FIRST_SYMBOL;
795 while (pat < patend) {
796 if (pat[0] != '#') return pat[0];
798 pat = (const char *) memchr(pat, '\n', patend-pat);
807 =for apidoc_section Pack and Unpack
809 =for apidoc unpackstring
811 The engine implementing the C<unpack()> Perl function.
813 Using the template C<pat..patend>, this function unpacks the string
814 C<s..strend> into a number of mortal SVs, which it pushes onto the perl
815 argument (C<@_>) stack (so you will need to issue a C<PUTBACK> before and
816 C<SPAGAIN> after the call to this function). It returns the number of
819 The C<strend> and C<patend> pointers should point to the byte following the
820 last character of each string.
822 Although this function returns its values on the perl argument stack, it
823 doesn't take any parameters from that stack (and thus in particular
824 there's no need to do a C<PUSHMARK> before calling it, unlike L</call_pv> for
830 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
834 PERL_ARGS_ASSERT_UNPACKSTRING;
836 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
837 else if (need_utf8(pat, patend)) {
838 /* We probably should try to avoid this in case a scalar context call
839 wouldn't get to the "U0" */
840 STRLEN len = strend - s;
841 s = (char *) bytes_to_utf8((U8 *) s, &len);
844 flags |= FLAG_DO_UTF8;
847 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
848 flags |= FLAG_PARSE_UTF8;
850 TEMPSYM_INIT(&sym, pat, patend, flags);
852 return unpack_rec(&sym, s, s, strend, NULL );
856 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
860 const SSize_t start_sp_offset = SP - PL_stack_base;
862 SSize_t checksum = 0;
865 const SSize_t bits_in_uv = CHAR_BIT * sizeof(cuv);
867 bool explicit_length;
868 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
869 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
871 PERL_ARGS_ASSERT_UNPACK_REC;
873 symptr->strbeg = s - strbeg;
875 while (next_symbol(symptr)) {
878 I32 datumtype = symptr->code;
880 /* do first one only unless in list context
881 / is implemented by unpacking the count, then popping it from the
882 stack, so must check that we're not in the middle of a / */
884 && (SP - PL_stack_base == start_sp_offset + 1)
885 && (datumtype != '/') ) /* XXX can this be omitted */
888 switch (howlen = symptr->howlen) {
890 len = strend - strbeg; /* long enough */
893 /* e_no_len and e_number */
894 len = symptr->length;
898 explicit_length = TRUE;
900 beyond = s >= strend;
902 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
904 /* props nonzero means we can process this letter. */
905 const SSize_t size = props & PACK_SIZE_MASK;
906 const SSize_t howmany = (strend - s) / size;
910 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
911 if (len && unpack_only_one) len = 1;
917 needs_swap = NEEDS_SWAP(datumtype);
919 switch(TYPE_NO_ENDIANNESS(datumtype)) {
921 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
924 if (howlen == e_no_len)
925 len = 16; /* len is not specified */
933 tempsym_t savsym = *symptr;
934 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
935 symptr->flags |= group_modifiers;
936 symptr->patend = savsym.grpend;
937 symptr->previous = &savsym;
940 if (len && unpack_only_one) len = 1;
942 symptr->patptr = savsym.grpbeg;
943 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
944 else symptr->flags &= ~FLAG_PARSE_UTF8;
945 unpack_rec(symptr, s, strbeg, strend, &s);
946 if (s == strend && savsym.howlen == e_star)
947 break; /* No way to continue */
950 savsym.flags = symptr->flags & ~group_modifiers;
954 case '.' | TYPE_IS_SHRIEKING:
958 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
959 if (howlen == e_star) from = strbeg;
960 else if (len <= 0) from = s;
962 tempsym_t *group = symptr;
964 while (--len && group) group = group->previous;
965 from = group ? strbeg + group->strbeg : strbeg;
968 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
969 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
973 case '@' | TYPE_IS_SHRIEKING:
975 s = strbeg + symptr->strbeg;
976 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
980 Perl_croak(aTHX_ "'@' outside of string in unpack");
985 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
988 Perl_croak(aTHX_ "'@' outside of string in unpack");
992 case 'X' | TYPE_IS_SHRIEKING:
993 if (!len) /* Avoid division by 0 */
996 const char *hop, *last;
1000 hop += UTF8SKIP(hop);
1007 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1011 len = (s - strbeg) % len;
1017 Perl_croak(aTHX_ "'X' outside of string in unpack");
1018 while (--s, UTF8_IS_CONTINUATION(*s)) {
1020 Perl_croak(aTHX_ "'X' outside of string in unpack");
1025 if (len > s - strbeg)
1026 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1030 case 'x' | TYPE_IS_SHRIEKING: {
1032 if (!len) /* Avoid division by 0 */
1034 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1035 else ai32 = (s - strbeg) % len;
1036 if (ai32 == 0) break;
1044 Perl_croak(aTHX_ "'x' outside of string in unpack");
1049 if (len > strend - s)
1050 Perl_croak(aTHX_ "'x' outside of string in unpack");
1055 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1061 /* Preliminary length estimate is assumed done in 'W' */
1062 if (len > strend - s) len = strend - s;
1068 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1069 if (hop >= strend) {
1071 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1076 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1078 } else if (len > strend - s)
1081 if (datumtype == 'Z') {
1082 /* 'Z' strips stuff after first null */
1083 const char *ptr, *end;
1085 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1086 sv = newSVpvn(s, ptr-s);
1087 if (howlen == e_star) /* exact for 'Z*' */
1088 len = ptr-s + (ptr != strend ? 1 : 0);
1089 } else if (datumtype == 'A') {
1090 /* 'A' strips both nulls and spaces */
1092 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1093 for (ptr = s+len-1; ptr >= s; ptr--) {
1095 && !UTF8_IS_CONTINUATION(*ptr)
1096 && !isSPACE_utf8_safe(ptr, strend))
1101 if (ptr >= s) ptr += UTF8SKIP(ptr);
1104 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1106 for (ptr = s+len-1; ptr >= s; ptr--)
1107 if (*ptr != 0 && !isSPACE(*ptr)) break;
1110 sv = newSVpvn(s, ptr-s);
1111 } else sv = newSVpvn(s, len);
1115 /* Undo any upgrade done due to need_utf8() */
1116 if (!(symptr->flags & FLAG_WAS_UTF8))
1117 sv_utf8_downgrade(sv, 0);
1125 if (howlen == e_star || len > (strend - s) * 8)
1126 len = (strend - s) * 8;
1129 while (len >= 8 && s < strend) {
1130 cuv += PL_bitcount[utf8_to_byte(aTHX_ &s, strend, datumtype)];
1135 cuv += PL_bitcount[*(U8 *)s++];
1138 if (len && s < strend) {
1140 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1141 if (datumtype == 'b')
1143 if (bits & 1) cuv++;
1148 if (bits & 0x80) cuv++;
1155 sv = sv_2mortal(newSV(len ? len : 1));
1158 if (datumtype == 'b') {
1160 const SSize_t ai32 = len;
1161 for (len = 0; len < ai32; len++) {
1162 if (len & 7) bits >>= 1;
1164 if (s >= strend) break;
1165 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1166 } else bits = *(U8 *) s++;
1167 *str++ = bits & 1 ? '1' : '0';
1171 const SSize_t ai32 = len;
1172 for (len = 0; len < ai32; len++) {
1173 if (len & 7) bits <<= 1;
1175 if (s >= strend) break;
1176 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1177 } else bits = *(U8 *) s++;
1178 *str++ = bits & 0x80 ? '1' : '0';
1182 SvCUR_set(sv, str - SvPVX_const(sv));
1189 /* Preliminary length estimate, acceptable for utf8 too */
1190 if (howlen == e_star || len > (strend - s) * 2)
1191 len = (strend - s) * 2;
1193 sv = sv_2mortal(newSV(len ? len : 1));
1197 if (datumtype == 'h') {
1200 for (len = 0; len < ai32; len++) {
1201 if (len & 1) bits >>= 4;
1203 if (s >= strend) break;
1204 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1205 } else bits = * (U8 *) s++;
1207 *str++ = PL_hexdigit[bits & 15];
1211 const SSize_t ai32 = len;
1212 for (len = 0; len < ai32; len++) {
1213 if (len & 1) bits <<= 4;
1215 if (s >= strend) break;
1216 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1217 } else bits = *(U8 *) s++;
1219 *str++ = PL_hexdigit[(bits >> 4) & 15];
1224 SvCUR_set(sv, str - SvPVX_const(sv));
1231 if (explicit_length)
1232 /* Switch to "character" mode */
1233 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1238 while (len-- > 0 && s < strend) {
1243 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1244 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1245 if (retlen == (STRLEN) -1)
1246 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1250 aint = *(U8 *)(s)++;
1251 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1255 else if (checksum > bits_in_uv)
1256 cdouble += (NV)aint;
1264 while (len-- > 0 && s < strend) {
1266 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1267 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1268 if (retlen == (STRLEN) -1)
1269 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1273 else if (checksum > bits_in_uv)
1274 cdouble += (NV) val;
1278 } else if (!checksum)
1280 const U8 ch = *(U8 *) s++;
1283 else if (checksum > bits_in_uv)
1284 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1286 while (len-- > 0) cuv += *(U8 *) s++;
1290 if (explicit_length && howlen != e_star) {
1291 /* Switch to "bytes in UTF-8" mode */
1292 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1294 /* Should be impossible due to the need_utf8() test */
1295 Perl_croak(aTHX_ "U0 mode on a byte string");
1299 if (len > strend - s) len = strend - s;
1301 if (len && unpack_only_one) len = 1;
1305 while (len-- > 0 && s < strend) {
1309 U8 result[UTF8_MAXLEN+1];
1310 const char *ptr = s;
1312 /* Bug: warns about bad utf8 even if we are short on bytes
1313 and will break out of the loop */
1314 if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1317 len = UTF8SKIP(result);
1318 if (!S_utf8_to_bytes(aTHX_ &ptr, strend,
1319 (char *) &result[1], len-1, 'U')) break;
1320 auv = NATIVE_TO_UNI(utf8n_to_uvchr(result,
1323 UTF8_ALLOW_DEFAULT));
1326 auv = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s,
1329 UTF8_ALLOW_DEFAULT));
1330 if (retlen == (STRLEN) -1)
1331 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1336 else if (checksum > bits_in_uv)
1337 cdouble += (NV) auv;
1342 case 's' | TYPE_IS_SHRIEKING:
1343 #if SHORTSIZE != SIZE16
1346 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
1349 else if (checksum > bits_in_uv)
1350 cdouble += (NV)ashort;
1362 #if U16SIZE > SIZE16
1365 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1366 #if U16SIZE > SIZE16
1372 else if (checksum > bits_in_uv)
1373 cdouble += (NV)ai16;
1378 case 'S' | TYPE_IS_SHRIEKING:
1379 #if SHORTSIZE != SIZE16
1381 unsigned short aushort;
1382 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap);
1385 else if (checksum > bits_in_uv)
1386 cdouble += (NV)aushort;
1399 #if U16SIZE > SIZE16
1402 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
1403 if (datumtype == 'n')
1404 au16 = PerlSock_ntohs(au16);
1405 if (datumtype == 'v')
1409 else if (checksum > bits_in_uv)
1410 cdouble += (NV) au16;
1415 case 'v' | TYPE_IS_SHRIEKING:
1416 case 'n' | TYPE_IS_SHRIEKING:
1419 # if U16SIZE > SIZE16
1422 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1423 /* There should never be any byte-swapping here. */
1424 assert(!TYPE_ENDIANNESS(datumtype));
1425 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1426 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1427 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1428 ai16 = (I16) vtohs((U16) ai16);
1431 else if (checksum > bits_in_uv)
1432 cdouble += (NV) ai16;
1438 case 'i' | TYPE_IS_SHRIEKING:
1441 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
1444 else if (checksum > bits_in_uv)
1445 cdouble += (NV)aint;
1451 case 'I' | TYPE_IS_SHRIEKING:
1454 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
1457 else if (checksum > bits_in_uv)
1458 cdouble += (NV)auint;
1466 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
1469 else if (checksum > bits_in_uv)
1478 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
1481 else if (checksum > bits_in_uv)
1487 case 'l' | TYPE_IS_SHRIEKING:
1488 #if LONGSIZE != SIZE32
1491 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
1494 else if (checksum > bits_in_uv)
1495 cdouble += (NV)along;
1506 #if U32SIZE > SIZE32
1509 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1510 #if U32SIZE > SIZE32
1511 if (ai32 > 2147483647) ai32 -= 4294967296;
1515 else if (checksum > bits_in_uv)
1516 cdouble += (NV)ai32;
1521 case 'L' | TYPE_IS_SHRIEKING:
1522 #if LONGSIZE != SIZE32
1524 unsigned long aulong;
1525 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
1528 else if (checksum > bits_in_uv)
1529 cdouble += (NV)aulong;
1542 #if U32SIZE > SIZE32
1545 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
1546 if (datumtype == 'N')
1547 au32 = PerlSock_ntohl(au32);
1548 if (datumtype == 'V')
1552 else if (checksum > bits_in_uv)
1553 cdouble += (NV)au32;
1558 case 'V' | TYPE_IS_SHRIEKING:
1559 case 'N' | TYPE_IS_SHRIEKING:
1562 #if U32SIZE > SIZE32
1565 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1566 /* There should never be any byte swapping here. */
1567 assert(!TYPE_ENDIANNESS(datumtype));
1568 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1569 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1570 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1571 ai32 = (I32)vtohl((U32)ai32);
1574 else if (checksum > bits_in_uv)
1575 cdouble += (NV)ai32;
1583 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1584 /* newSVpv generates undef if aptr is NULL */
1585 mPUSHs(newSVpv(aptr, 0));
1593 while (len > 0 && s < strend) {
1595 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1596 auv = (auv << 7) | (ch & 0x7f);
1597 /* UTF8_IS_XXXXX not right here because this is a BER, not
1598 * UTF-8 format - using constant 0x80 */
1606 if (++bytes >= sizeof(UV)) { /* promote to string */
1609 sv = Perl_newSVpvf(aTHX_ "%.*" UVuf,
1610 (int)TYPE_DIGITS(UV), auv);
1611 while (s < strend) {
1612 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1613 sv = mul128(sv, (U8)(ch & 0x7f));
1619 t = SvPV_nolen_const(sv);
1628 if ((s >= strend) && bytes)
1629 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1633 if (symptr->howlen == e_star)
1634 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1636 if (s + sizeof(char*) <= strend) {
1638 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1639 /* newSVpvn generates undef if aptr is NULL */
1640 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1643 #if defined(HAS_QUAD) && IVSIZE >= 8
1647 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
1649 mPUSHs(newSViv((IV)aquad));
1650 else if (checksum > bits_in_uv)
1651 cdouble += (NV)aquad;
1659 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
1661 mPUSHs(newSVuv((UV)auquad));
1662 else if (checksum > bits_in_uv)
1663 cdouble += (NV)auquad;
1669 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1673 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
1683 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
1693 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1694 datumtype, needs_swap);
1701 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1705 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1706 sizeof(aldouble.bytes), datumtype, needs_swap);
1707 /* The most common long double format, the x86 80-bit
1708 * extended precision, has either 2 or 6 unused bytes,
1709 * which may contain garbage, which may contain
1710 * unintentional data. While we do zero the bytes of
1711 * the long double data in pack(), here in unpack() we
1712 * don't, because it's really hard to envision that
1713 * reading the long double off aldouble would be
1714 * affected by the unused bytes.
1716 * Note that trying to unpack 'long doubles' of 'long
1717 * doubles' packed in another system is in the general
1718 * case doomed without having more detail. */
1720 mPUSHn(aldouble.ld);
1722 cdouble += aldouble.ld;
1728 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1729 sv = sv_2mortal(newSV(l));
1736 /* Note that all legal uuencoded strings are ASCII printables, so
1737 * have the same representation under UTF-8 vs not. This means we
1738 * can ignore UTF8ness on legal input. For illegal we stop at the
1739 * first failure, and don't report where/what that is, so again we
1740 * can ignore UTF8ness */
1742 while (s < strend && *s != ' ' && ISUUCHAR(*s)) {
1746 len = PL_uudmap[*(U8*)s++] & 077;
1748 if (s < strend && ISUUCHAR(*s))
1749 a = PL_uudmap[*(U8*)s++] & 077;
1752 if (s < strend && ISUUCHAR(*s))
1753 b = PL_uudmap[*(U8*)s++] & 077;
1756 if (s < strend && ISUUCHAR(*s))
1757 c = PL_uudmap[*(U8*)s++] & 077;
1760 if (s < strend && ISUUCHAR(*s))
1761 d = PL_uudmap[*(U8*)s++] & 077;
1764 hunk[0] = (char)((a << 2) | (b >> 4));
1765 hunk[1] = (char)((b << 4) | (c >> 2));
1766 hunk[2] = (char)((c << 6) | d);
1768 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1773 else /* possible checksum byte */
1774 if (s + 1 < strend && s[1] == '\n')
1780 } /* End of switch */
1783 if (memCHRs("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1784 (checksum > bits_in_uv &&
1785 memCHRs("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1788 anv = (NV) (1 << (checksum & 15));
1789 while (checksum >= 16) {
1793 while (cdouble < 0.0)
1795 cdouble = Perl_modf(cdouble / anv, &trouble);
1796 #ifdef LONGDOUBLE_DOUBLEDOUBLE
1797 /* Workaround for powerpc doubledouble modfl bug:
1798 * close to 1.0L and -1.0L cdouble is 0, and trouble
1799 * is cdouble / anv. */
1800 if (trouble != Perl_ceil(trouble)) {
1802 if (cdouble > 1.0L) cdouble -= 1.0L;
1803 if (cdouble < -1.0L) cdouble += 1.0L;
1807 sv = newSVnv(cdouble);
1810 if (checksum < bits_in_uv) {
1811 UV mask = nBIT_MASK(checksum);
1820 if (symptr->flags & FLAG_SLASH){
1821 if (SP - PL_stack_base - start_sp_offset <= 0)
1823 if( next_symbol(symptr) ){
1824 if( symptr->howlen == e_number )
1825 Perl_croak(aTHX_ "Count after length/code in unpack" );
1827 /* ...end of char buffer then no decent length available */
1828 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1830 /* take top of stack (hope it's numeric) */
1833 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1836 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1838 datumtype = symptr->code;
1839 explicit_length = FALSE;
1847 return SP - PL_stack_base - start_sp_offset;
1857 const char *pat = SvPV_const(left, llen);
1858 const char *s = SvPV_const(right, rlen);
1859 const char *strend = s + rlen;
1860 const char *patend = pat + llen;
1864 cnt = unpackstring(pat, patend, s, strend,
1865 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
1866 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
1869 if ( !cnt && gimme == G_SCALAR )
1870 PUSHs(&PL_sv_undef);
1875 doencodes(U8 *h, const U8 *s, SSize_t len)
1877 *h++ = PL_uuemap[len];
1879 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1880 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1881 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1882 *h++ = PL_uuemap[(077 & (s[2] & 077))];
1887 const U8 r = (len > 1 ? s[1] : '\0');
1888 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1889 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1890 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1891 *h++ = PL_uuemap[0];
1898 S_is_an_int(pTHX_ const char *s, STRLEN l)
1900 SV *result = newSVpvn(s, l);
1901 char *const result_c = SvPV_nolen(result); /* convenience */
1902 char *out = result_c;
1906 PERL_ARGS_ASSERT_IS_AN_INT;
1914 SvREFCNT_dec(result);
1937 SvREFCNT_dec(result);
1943 SvCUR_set(result, out - result_c);
1947 /* pnum must be '\0' terminated */
1949 S_div128(pTHX_ SV *pnum, bool *done)
1952 char * const s = SvPV(pnum, len);
1956 PERL_ARGS_ASSERT_DIV128;
1960 const int i = m * 10 + (*t - '0');
1961 const int r = (i >> 7); /* r < 10 */
1969 SvCUR_set(pnum, (STRLEN) (t - s));
1974 =for apidoc packlist
1976 The engine implementing C<pack()> Perl function.
1982 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
1986 PERL_ARGS_ASSERT_PACKLIST;
1988 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
1990 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1991 Also make sure any UTF8 flag is loaded */
1992 SvPV_force_nolen(cat);
1994 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
1996 (void)pack_rec( cat, &sym, beglist, endlist );
1999 /* like sv_utf8_upgrade, but also repoint the group start markers */
2001 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2004 const char *from_ptr, *from_start, *from_end, **marks, **m;
2005 char *to_start, *to_ptr;
2007 if (SvUTF8(sv)) return;
2009 from_start = SvPVX_const(sv);
2010 from_end = from_start + SvCUR(sv);
2011 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2012 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
2013 if (from_ptr == from_end) {
2014 /* Simple case: no character needs to be changed */
2019 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2020 Newx(to_start, len, char);
2021 Copy(from_start, to_start, from_ptr-from_start, char);
2022 to_ptr = to_start + (from_ptr-from_start);
2024 Newx(marks, sym_ptr->level+2, const char *);
2025 for (group=sym_ptr; group; group = group->previous)
2026 marks[group->level] = from_start + group->strbeg;
2027 marks[sym_ptr->level+1] = from_end+1;
2028 for (m = marks; *m < from_ptr; m++)
2029 *m = to_start + (*m-from_start);
2031 for (;from_ptr < from_end; from_ptr++) {
2032 while (*m == from_ptr) *m++ = to_ptr;
2033 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2037 while (*m == from_ptr) *m++ = to_ptr;
2038 if (m != marks + sym_ptr->level+1) {
2041 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2042 "level=%d", m, marks, sym_ptr->level);
2044 for (group=sym_ptr; group; group = group->previous)
2045 group->strbeg = marks[group->level] - to_start;
2050 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2051 from_start -= SvIVX(sv);
2054 SvFLAGS(sv) &= ~SVf_OOK;
2057 Safefree(from_start);
2058 SvPV_set(sv, to_start);
2059 SvCUR_set(sv, to_ptr - to_start);
2064 /* Exponential string grower. Makes string extension effectively O(n)
2065 needed says how many extra bytes we need (not counting the final '\0')
2066 Only grows the string if there is an actual lack of space
2069 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2070 const STRLEN cur = SvCUR(sv);
2071 const STRLEN len = SvLEN(sv);
2074 PERL_ARGS_ASSERT_SV_EXP_GROW;
2076 if (len - cur > needed) return SvPVX(sv);
2077 extend = needed > len ? needed : len;
2078 return SvGROW(sv, len+extend+1);
2082 S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
2085 if (UNLIKELY(SvAMAGIC(sv)))
2087 if (UNLIKELY(isinfnansv(sv))) {
2088 const I32 c = TYPE_NO_MODIFIERS(datumtype);
2089 const NV nv = SvNV_nomg(sv);
2091 Perl_croak(aTHX_ "Cannot compress %" NVgf " in pack", nv);
2093 Perl_croak(aTHX_ "Cannot pack %" NVgf " with '%c'", nv, (int) c);
2098 #define SvIV_no_inf(sv,d) \
2099 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
2100 #define SvUV_no_inf(sv,d) \
2101 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
2105 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2107 tempsym_t lookahead;
2108 SSize_t items = endlist - beglist;
2109 bool found = next_symbol(symptr);
2110 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2111 bool warn_utf8 = ckWARN(WARN_UTF8);
2114 PERL_ARGS_ASSERT_PACK_REC;
2116 if (symptr->level == 0 && found && symptr->code == 'U') {
2117 marked_upgrade(aTHX_ cat, symptr);
2118 symptr->flags |= FLAG_DO_UTF8;
2121 symptr->strbeg = SvCUR(cat);
2127 SV *lengthcode = NULL;
2128 I32 datumtype = symptr->code;
2129 howlen_t howlen = symptr->howlen;
2130 char *start = SvPVX(cat);
2131 char *cur = start + SvCUR(cat);
2134 #define NEXTFROM (lengthcode ? lengthcode : items > 0 ? (--items, *beglist++) : &PL_sv_no)
2135 #define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
2139 len = memCHRs("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2143 /* e_no_len and e_number */
2144 len = symptr->length;
2149 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2151 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2152 /* We can process this letter. */
2153 STRLEN size = props & PACK_SIZE_MASK;
2154 GROWING2(utf8, cat, start, cur, size, (STRLEN)len);
2158 /* Look ahead for next symbol. Do we have code/code? */
2159 lookahead = *symptr;
2160 found = next_symbol(&lookahead);
2161 if (symptr->flags & FLAG_SLASH) {
2163 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2164 if (memCHRs("aAZ", lookahead.code)) {
2165 if (lookahead.howlen == e_number) count = lookahead.length;
2168 count = sv_len_utf8(*beglist);
2171 if (lookahead.code == 'Z') count++;
2174 if (lookahead.howlen == e_number && lookahead.length < items)
2175 count = lookahead.length;
2178 lookahead.howlen = e_number;
2179 lookahead.length = count;
2180 lengthcode = sv_2mortal(newSViv(count));
2183 needs_swap = NEEDS_SWAP(datumtype);
2185 /* Code inside the switch must take care to properly update
2186 cat (CUR length and '\0' termination) if it updated *cur and
2187 doesn't simply leave using break */
2188 switch (TYPE_NO_ENDIANNESS(datumtype)) {
2190 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2191 (int) TYPE_NO_MODIFIERS(datumtype));
2193 Perl_croak(aTHX_ "'%%' may not be used in pack");
2195 case '.' | TYPE_IS_SHRIEKING:
2197 if (howlen == e_star) from = start;
2198 else if (len == 0) from = cur;
2200 tempsym_t *group = symptr;
2202 while (--len && group) group = group->previous;
2203 from = group ? start + group->strbeg : start;
2206 len = SvIV_no_inf(fromstr, datumtype);
2208 case '@' | TYPE_IS_SHRIEKING:
2210 from = start + symptr->strbeg;
2212 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2214 while (len && from < cur) {
2215 from += UTF8SKIP(from);
2219 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2221 /* Here we know from == cur */
2223 GROWING(0, cat, start, cur, len);
2224 Zero(cur, len, char);
2226 } else if (from < cur) {
2229 } else goto no_change;
2237 if (len > 0) goto grow;
2238 if (len == 0) goto no_change;
2245 tempsym_t savsym = *symptr;
2246 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2247 symptr->flags |= group_modifiers;
2248 symptr->patend = savsym.grpend;
2250 symptr->previous = &lookahead;
2253 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2254 else symptr->flags &= ~FLAG_PARSE_UTF8;
2255 was_utf8 = SvUTF8(cat);
2256 symptr->patptr = savsym.grpbeg;
2257 beglist = pack_rec(cat, symptr, beglist, endlist);
2258 if (SvUTF8(cat) != was_utf8)
2259 /* This had better be an upgrade while in utf8==0 mode */
2262 if (savsym.howlen == e_star && beglist == endlist)
2263 break; /* No way to continue */
2265 items = endlist - beglist;
2266 lookahead.flags = symptr->flags & ~group_modifiers;
2269 case 'X' | TYPE_IS_SHRIEKING:
2270 if (!len) /* Avoid division by 0 */
2277 hop += UTF8SKIP(hop);
2284 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2288 len = (cur-start) % len;
2292 if (len < 1) goto no_change;
2296 Perl_croak(aTHX_ "'%c' outside of string in pack",
2297 (int) TYPE_NO_MODIFIERS(datumtype));
2298 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2300 Perl_croak(aTHX_ "'%c' outside of string in pack",
2301 (int) TYPE_NO_MODIFIERS(datumtype));
2307 if (cur - start < len)
2308 Perl_croak(aTHX_ "'%c' outside of string in pack",
2309 (int) TYPE_NO_MODIFIERS(datumtype));
2312 if (cur < start+symptr->strbeg) {
2313 /* Make sure group starts don't point into the void */
2315 const STRLEN length = cur-start;
2316 for (group = symptr;
2317 group && length < group->strbeg;
2318 group = group->previous) group->strbeg = length;
2319 lookahead.strbeg = length;
2322 case 'x' | TYPE_IS_SHRIEKING: {
2324 if (!len) /* Avoid division by 0 */
2326 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2327 else ai32 = (cur - start) % len;
2328 if (ai32 == 0) goto no_change;
2340 aptr = SvPV_const(fromstr, fromlen);
2341 if (DO_UTF8(fromstr)) {
2342 const char *end, *s;
2344 if (!utf8 && !SvUTF8(cat)) {
2345 marked_upgrade(aTHX_ cat, symptr);
2346 lookahead.flags |= FLAG_DO_UTF8;
2347 lookahead.strbeg = symptr->strbeg;
2350 cur = start + SvCUR(cat);
2352 if (howlen == e_star) {
2353 if (utf8) goto string_copy;
2357 end = aptr + fromlen;
2358 fromlen = datumtype == 'Z' ? len-1 : len;
2359 while ((SSize_t) fromlen > 0 && s < end) {
2364 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2367 if (datumtype == 'Z') len++;
2373 fromlen = len - fromlen;
2374 if (datumtype == 'Z') fromlen--;
2375 if (howlen == e_star) {
2377 if (datumtype == 'Z') len++;
2379 GROWING(0, cat, start, cur, len);
2380 if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2381 datumtype | TYPE_IS_PACK))
2382 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2383 "for '%c', aptr=%p end=%p cur=%p, fromlen=%zu",
2384 (int)datumtype, aptr, end, cur, fromlen);
2388 if (howlen == e_star) {
2390 if (datumtype == 'Z') len++;
2392 if (len <= (SSize_t) fromlen) {
2394 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2396 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2398 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2399 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2401 while (fromlen > 0) {
2402 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2408 if (howlen == e_star) {
2410 if (datumtype == 'Z') len++;
2412 if (len <= (SSize_t) fromlen) {
2414 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2416 GROWING(0, cat, start, cur, len);
2417 Copy(aptr, cur, fromlen, char);
2421 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2428 const char *str, *end;
2429 SSize_t l, field_len;
2435 str = SvPV_const(fromstr, fromlen);
2436 end = str + fromlen;
2437 if (DO_UTF8(fromstr)) {
2439 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2441 utf8_source = FALSE;
2442 utf8_flags = 0; /* Unused, but keep compilers happy */
2444 if (howlen == e_star) len = fromlen;
2445 field_len = (len+7)/8;
2446 GROWING(utf8, cat, start, cur, field_len);
2447 if (len > (SSize_t)fromlen) len = fromlen;
2450 if (datumtype == 'B')
2454 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2456 } else bits |= *str++ & 1;
2457 if (l & 7) bits <<= 1;
2459 PUSH_BYTE(utf8, cur, bits);
2464 /* datumtype == 'b' */
2468 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2469 if (val & 1) bits |= 0x80;
2470 } else if (*str++ & 1)
2472 if (l & 7) bits >>= 1;
2474 PUSH_BYTE(utf8, cur, bits);
2480 if (datumtype == 'B')
2481 bits <<= 7 - (l & 7);
2483 bits >>= 7 - (l & 7);
2484 PUSH_BYTE(utf8, cur, bits);
2487 /* Determine how many chars are left in the requested field */
2489 if (howlen == e_star) field_len = 0;
2490 else field_len -= l;
2491 Zero(cur, field_len, char);
2497 const char *str, *end;
2498 SSize_t l, field_len;
2504 str = SvPV_const(fromstr, fromlen);
2505 end = str + fromlen;
2506 if (DO_UTF8(fromstr)) {
2508 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2510 utf8_source = FALSE;
2511 utf8_flags = 0; /* Unused, but keep compilers happy */
2513 if (howlen == e_star) len = fromlen;
2514 field_len = (len+1)/2;
2515 GROWING(utf8, cat, start, cur, field_len);
2516 if (!utf8_source && len > (SSize_t)fromlen) len = fromlen;
2519 if (datumtype == 'H')
2523 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2524 if (val < 256 && isALPHA(val))
2525 bits |= (val + 9) & 0xf;
2528 } else if (isALPHA(*str))
2529 bits |= (*str++ + 9) & 0xf;
2531 bits |= *str++ & 0xf;
2532 if (l & 1) bits <<= 4;
2534 PUSH_BYTE(utf8, cur, bits);
2542 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2543 if (val < 256 && isALPHA(val))
2544 bits |= ((val + 9) & 0xf) << 4;
2546 bits |= (val & 0xf) << 4;
2547 } else if (isALPHA(*str))
2548 bits |= ((*str++ + 9) & 0xf) << 4;
2550 bits |= (*str++ & 0xf) << 4;
2551 if (l & 1) bits >>= 4;
2553 PUSH_BYTE(utf8, cur, bits);
2559 PUSH_BYTE(utf8, cur, bits);
2562 /* Determine how many chars are left in the requested field */
2564 if (howlen == e_star) field_len = 0;
2565 else field_len -= l;
2566 Zero(cur, field_len, char);
2574 aiv = SvIV_no_inf(fromstr, datumtype);
2575 if ((-128 > aiv || aiv > 127))
2576 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2577 "Character in 'c' format wrapped in pack");
2578 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2583 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2589 aiv = SvIV_no_inf(fromstr, datumtype);
2590 if ((0 > aiv || aiv > 0xff))
2591 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2592 "Character in 'C' format wrapped in pack");
2593 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2598 U8 in_bytes = (U8)IN_BYTES;
2600 end = start+SvLEN(cat)-1;
2601 if (utf8) end -= UTF8_MAXLEN-1;
2605 auv = SvUV_no_inf(fromstr, datumtype);
2606 if (in_bytes) auv = auv % 0x100;
2611 SvCUR_set(cat, cur - start);
2613 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2614 end = start+SvLEN(cat)-UTF8_MAXLEN;
2616 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, 0);
2621 SvCUR_set(cat, cur - start);
2622 marked_upgrade(aTHX_ cat, symptr);
2623 lookahead.flags |= FLAG_DO_UTF8;
2624 lookahead.strbeg = symptr->strbeg;
2627 cur = start + SvCUR(cat);
2628 end = start+SvLEN(cat)-UTF8_MAXLEN;
2631 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2632 "Character in 'W' format wrapped in pack");
2637 SvCUR_set(cat, cur - start);
2638 GROWING(0, cat, start, cur, len+1);
2639 end = start+SvLEN(cat)-1;
2641 *(U8 *) cur++ = (U8)auv;
2650 if (!(symptr->flags & FLAG_DO_UTF8)) {
2651 marked_upgrade(aTHX_ cat, symptr);
2652 lookahead.flags |= FLAG_DO_UTF8;
2653 lookahead.strbeg = symptr->strbeg;
2659 end = start+SvLEN(cat);
2660 if (!utf8) end -= UTF8_MAXLEN;
2664 auv = SvUV_no_inf(fromstr, datumtype);
2666 U8 buffer[UTF8_MAXLEN+1], *endb;
2667 endb = uvchr_to_utf8_flags(buffer, UNI_TO_NATIVE(auv), 0);
2668 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2670 SvCUR_set(cat, cur - start);
2671 GROWING(0, cat, start, cur,
2672 len+(endb-buffer)*UTF8_EXPAND);
2673 end = start+SvLEN(cat);
2675 cur = my_bytes_to_utf8(buffer, endb-buffer, cur, 0);
2679 SvCUR_set(cat, cur - start);
2680 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2681 end = start+SvLEN(cat)-UTF8_MAXLEN;
2683 cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
2690 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2696 anv = SvNV(fromstr);
2697 # if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
2698 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2699 * on Alpha; fake it if we don't have them.
2703 else if (anv < -FLT_MAX)
2705 else afloat = (float)anv;
2707 # if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2709 afloat = (float)NV_NAN;
2713 /* a simple cast to float is undefined if outside
2714 * the range of values that can be represented */
2715 afloat = (float)(anv > FLT_MAX ? NV_INF :
2716 anv < -FLT_MAX ? -NV_INF : anv);
2719 PUSH_VAR(utf8, cur, afloat, needs_swap);
2727 anv = SvNV(fromstr);
2728 # if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
2729 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2730 * on Alpha; fake it if we don't have them.
2734 else if (anv < -DBL_MAX)
2736 else adouble = (double)anv;
2738 adouble = (double)anv;
2740 PUSH_VAR(utf8, cur, adouble, needs_swap);
2745 Zero(&anv, 1, NV); /* can be long double with unused bits */
2749 /* to work round a gcc/x86 bug; don't use SvNV */
2750 anv.nv = sv_2nv(fromstr);
2751 # if defined(LONGDOUBLE_X86_80_BIT) && defined(USE_LONG_DOUBLE) \
2752 && LONG_DOUBLESIZE > 10
2753 /* GCC sometimes overwrites the padding in the
2755 Zero(anv.bytes+10, sizeof(anv.bytes) - 10, U8);
2758 anv.nv = SvNV(fromstr);
2760 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
2764 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2767 /* long doubles can have unused bits, which may be nonzero */
2768 Zero(&aldouble, 1, long double);
2772 /* to work round a gcc/x86 bug; don't use SvNV */
2773 aldouble.ld = (long double)sv_2nv(fromstr);
2774 # if defined(LONGDOUBLE_X86_80_BIT) && LONG_DOUBLESIZE > 10
2775 /* GCC sometimes overwrites the padding in the
2777 Zero(aldouble.bytes+10, sizeof(aldouble.bytes) - 10, U8);
2780 aldouble.ld = (long double)SvNV(fromstr);
2782 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2788 case 'n' | TYPE_IS_SHRIEKING:
2793 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2794 ai16 = PerlSock_htons(ai16);
2795 PUSH16(utf8, cur, &ai16, FALSE);
2798 case 'v' | TYPE_IS_SHRIEKING:
2803 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2805 PUSH16(utf8, cur, &ai16, FALSE);
2808 case 'S' | TYPE_IS_SHRIEKING:
2809 #if SHORTSIZE != SIZE16
2811 unsigned short aushort;
2813 aushort = SvUV_no_inf(fromstr, datumtype);
2814 PUSH_VAR(utf8, cur, aushort, needs_swap);
2824 au16 = (U16)SvUV_no_inf(fromstr, datumtype);
2825 PUSH16(utf8, cur, &au16, needs_swap);
2828 case 's' | TYPE_IS_SHRIEKING:
2829 #if SHORTSIZE != SIZE16
2833 ashort = SvIV_no_inf(fromstr, datumtype);
2834 PUSH_VAR(utf8, cur, ashort, needs_swap);
2844 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2845 PUSH16(utf8, cur, &ai16, needs_swap);
2849 case 'I' | TYPE_IS_SHRIEKING:
2853 auint = SvUV_no_inf(fromstr, datumtype);
2854 PUSH_VAR(utf8, cur, auint, needs_swap);
2861 aiv = SvIV_no_inf(fromstr, datumtype);
2862 PUSH_VAR(utf8, cur, aiv, needs_swap);
2869 auv = SvUV_no_inf(fromstr, datumtype);
2870 PUSH_VAR(utf8, cur, auv, needs_swap);
2877 S_sv_check_infnan(aTHX_ fromstr, datumtype);
2878 anv = SvNV_nomg(fromstr);
2882 SvCUR_set(cat, cur - start);
2883 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
2886 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2887 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2888 any negative IVs will have already been got by the croak()
2889 above. IOK is untrue for fractions, so we test them
2890 against UV_MAX_P1. */
2891 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2892 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
2893 char *in = buf + sizeof(buf);
2894 UV auv = SvUV_nomg(fromstr);
2897 *--in = (char)((auv & 0x7f) | 0x80);
2900 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2901 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2902 in, (buf + sizeof(buf)) - in);
2903 } else if (SvPOKp(fromstr))
2905 else if (SvNOKp(fromstr)) {
2906 /* 10**NV_MAX_10_EXP is the largest power of 10
2907 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
2908 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2909 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2910 And with that many bytes only Inf can overflow.
2911 Some C compilers are strict about integral constant
2912 expressions so we conservatively divide by a slightly
2913 smaller integer instead of multiplying by the exact
2914 floating-point value.
2916 #ifdef NV_MAX_10_EXP
2917 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
2918 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
2920 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
2921 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
2923 char *in = buf + sizeof(buf);
2925 anv = Perl_floor(anv);
2927 const NV next = Perl_floor(anv / 128);
2928 if (in <= buf) /* this cannot happen ;-) */
2929 Perl_croak(aTHX_ "Cannot compress integer in pack");
2930 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
2933 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2934 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2935 in, (buf + sizeof(buf)) - in);
2944 /* Copy string and check for compliance */
2945 from = SvPV_nomg_const(fromstr, len);
2946 if ((norm = is_an_int(from, len)) == NULL)
2947 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
2949 Newx(result, len, char);
2952 while (!done) *--in = div128(norm, &done) | 0x80;
2953 result[len - 1] &= 0x7F; /* clear continue bit */
2954 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2955 in, (result + len) - in);
2957 SvREFCNT_dec(norm); /* free norm */
2962 case 'i' | TYPE_IS_SHRIEKING:
2966 aint = SvIV_no_inf(fromstr, datumtype);
2967 PUSH_VAR(utf8, cur, aint, needs_swap);
2970 case 'N' | TYPE_IS_SHRIEKING:
2975 au32 = SvUV_no_inf(fromstr, datumtype);
2976 au32 = PerlSock_htonl(au32);
2977 PUSH32(utf8, cur, &au32, FALSE);
2980 case 'V' | TYPE_IS_SHRIEKING:
2985 au32 = SvUV_no_inf(fromstr, datumtype);
2987 PUSH32(utf8, cur, &au32, FALSE);
2990 case 'L' | TYPE_IS_SHRIEKING:
2991 #if LONGSIZE != SIZE32
2993 unsigned long aulong;
2995 aulong = SvUV_no_inf(fromstr, datumtype);
2996 PUSH_VAR(utf8, cur, aulong, needs_swap);
3006 au32 = SvUV_no_inf(fromstr, datumtype);
3007 PUSH32(utf8, cur, &au32, needs_swap);
3010 case 'l' | TYPE_IS_SHRIEKING:
3011 #if LONGSIZE != SIZE32
3015 along = SvIV_no_inf(fromstr, datumtype);
3016 PUSH_VAR(utf8, cur, along, needs_swap);
3026 ai32 = SvIV_no_inf(fromstr, datumtype);
3027 PUSH32(utf8, cur, &ai32, needs_swap);
3030 #if defined(HAS_QUAD) && IVSIZE >= 8
3035 auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype);
3036 PUSH_VAR(utf8, cur, auquad, needs_swap);
3043 aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype);
3044 PUSH_VAR(utf8, cur, aquad, needs_swap);
3049 len = 1; /* assume SV is correct length */
3050 GROWING(utf8, cat, start, cur, sizeof(char *));
3057 SvGETMAGIC(fromstr);
3058 if (!SvOK(fromstr)) aptr = NULL;
3060 /* XXX better yet, could spirit away the string to
3061 * a safe spot and hang on to it until the result
3062 * of pack() (and all copies of the result) are
3065 if (((SvTEMP(fromstr) && SvREFCNT(fromstr) == 1)
3066 || (SvPADTMP(fromstr) &&
3067 !SvREADONLY(fromstr)))) {
3068 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3069 "Attempt to pack pointer to temporary value");
3071 if (SvPOK(fromstr) || SvNIOK(fromstr))
3072 aptr = SvPV_nomg_const_nolen(fromstr);
3074 aptr = SvPV_force_flags_nolen(fromstr, 0);
3076 PUSH_VAR(utf8, cur, aptr, needs_swap);
3080 const char *aptr, *aend;
3084 if (len <= 2) len = 45;
3085 else len = len / 3 * 3;
3087 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3088 "Field too wide in 'u' format in pack");
3091 aptr = SvPV_const(fromstr, fromlen);
3092 from_utf8 = DO_UTF8(fromstr);
3094 aend = aptr + fromlen;
3095 fromlen = sv_len_utf8_nomg(fromstr);
3096 } else aend = NULL; /* Unused, but keep compilers happy */
3097 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3098 while (fromlen > 0) {
3101 U8 hunk[1+63/3*4+1];
3103 if ((SSize_t)fromlen > len)
3109 if (!S_utf8_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3110 'u' | TYPE_IS_PACK)) {
3112 SvCUR_set(cat, cur - start);
3113 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3114 "aptr=%p, aend=%p, buffer=%p, todo=%zd",
3115 aptr, aend, buffer, todo);
3117 end = doencodes(hunk, (const U8 *)buffer, todo);
3119 end = doencodes(hunk, (const U8 *)aptr, todo);
3122 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
3129 SvCUR_set(cat, cur - start);
3131 *symptr = lookahead;
3140 dSP; dMARK; dORIGMARK; dTARGET;
3143 SV *pat_sv = *++MARK;
3144 const char *pat = SvPV_const(pat_sv, fromlen);
3145 const char *patend = pat + fromlen;
3151 packlist(cat, pat, patend, MARK, SP + 1);
3155 const char * result = SvPV_nomg(cat, result_len);
3156 const U8 * error_pos;
3158 if (! is_utf8_string_loc((U8 *) result, result_len, &error_pos)) {
3159 _force_out_malformed_utf8_message(error_pos,
3160 (U8 *) result + result_len,
3164 NOT_REACHED; /* NOTREACHED */
3175 * ex: set ts=8 sts=4 sw=4 et: