3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
32 #define PERL_IN_PP_PACK_C
35 /* Types used by pack/unpack */
37 e_no_len, /* no length */
38 e_number, /* number, [] */
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 U32 flags; /* /=4, comma=2, pack=1 */
49 /* and group modifiers */
50 SSize_t length; /* length/repeat count */
51 howlen_t howlen; /* how length is given */
52 int level; /* () nesting level */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
57 #define TEMPSYM_INIT(symptr, p, e, f) \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
81 U8 bytes[sizeof(long double)];
88 /* Maximum number of bytes to which a byte can grow due to upgrade */
92 * Offset for integer pack/unpack.
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
112 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
115 #if U16SIZE <= SIZE16 && U32SIZE <= SIZE32
116 # define OFF16(p) ((char *) (p))
117 # define OFF32(p) ((char *) (p))
118 #elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
119 # define OFF16(p) ((char*)(p))
120 # define OFF32(p) ((char*)(p))
121 #elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
122 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
123 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
125 # error "bad cray byte order"
128 #define PUSH16(utf8, cur, p, needs_swap) \
129 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
130 #define PUSH32(utf8, cur, p, needs_swap) \
131 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
133 #if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
134 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
135 #elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
136 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
138 # error "Unsupported byteorder"
139 /* Need to add code here to re-instate mixed endian support.
140 NEEDS_SWAP would need to hold a flag indicating which action to
141 take, and S_reverse_copy and the code in S_utf8_to_bytes would need
142 logic adding to deal with any mixed-endian transformations needed.
146 /* Only to be used inside a loop (see the break) */
147 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
149 if (UNLIKELY(utf8)) { \
150 if (!S_utf8_to_bytes(aTHX_ &s, strend, \
151 (char *) (buf), len, datumtype)) break; \
153 if (UNLIKELY(needs_swap)) \
154 S_reverse_copy(s, (char *) (buf), len); \
156 Copy(s, (char *) (buf), len, char); \
161 #define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
162 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
164 #define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
165 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
167 #define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
168 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
170 #define PUSH_VAR(utf8, aptr, var, needs_swap) \
171 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
173 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
174 #define MAX_SUB_TEMPLATE_LEVEL 100
176 /* flags (note that type modifiers can also be used as flags!) */
177 #define FLAG_WAS_UTF8 0x40
178 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
179 #define FLAG_UNPACK_ONLY_ONE 0x10
180 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
181 #define FLAG_SLASH 0x04
182 #define FLAG_COMMA 0x02
183 #define FLAG_PACK 0x01
186 S_mul128(pTHX_ SV *sv, U8 m)
189 char *s = SvPV(sv, len);
192 PERL_ARGS_ASSERT_MUL128;
194 if (! memBEGINs(s, len, "0000")) { /* need to grow sv */
195 SV * const tmpNew = newSVpvs("0000000000");
197 sv_catsv(tmpNew, sv);
198 SvREFCNT_dec(sv); /* free old sv */
203 while (!*t) /* trailing '\0'? */
206 const U32 i = ((*t - '0') << 7) + m;
207 *(t--) = '0' + (char)(i % 10);
213 /* Explosives and implosives. */
215 #define ISUUCHAR(ch) inRANGE(NATIVE_TO_LATIN1(ch), \
216 NATIVE_TO_LATIN1(' '), \
217 NATIVE_TO_LATIN1('a') - 1)
220 #define TYPE_IS_SHRIEKING 0x100
221 #define TYPE_IS_BIG_ENDIAN 0x200
222 #define TYPE_IS_LITTLE_ENDIAN 0x400
223 #define TYPE_IS_PACK 0x800
224 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
225 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
226 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
228 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
229 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
231 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
233 #define PACK_SIZE_CANNOT_CSUM 0x80
234 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
235 #define PACK_SIZE_MASK 0x3F
237 #include "packsizetables.inc"
240 S_reverse_copy(const char *src, char *dest, STRLEN len)
248 utf8_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
256 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
257 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
258 if (retlen == (STRLEN) -1)
260 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
261 (int) TYPE_NO_MODIFIERS(datumtype));
263 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
264 "Character in '%c' format wrapped in unpack",
265 (int) TYPE_NO_MODIFIERS(datumtype));
272 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
273 utf8_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
277 S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, SSize_t buf_len, I32 datumtype)
281 const char *from = *s;
283 const U32 flags = ckWARN(WARN_UTF8) ?
284 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
285 const bool needs_swap = NEEDS_SWAP(datumtype);
287 if (UNLIKELY(needs_swap))
290 for (;buf_len > 0; buf_len--) {
291 if (from >= end) return FALSE;
292 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
293 if (retlen == (STRLEN) -1) {
294 from += UTF8_SAFE_SKIP(from, end);
296 } else from += retlen;
301 if (UNLIKELY(needs_swap))
302 *(U8 *)--buf = (U8)val;
304 *(U8 *)buf++ = (U8)val;
306 /* We have enough characters for the buffer. Did we have problems ? */
309 /* Rewalk the string fragment while warning */
311 const U32 flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
312 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
313 if (ptr >= end) break;
314 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
316 if (from > end) from = end;
319 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
320 WARN_PACK : WARN_UNPACK),
321 "Character(s) in '%c' format wrapped in %s",
322 (int) TYPE_NO_MODIFIERS(datumtype),
323 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
330 S_my_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
331 PERL_ARGS_ASSERT_MY_BYTES_TO_UTF8;
333 if (UNLIKELY(needs_swap)) {
334 const U8 *p = start + len;
335 while (p-- > start) {
336 append_utf8_from_native_byte(*p, (U8 **) & dest);
339 const U8 * const end = start + len;
340 while (start < end) {
341 append_utf8_from_native_byte(*start, (U8 **) & dest);
348 #define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
350 if (UNLIKELY(utf8)) \
351 (cur) = my_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap); \
353 if (UNLIKELY(needs_swap)) \
354 S_reverse_copy((char *)(buf), cur, len); \
356 Copy(buf, cur, len, char); \
361 #define SAFE_UTF8_EXPAND(var) \
363 if ((var) > SSize_t_MAX / UTF8_EXPAND) \
364 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
365 (var) = (var) * UTF8_EXPAND; \
368 #define GROWING2(utf8, cat, start, cur, item_size, item_count) \
370 if (SSize_t_MAX / (item_size) < (item_count)) \
371 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
372 GROWING((utf8), (cat), (start), (cur), (item_size) * (item_count)); \
375 #define GROWING(utf8, cat, start, cur, in_len) \
377 STRLEN glen = (in_len); \
378 STRLEN catcur = (STRLEN)((cur) - (start)); \
379 if (utf8) SAFE_UTF8_EXPAND(glen); \
380 if (SSize_t_MAX - glen < catcur) \
381 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
382 if (catcur + glen >= SvLEN(cat)) { \
383 (start) = sv_exp_grow(cat, glen); \
384 (cur) = (start) + SvCUR(cat); \
388 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
390 const STRLEN glen = (in_len); \
392 if (utf8) SAFE_UTF8_EXPAND(gl); \
393 if ((cur) + gl >= (start) + SvLEN(cat)) { \
395 SvCUR_set((cat), (cur) - (start)); \
396 (start) = sv_exp_grow(cat, gl); \
397 (cur) = (start) + SvCUR(cat); \
399 PUSH_BYTES(utf8, cur, buf, glen, 0); \
402 #define PUSH_BYTE(utf8, s, byte) \
405 const U8 au8 = (byte); \
406 (s) = my_bytes_to_utf8(&au8, 1, (s), 0);\
407 } else *(U8 *)(s)++ = (byte); \
410 /* Only to be used inside a loop (see the break) */
411 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
414 if (str >= end) break; \
415 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
416 if (retlen == (STRLEN) -1) { \
418 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
423 static const char *_action( const tempsym_t* symptr )
425 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
428 /* Returns the sizeof() struct described by pat */
430 S_measure_struct(pTHX_ tempsym_t* symptr)
434 PERL_ARGS_ASSERT_MEASURE_STRUCT;
436 while (next_symbol(symptr)) {
439 switch (symptr->howlen) {
441 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
445 /* e_no_len and e_number */
446 len = symptr->length;
450 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
453 /* endianness doesn't influence the size of a type */
454 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
456 Perl_croak(aTHX_ "Invalid type '%c' in %s",
457 (int)TYPE_NO_MODIFIERS(symptr->code),
459 case '.' | TYPE_IS_SHRIEKING:
460 case '@' | TYPE_IS_SHRIEKING:
464 case 'U': /* XXXX Is it correct? */
467 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
468 (int) TYPE_NO_MODIFIERS(symptr->code),
475 tempsym_t savsym = *symptr;
476 symptr->patptr = savsym.grpbeg;
477 symptr->patend = savsym.grpend;
478 /* XXXX Theoretically, we need to measure many times at
479 different positions, since the subexpression may contain
480 alignment commands, but be not of aligned length.
481 Need to detect this and croak(). */
482 size = measure_struct(symptr);
486 case 'X' | TYPE_IS_SHRIEKING:
487 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
489 if (!len) /* Avoid division by 0 */
491 len = total % len; /* Assumed: the start is aligned. */
496 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
498 case 'x' | TYPE_IS_SHRIEKING:
499 if (!len) /* Avoid division by 0 */
501 star = total % len; /* Assumed: the start is aligned. */
502 if (star) /* Other portable ways? */
526 size = sizeof(char*);
536 /* locate matching closing parenthesis or bracket
537 * returns char pointer to char after match, or NULL
540 S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
542 PERL_ARGS_ASSERT_GROUP_END;
544 while (patptr < patend) {
545 const char c = *patptr++;
552 while (patptr < patend && *patptr != '\n')
556 patptr = group_end(patptr, patend, ')') + 1;
558 patptr = group_end(patptr, patend, ']') + 1;
560 Perl_croak(aTHX_ "No group ending character '%c' found in template",
562 NOT_REACHED; /* NOTREACHED */
566 /* Convert unsigned decimal number to binary.
567 * Expects a pointer to the first digit and address of length variable
568 * Advances char pointer to 1st non-digit char and returns number
571 S_get_num(pTHX_ const char *patptr, SSize_t *lenptr )
573 SSize_t len = *patptr++ - '0';
575 PERL_ARGS_ASSERT_GET_NUM;
577 while (isDIGIT(*patptr)) {
578 SSize_t nlen = (len * 10) + (*patptr++ - '0');
579 if (nlen < 0 || nlen/10 != len)
580 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
587 /* The marvellous template parsing routine: Using state stored in *symptr,
588 * locates next template code and count
591 S_next_symbol(pTHX_ tempsym_t* symptr )
593 const char* patptr = symptr->patptr;
594 const char* const patend = symptr->patend;
596 PERL_ARGS_ASSERT_NEXT_SYMBOL;
598 symptr->flags &= ~FLAG_SLASH;
600 while (patptr < patend) {
601 if (isSPACE(*patptr))
603 else if (*patptr == '#') {
605 while (patptr < patend && *patptr != '\n')
610 /* We should have found a template code */
611 I32 code = *patptr++ & 0xFF;
612 U32 inherited_modifiers = 0;
614 if (code == ','){ /* grandfather in commas but with a warning */
615 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
616 symptr->flags |= FLAG_COMMA;
617 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
618 "Invalid type ',' in %s", _action( symptr ) );
623 /* for '(', skip to ')' */
625 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
626 Perl_croak(aTHX_ "()-group starts with a count in %s",
628 symptr->grpbeg = patptr;
629 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
630 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
631 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
635 /* look for group modifiers to inherit */
636 if (TYPE_ENDIANNESS(symptr->flags)) {
637 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
638 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
641 /* look for modifiers */
642 while (patptr < patend) {
647 modifier = TYPE_IS_SHRIEKING;
648 allowed = "sSiIlLxXnNvV@.";
651 modifier = TYPE_IS_BIG_ENDIAN;
652 allowed = ENDIANNESS_ALLOWED_TYPES;
655 modifier = TYPE_IS_LITTLE_ENDIAN;
656 allowed = ENDIANNESS_ALLOWED_TYPES;
667 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
668 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
669 allowed, _action( symptr ) );
671 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
672 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
673 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
674 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
675 TYPE_ENDIANNESS_MASK)
676 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
677 *patptr, _action( symptr ) );
679 if ((code & modifier)) {
680 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
681 "Duplicate modifier '%c' after '%c' in %s",
682 *patptr, (int) TYPE_NO_MODIFIERS(code),
690 /* inherit modifiers */
691 code |= inherited_modifiers;
693 /* look for count and/or / */
694 if (patptr < patend) {
695 if (isDIGIT(*patptr)) {
696 patptr = get_num( patptr, &symptr->length );
697 symptr->howlen = e_number;
699 } else if (*patptr == '*') {
701 symptr->howlen = e_star;
703 } else if (*patptr == '[') {
704 const char* lenptr = ++patptr;
705 symptr->howlen = e_number;
706 patptr = group_end( patptr, patend, ']' ) + 1;
707 /* what kind of [] is it? */
708 if (isDIGIT(*lenptr)) {
709 lenptr = get_num( lenptr, &symptr->length );
711 Perl_croak(aTHX_ "Malformed integer in [] in %s",
714 tempsym_t savsym = *symptr;
715 symptr->patend = patptr-1;
716 symptr->patptr = lenptr;
717 savsym.length = measure_struct(symptr);
721 symptr->howlen = e_no_len;
726 while (patptr < patend) {
727 if (isSPACE(*patptr))
729 else if (*patptr == '#') {
731 while (patptr < patend && *patptr != '\n')
736 if (*patptr == '/') {
737 symptr->flags |= FLAG_SLASH;
739 if (patptr < patend &&
740 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
741 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
748 /* at end - no count, no / */
749 symptr->howlen = e_no_len;
754 symptr->patptr = patptr;
758 symptr->patptr = patptr;
763 There is no way to cleanly handle the case where we should process the
764 string per byte in its upgraded form while it's really in downgraded form
765 (e.g. estimates like strend-s as an upper bound for the number of
766 characters left wouldn't work). So if we foresee the need of this
767 (pattern starts with U or contains U0), we want to work on the encoded
768 version of the string. Users are advised to upgrade their pack string
769 themselves if they need to do a lot of unpacks like this on it
772 need_utf8(const char *pat, const char *patend)
776 PERL_ARGS_ASSERT_NEED_UTF8;
778 while (pat < patend) {
781 pat = (const char *) memchr(pat, '\n', patend-pat);
782 if (!pat) return FALSE;
783 } else if (pat[0] == 'U') {
784 if (first || pat[1] == '0') return TRUE;
785 } else first = FALSE;
792 first_symbol(const char *pat, const char *patend) {
793 PERL_ARGS_ASSERT_FIRST_SYMBOL;
795 while (pat < patend) {
796 if (pat[0] != '#') return pat[0];
798 pat = (const char *) memchr(pat, '\n', patend-pat);
807 =for apidoc unpackstring
809 The engine implementing the C<unpack()> Perl function.
811 Using the template C<pat..patend>, this function unpacks the string
812 C<s..strend> into a number of mortal SVs, which it pushes onto the perl
813 argument (C<@_>) stack (so you will need to issue a C<PUTBACK> before and
814 C<SPAGAIN> after the call to this function). It returns the number of
817 The C<strend> and C<patend> pointers should point to the byte following the
818 last character of each string.
820 Although this function returns its values on the perl argument stack, it
821 doesn't take any parameters from that stack (and thus in particular
822 there's no need to do a C<PUSHMARK> before calling it, unlike L</call_pv> for
828 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
832 PERL_ARGS_ASSERT_UNPACKSTRING;
834 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
835 else if (need_utf8(pat, patend)) {
836 /* We probably should try to avoid this in case a scalar context call
837 wouldn't get to the "U0" */
838 STRLEN len = strend - s;
839 s = (char *) bytes_to_utf8((U8 *) s, &len);
842 flags |= FLAG_DO_UTF8;
845 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
846 flags |= FLAG_PARSE_UTF8;
848 TEMPSYM_INIT(&sym, pat, patend, flags);
850 return unpack_rec(&sym, s, s, strend, NULL );
854 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
858 const SSize_t start_sp_offset = SP - PL_stack_base;
860 SSize_t checksum = 0;
863 const SSize_t bits_in_uv = CHAR_BIT * sizeof(cuv);
865 bool explicit_length;
866 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
867 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
869 PERL_ARGS_ASSERT_UNPACK_REC;
871 symptr->strbeg = s - strbeg;
873 while (next_symbol(symptr)) {
876 I32 datumtype = symptr->code;
878 /* do first one only unless in list context
879 / is implemented by unpacking the count, then popping it from the
880 stack, so must check that we're not in the middle of a / */
882 && (SP - PL_stack_base == start_sp_offset + 1)
883 && (datumtype != '/') ) /* XXX can this be omitted */
886 switch (howlen = symptr->howlen) {
888 len = strend - strbeg; /* long enough */
891 /* e_no_len and e_number */
892 len = symptr->length;
896 explicit_length = TRUE;
898 beyond = s >= strend;
900 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
902 /* props nonzero means we can process this letter. */
903 const SSize_t size = props & PACK_SIZE_MASK;
904 const SSize_t howmany = (strend - s) / size;
908 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
909 if (len && unpack_only_one) len = 1;
915 needs_swap = NEEDS_SWAP(datumtype);
917 switch(TYPE_NO_ENDIANNESS(datumtype)) {
919 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
922 if (howlen == e_no_len)
923 len = 16; /* len is not specified */
931 tempsym_t savsym = *symptr;
932 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
933 symptr->flags |= group_modifiers;
934 symptr->patend = savsym.grpend;
935 symptr->previous = &savsym;
938 if (len && unpack_only_one) len = 1;
940 symptr->patptr = savsym.grpbeg;
941 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
942 else symptr->flags &= ~FLAG_PARSE_UTF8;
943 unpack_rec(symptr, s, strbeg, strend, &s);
944 if (s == strend && savsym.howlen == e_star)
945 break; /* No way to continue */
948 savsym.flags = symptr->flags & ~group_modifiers;
952 case '.' | TYPE_IS_SHRIEKING:
956 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
957 if (howlen == e_star) from = strbeg;
958 else if (len <= 0) from = s;
960 tempsym_t *group = symptr;
962 while (--len && group) group = group->previous;
963 from = group ? strbeg + group->strbeg : strbeg;
966 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
967 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
971 case '@' | TYPE_IS_SHRIEKING:
973 s = strbeg + symptr->strbeg;
974 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
978 Perl_croak(aTHX_ "'@' outside of string in unpack");
983 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
986 Perl_croak(aTHX_ "'@' outside of string in unpack");
990 case 'X' | TYPE_IS_SHRIEKING:
991 if (!len) /* Avoid division by 0 */
994 const char *hop, *last;
998 hop += UTF8SKIP(hop);
1005 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1009 len = (s - strbeg) % len;
1015 Perl_croak(aTHX_ "'X' outside of string in unpack");
1016 while (--s, UTF8_IS_CONTINUATION(*s)) {
1018 Perl_croak(aTHX_ "'X' outside of string in unpack");
1023 if (len > s - strbeg)
1024 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1028 case 'x' | TYPE_IS_SHRIEKING: {
1030 if (!len) /* Avoid division by 0 */
1032 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1033 else ai32 = (s - strbeg) % len;
1034 if (ai32 == 0) break;
1042 Perl_croak(aTHX_ "'x' outside of string in unpack");
1047 if (len > strend - s)
1048 Perl_croak(aTHX_ "'x' outside of string in unpack");
1053 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1059 /* Preliminary length estimate is assumed done in 'W' */
1060 if (len > strend - s) len = strend - s;
1066 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1067 if (hop >= strend) {
1069 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1074 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1076 } else if (len > strend - s)
1079 if (datumtype == 'Z') {
1080 /* 'Z' strips stuff after first null */
1081 const char *ptr, *end;
1083 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1084 sv = newSVpvn(s, ptr-s);
1085 if (howlen == e_star) /* exact for 'Z*' */
1086 len = ptr-s + (ptr != strend ? 1 : 0);
1087 } else if (datumtype == 'A') {
1088 /* 'A' strips both nulls and spaces */
1090 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1091 for (ptr = s+len-1; ptr >= s; ptr--) {
1093 && !UTF8_IS_CONTINUATION(*ptr)
1094 && !isSPACE_utf8_safe(ptr, strend))
1099 if (ptr >= s) ptr += UTF8SKIP(ptr);
1102 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1104 for (ptr = s+len-1; ptr >= s; ptr--)
1105 if (*ptr != 0 && !isSPACE(*ptr)) break;
1108 sv = newSVpvn(s, ptr-s);
1109 } else sv = newSVpvn(s, len);
1113 /* Undo any upgrade done due to need_utf8() */
1114 if (!(symptr->flags & FLAG_WAS_UTF8))
1115 sv_utf8_downgrade(sv, 0);
1123 if (howlen == e_star || len > (strend - s) * 8)
1124 len = (strend - s) * 8;
1127 while (len >= 8 && s < strend) {
1128 cuv += PL_bitcount[utf8_to_byte(aTHX_ &s, strend, datumtype)];
1133 cuv += PL_bitcount[*(U8 *)s++];
1136 if (len && s < strend) {
1138 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1139 if (datumtype == 'b')
1141 if (bits & 1) cuv++;
1146 if (bits & 0x80) cuv++;
1153 sv = sv_2mortal(newSV(len ? len : 1));
1156 if (datumtype == 'b') {
1158 const SSize_t ai32 = len;
1159 for (len = 0; len < ai32; len++) {
1160 if (len & 7) bits >>= 1;
1162 if (s >= strend) break;
1163 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1164 } else bits = *(U8 *) s++;
1165 *str++ = bits & 1 ? '1' : '0';
1169 const SSize_t ai32 = len;
1170 for (len = 0; len < ai32; len++) {
1171 if (len & 7) bits <<= 1;
1173 if (s >= strend) break;
1174 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1175 } else bits = *(U8 *) s++;
1176 *str++ = bits & 0x80 ? '1' : '0';
1180 SvCUR_set(sv, str - SvPVX_const(sv));
1187 /* Preliminary length estimate, acceptable for utf8 too */
1188 if (howlen == e_star || len > (strend - s) * 2)
1189 len = (strend - s) * 2;
1191 sv = sv_2mortal(newSV(len ? len : 1));
1195 if (datumtype == 'h') {
1198 for (len = 0; len < ai32; len++) {
1199 if (len & 1) bits >>= 4;
1201 if (s >= strend) break;
1202 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1203 } else bits = * (U8 *) s++;
1205 *str++ = PL_hexdigit[bits & 15];
1209 const SSize_t ai32 = len;
1210 for (len = 0; len < ai32; len++) {
1211 if (len & 1) bits <<= 4;
1213 if (s >= strend) break;
1214 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1215 } else bits = *(U8 *) s++;
1217 *str++ = PL_hexdigit[(bits >> 4) & 15];
1222 SvCUR_set(sv, str - SvPVX_const(sv));
1229 if (explicit_length)
1230 /* Switch to "character" mode */
1231 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1236 while (len-- > 0 && s < strend) {
1241 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1242 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1243 if (retlen == (STRLEN) -1)
1244 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1248 aint = *(U8 *)(s)++;
1249 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1253 else if (checksum > bits_in_uv)
1254 cdouble += (NV)aint;
1262 while (len-- > 0 && s < strend) {
1264 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1265 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1266 if (retlen == (STRLEN) -1)
1267 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1271 else if (checksum > bits_in_uv)
1272 cdouble += (NV) val;
1276 } else if (!checksum)
1278 const U8 ch = *(U8 *) s++;
1281 else if (checksum > bits_in_uv)
1282 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1284 while (len-- > 0) cuv += *(U8 *) s++;
1288 if (explicit_length && howlen != e_star) {
1289 /* Switch to "bytes in UTF-8" mode */
1290 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1292 /* Should be impossible due to the need_utf8() test */
1293 Perl_croak(aTHX_ "U0 mode on a byte string");
1297 if (len > strend - s) len = strend - s;
1299 if (len && unpack_only_one) len = 1;
1303 while (len-- > 0 && s < strend) {
1307 U8 result[UTF8_MAXLEN+1];
1308 const char *ptr = s;
1310 /* Bug: warns about bad utf8 even if we are short on bytes
1311 and will break out of the loop */
1312 if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1315 len = UTF8SKIP(result);
1316 if (!S_utf8_to_bytes(aTHX_ &ptr, strend,
1317 (char *) &result[1], len-1, 'U')) break;
1318 auv = NATIVE_TO_UNI(utf8n_to_uvchr(result,
1321 UTF8_ALLOW_DEFAULT));
1324 auv = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s,
1327 UTF8_ALLOW_DEFAULT));
1328 if (retlen == (STRLEN) -1)
1329 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1334 else if (checksum > bits_in_uv)
1335 cdouble += (NV) auv;
1340 case 's' | TYPE_IS_SHRIEKING:
1341 #if SHORTSIZE != SIZE16
1344 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
1347 else if (checksum > bits_in_uv)
1348 cdouble += (NV)ashort;
1360 #if U16SIZE > SIZE16
1363 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1364 #if U16SIZE > SIZE16
1370 else if (checksum > bits_in_uv)
1371 cdouble += (NV)ai16;
1376 case 'S' | TYPE_IS_SHRIEKING:
1377 #if SHORTSIZE != SIZE16
1379 unsigned short aushort;
1380 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap);
1383 else if (checksum > bits_in_uv)
1384 cdouble += (NV)aushort;
1397 #if U16SIZE > SIZE16
1400 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
1401 if (datumtype == 'n')
1402 au16 = PerlSock_ntohs(au16);
1403 if (datumtype == 'v')
1407 else if (checksum > bits_in_uv)
1408 cdouble += (NV) au16;
1413 case 'v' | TYPE_IS_SHRIEKING:
1414 case 'n' | TYPE_IS_SHRIEKING:
1417 # if U16SIZE > SIZE16
1420 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1421 /* There should never be any byte-swapping here. */
1422 assert(!TYPE_ENDIANNESS(datumtype));
1423 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1424 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1425 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1426 ai16 = (I16) vtohs((U16) ai16);
1429 else if (checksum > bits_in_uv)
1430 cdouble += (NV) ai16;
1436 case 'i' | TYPE_IS_SHRIEKING:
1439 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
1442 else if (checksum > bits_in_uv)
1443 cdouble += (NV)aint;
1449 case 'I' | TYPE_IS_SHRIEKING:
1452 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
1455 else if (checksum > bits_in_uv)
1456 cdouble += (NV)auint;
1464 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
1467 else if (checksum > bits_in_uv)
1476 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
1479 else if (checksum > bits_in_uv)
1485 case 'l' | TYPE_IS_SHRIEKING:
1486 #if LONGSIZE != SIZE32
1489 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
1492 else if (checksum > bits_in_uv)
1493 cdouble += (NV)along;
1504 #if U32SIZE > SIZE32
1507 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1508 #if U32SIZE > SIZE32
1509 if (ai32 > 2147483647) ai32 -= 4294967296;
1513 else if (checksum > bits_in_uv)
1514 cdouble += (NV)ai32;
1519 case 'L' | TYPE_IS_SHRIEKING:
1520 #if LONGSIZE != SIZE32
1522 unsigned long aulong;
1523 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
1526 else if (checksum > bits_in_uv)
1527 cdouble += (NV)aulong;
1540 #if U32SIZE > SIZE32
1543 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
1544 if (datumtype == 'N')
1545 au32 = PerlSock_ntohl(au32);
1546 if (datumtype == 'V')
1550 else if (checksum > bits_in_uv)
1551 cdouble += (NV)au32;
1556 case 'V' | TYPE_IS_SHRIEKING:
1557 case 'N' | TYPE_IS_SHRIEKING:
1560 #if U32SIZE > SIZE32
1563 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1564 /* There should never be any byte swapping here. */
1565 assert(!TYPE_ENDIANNESS(datumtype));
1566 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1567 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1568 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1569 ai32 = (I32)vtohl((U32)ai32);
1572 else if (checksum > bits_in_uv)
1573 cdouble += (NV)ai32;
1581 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1582 /* newSVpv generates undef if aptr is NULL */
1583 mPUSHs(newSVpv(aptr, 0));
1591 while (len > 0 && s < strend) {
1593 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1594 auv = (auv << 7) | (ch & 0x7f);
1595 /* UTF8_IS_XXXXX not right here because this is a BER, not
1596 * UTF-8 format - using constant 0x80 */
1604 if (++bytes >= sizeof(UV)) { /* promote to string */
1607 sv = Perl_newSVpvf(aTHX_ "%.*" UVuf,
1608 (int)TYPE_DIGITS(UV), auv);
1609 while (s < strend) {
1610 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1611 sv = mul128(sv, (U8)(ch & 0x7f));
1617 t = SvPV_nolen_const(sv);
1626 if ((s >= strend) && bytes)
1627 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1631 if (symptr->howlen == e_star)
1632 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1634 if (s + sizeof(char*) <= strend) {
1636 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1637 /* newSVpvn generates undef if aptr is NULL */
1638 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1641 #if defined(HAS_QUAD) && IVSIZE >= 8
1645 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
1647 mPUSHs(newSViv((IV)aquad));
1648 else if (checksum > bits_in_uv)
1649 cdouble += (NV)aquad;
1657 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
1659 mPUSHs(newSVuv((UV)auquad));
1660 else if (checksum > bits_in_uv)
1661 cdouble += (NV)auquad;
1667 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1671 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
1681 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
1691 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1692 datumtype, needs_swap);
1699 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1703 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1704 sizeof(aldouble.bytes), datumtype, needs_swap);
1705 /* The most common long double format, the x86 80-bit
1706 * extended precision, has either 2 or 6 unused bytes,
1707 * which may contain garbage, which may contain
1708 * unintentional data. While we do zero the bytes of
1709 * the long double data in pack(), here in unpack() we
1710 * don't, because it's really hard to envision that
1711 * reading the long double off aldouble would be
1712 * affected by the unused bytes.
1714 * Note that trying to unpack 'long doubles' of 'long
1715 * doubles' packed in another system is in the general
1716 * case doomed without having more detail. */
1718 mPUSHn(aldouble.ld);
1720 cdouble += aldouble.ld;
1726 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1727 sv = sv_2mortal(newSV(l));
1734 /* Note that all legal uuencoded strings are ASCII printables, so
1735 * have the same representation under UTF-8 vs not. This means we
1736 * can ignore UTF8ness on legal input. For illegal we stop at the
1737 * first failure, and don't report where/what that is, so again we
1738 * can ignore UTF8ness */
1740 while (s < strend && *s != ' ' && ISUUCHAR(*s)) {
1744 len = PL_uudmap[*(U8*)s++] & 077;
1746 if (s < strend && ISUUCHAR(*s))
1747 a = PL_uudmap[*(U8*)s++] & 077;
1750 if (s < strend && ISUUCHAR(*s))
1751 b = PL_uudmap[*(U8*)s++] & 077;
1754 if (s < strend && ISUUCHAR(*s))
1755 c = PL_uudmap[*(U8*)s++] & 077;
1758 if (s < strend && ISUUCHAR(*s))
1759 d = PL_uudmap[*(U8*)s++] & 077;
1762 hunk[0] = (char)((a << 2) | (b >> 4));
1763 hunk[1] = (char)((b << 4) | (c >> 2));
1764 hunk[2] = (char)((c << 6) | d);
1766 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1771 else /* possible checksum byte */
1772 if (s + 1 < strend && s[1] == '\n')
1778 } /* End of switch */
1781 if (memCHRs("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1782 (checksum > bits_in_uv &&
1783 memCHRs("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1786 anv = (NV) (1 << (checksum & 15));
1787 while (checksum >= 16) {
1791 while (cdouble < 0.0)
1793 cdouble = Perl_modf(cdouble / anv, &trouble);
1794 #ifdef LONGDOUBLE_DOUBLEDOUBLE
1795 /* Workaround for powerpc doubledouble modfl bug:
1796 * close to 1.0L and -1.0L cdouble is 0, and trouble
1797 * is cdouble / anv. */
1798 if (trouble != Perl_ceil(trouble)) {
1800 if (cdouble > 1.0L) cdouble -= 1.0L;
1801 if (cdouble < -1.0L) cdouble += 1.0L;
1805 sv = newSVnv(cdouble);
1808 if (checksum < bits_in_uv) {
1809 UV mask = nBIT_MASK(checksum);
1818 if (symptr->flags & FLAG_SLASH){
1819 if (SP - PL_stack_base - start_sp_offset <= 0)
1821 if( next_symbol(symptr) ){
1822 if( symptr->howlen == e_number )
1823 Perl_croak(aTHX_ "Count after length/code in unpack" );
1825 /* ...end of char buffer then no decent length available */
1826 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1828 /* take top of stack (hope it's numeric) */
1831 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1834 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1836 datumtype = symptr->code;
1837 explicit_length = FALSE;
1845 return SP - PL_stack_base - start_sp_offset;
1855 const char *pat = SvPV_const(left, llen);
1856 const char *s = SvPV_const(right, rlen);
1857 const char *strend = s + rlen;
1858 const char *patend = pat + llen;
1862 cnt = unpackstring(pat, patend, s, strend,
1863 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
1864 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
1867 if ( !cnt && gimme == G_SCALAR )
1868 PUSHs(&PL_sv_undef);
1873 doencodes(U8 *h, const U8 *s, SSize_t len)
1875 *h++ = PL_uuemap[len];
1877 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1878 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1879 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1880 *h++ = PL_uuemap[(077 & (s[2] & 077))];
1885 const U8 r = (len > 1 ? s[1] : '\0');
1886 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1887 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1888 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1889 *h++ = PL_uuemap[0];
1896 S_is_an_int(pTHX_ const char *s, STRLEN l)
1898 SV *result = newSVpvn(s, l);
1899 char *const result_c = SvPV_nolen(result); /* convenience */
1900 char *out = result_c;
1904 PERL_ARGS_ASSERT_IS_AN_INT;
1912 SvREFCNT_dec(result);
1935 SvREFCNT_dec(result);
1941 SvCUR_set(result, out - result_c);
1945 /* pnum must be '\0' terminated */
1947 S_div128(pTHX_ SV *pnum, bool *done)
1950 char * const s = SvPV(pnum, len);
1954 PERL_ARGS_ASSERT_DIV128;
1958 const int i = m * 10 + (*t - '0');
1959 const int r = (i >> 7); /* r < 10 */
1967 SvCUR_set(pnum, (STRLEN) (t - s));
1972 =for apidoc packlist
1974 The engine implementing C<pack()> Perl function.
1980 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
1984 PERL_ARGS_ASSERT_PACKLIST;
1986 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
1988 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1989 Also make sure any UTF8 flag is loaded */
1990 SvPV_force_nolen(cat);
1992 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
1994 (void)pack_rec( cat, &sym, beglist, endlist );
1997 /* like sv_utf8_upgrade, but also repoint the group start markers */
1999 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2002 const char *from_ptr, *from_start, *from_end, **marks, **m;
2003 char *to_start, *to_ptr;
2005 if (SvUTF8(sv)) return;
2007 from_start = SvPVX_const(sv);
2008 from_end = from_start + SvCUR(sv);
2009 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2010 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
2011 if (from_ptr == from_end) {
2012 /* Simple case: no character needs to be changed */
2017 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2018 Newx(to_start, len, char);
2019 Copy(from_start, to_start, from_ptr-from_start, char);
2020 to_ptr = to_start + (from_ptr-from_start);
2022 Newx(marks, sym_ptr->level+2, const char *);
2023 for (group=sym_ptr; group; group = group->previous)
2024 marks[group->level] = from_start + group->strbeg;
2025 marks[sym_ptr->level+1] = from_end+1;
2026 for (m = marks; *m < from_ptr; m++)
2027 *m = to_start + (*m-from_start);
2029 for (;from_ptr < from_end; from_ptr++) {
2030 while (*m == from_ptr) *m++ = to_ptr;
2031 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2035 while (*m == from_ptr) *m++ = to_ptr;
2036 if (m != marks + sym_ptr->level+1) {
2039 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2040 "level=%d", m, marks, sym_ptr->level);
2042 for (group=sym_ptr; group; group = group->previous)
2043 group->strbeg = marks[group->level] - to_start;
2048 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2049 from_start -= SvIVX(sv);
2052 SvFLAGS(sv) &= ~SVf_OOK;
2055 Safefree(from_start);
2056 SvPV_set(sv, to_start);
2057 SvCUR_set(sv, to_ptr - to_start);
2062 /* Exponential string grower. Makes string extension effectively O(n)
2063 needed says how many extra bytes we need (not counting the final '\0')
2064 Only grows the string if there is an actual lack of space
2067 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2068 const STRLEN cur = SvCUR(sv);
2069 const STRLEN len = SvLEN(sv);
2072 PERL_ARGS_ASSERT_SV_EXP_GROW;
2074 if (len - cur > needed) return SvPVX(sv);
2075 extend = needed > len ? needed : len;
2076 return SvGROW(sv, len+extend+1);
2080 S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
2083 if (UNLIKELY(SvAMAGIC(sv)))
2085 if (UNLIKELY(isinfnansv(sv))) {
2086 const I32 c = TYPE_NO_MODIFIERS(datumtype);
2087 const NV nv = SvNV_nomg(sv);
2089 Perl_croak(aTHX_ "Cannot compress %" NVgf " in pack", nv);
2091 Perl_croak(aTHX_ "Cannot pack %" NVgf " with '%c'", nv, (int) c);
2096 #define SvIV_no_inf(sv,d) \
2097 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
2098 #define SvUV_no_inf(sv,d) \
2099 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
2103 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2105 tempsym_t lookahead;
2106 SSize_t items = endlist - beglist;
2107 bool found = next_symbol(symptr);
2108 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2109 bool warn_utf8 = ckWARN(WARN_UTF8);
2112 PERL_ARGS_ASSERT_PACK_REC;
2114 if (symptr->level == 0 && found && symptr->code == 'U') {
2115 marked_upgrade(aTHX_ cat, symptr);
2116 symptr->flags |= FLAG_DO_UTF8;
2119 symptr->strbeg = SvCUR(cat);
2125 SV *lengthcode = NULL;
2126 I32 datumtype = symptr->code;
2127 howlen_t howlen = symptr->howlen;
2128 char *start = SvPVX(cat);
2129 char *cur = start + SvCUR(cat);
2132 #define NEXTFROM (lengthcode ? lengthcode : items > 0 ? (--items, *beglist++) : &PL_sv_no)
2133 #define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
2137 len = memCHRs("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2141 /* e_no_len and e_number */
2142 len = symptr->length;
2147 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2149 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2150 /* We can process this letter. */
2151 STRLEN size = props & PACK_SIZE_MASK;
2152 GROWING2(utf8, cat, start, cur, size, (STRLEN)len);
2156 /* Look ahead for next symbol. Do we have code/code? */
2157 lookahead = *symptr;
2158 found = next_symbol(&lookahead);
2159 if (symptr->flags & FLAG_SLASH) {
2161 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2162 if (memCHRs("aAZ", lookahead.code)) {
2163 if (lookahead.howlen == e_number) count = lookahead.length;
2166 count = sv_len_utf8(*beglist);
2169 if (lookahead.code == 'Z') count++;
2172 if (lookahead.howlen == e_number && lookahead.length < items)
2173 count = lookahead.length;
2176 lookahead.howlen = e_number;
2177 lookahead.length = count;
2178 lengthcode = sv_2mortal(newSViv(count));
2181 needs_swap = NEEDS_SWAP(datumtype);
2183 /* Code inside the switch must take care to properly update
2184 cat (CUR length and '\0' termination) if it updated *cur and
2185 doesn't simply leave using break */
2186 switch (TYPE_NO_ENDIANNESS(datumtype)) {
2188 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2189 (int) TYPE_NO_MODIFIERS(datumtype));
2191 Perl_croak(aTHX_ "'%%' may not be used in pack");
2193 case '.' | TYPE_IS_SHRIEKING:
2195 if (howlen == e_star) from = start;
2196 else if (len == 0) from = cur;
2198 tempsym_t *group = symptr;
2200 while (--len && group) group = group->previous;
2201 from = group ? start + group->strbeg : start;
2204 len = SvIV_no_inf(fromstr, datumtype);
2206 case '@' | TYPE_IS_SHRIEKING:
2208 from = start + symptr->strbeg;
2210 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2212 while (len && from < cur) {
2213 from += UTF8SKIP(from);
2217 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2219 /* Here we know from == cur */
2221 GROWING(0, cat, start, cur, len);
2222 Zero(cur, len, char);
2224 } else if (from < cur) {
2227 } else goto no_change;
2235 if (len > 0) goto grow;
2236 if (len == 0) goto no_change;
2243 tempsym_t savsym = *symptr;
2244 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2245 symptr->flags |= group_modifiers;
2246 symptr->patend = savsym.grpend;
2248 symptr->previous = &lookahead;
2251 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2252 else symptr->flags &= ~FLAG_PARSE_UTF8;
2253 was_utf8 = SvUTF8(cat);
2254 symptr->patptr = savsym.grpbeg;
2255 beglist = pack_rec(cat, symptr, beglist, endlist);
2256 if (SvUTF8(cat) != was_utf8)
2257 /* This had better be an upgrade while in utf8==0 mode */
2260 if (savsym.howlen == e_star && beglist == endlist)
2261 break; /* No way to continue */
2263 items = endlist - beglist;
2264 lookahead.flags = symptr->flags & ~group_modifiers;
2267 case 'X' | TYPE_IS_SHRIEKING:
2268 if (!len) /* Avoid division by 0 */
2275 hop += UTF8SKIP(hop);
2282 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2286 len = (cur-start) % len;
2290 if (len < 1) goto no_change;
2294 Perl_croak(aTHX_ "'%c' outside of string in pack",
2295 (int) TYPE_NO_MODIFIERS(datumtype));
2296 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2298 Perl_croak(aTHX_ "'%c' outside of string in pack",
2299 (int) TYPE_NO_MODIFIERS(datumtype));
2305 if (cur - start < len)
2306 Perl_croak(aTHX_ "'%c' outside of string in pack",
2307 (int) TYPE_NO_MODIFIERS(datumtype));
2310 if (cur < start+symptr->strbeg) {
2311 /* Make sure group starts don't point into the void */
2313 const STRLEN length = cur-start;
2314 for (group = symptr;
2315 group && length < group->strbeg;
2316 group = group->previous) group->strbeg = length;
2317 lookahead.strbeg = length;
2320 case 'x' | TYPE_IS_SHRIEKING: {
2322 if (!len) /* Avoid division by 0 */
2324 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2325 else ai32 = (cur - start) % len;
2326 if (ai32 == 0) goto no_change;
2338 aptr = SvPV_const(fromstr, fromlen);
2339 if (DO_UTF8(fromstr)) {
2340 const char *end, *s;
2342 if (!utf8 && !SvUTF8(cat)) {
2343 marked_upgrade(aTHX_ cat, symptr);
2344 lookahead.flags |= FLAG_DO_UTF8;
2345 lookahead.strbeg = symptr->strbeg;
2348 cur = start + SvCUR(cat);
2350 if (howlen == e_star) {
2351 if (utf8) goto string_copy;
2355 end = aptr + fromlen;
2356 fromlen = datumtype == 'Z' ? len-1 : len;
2357 while ((SSize_t) fromlen > 0 && s < end) {
2362 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2365 if (datumtype == 'Z') len++;
2371 fromlen = len - fromlen;
2372 if (datumtype == 'Z') fromlen--;
2373 if (howlen == e_star) {
2375 if (datumtype == 'Z') len++;
2377 GROWING(0, cat, start, cur, len);
2378 if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2379 datumtype | TYPE_IS_PACK))
2380 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2381 "for '%c', aptr=%p end=%p cur=%p, fromlen=%zu",
2382 (int)datumtype, aptr, end, cur, fromlen);
2386 if (howlen == e_star) {
2388 if (datumtype == 'Z') len++;
2390 if (len <= (SSize_t) fromlen) {
2392 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2394 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2396 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2397 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2399 while (fromlen > 0) {
2400 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2406 if (howlen == e_star) {
2408 if (datumtype == 'Z') len++;
2410 if (len <= (SSize_t) fromlen) {
2412 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2414 GROWING(0, cat, start, cur, len);
2415 Copy(aptr, cur, fromlen, char);
2419 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2426 const char *str, *end;
2427 SSize_t l, field_len;
2433 str = SvPV_const(fromstr, fromlen);
2434 end = str + fromlen;
2435 if (DO_UTF8(fromstr)) {
2437 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2439 utf8_source = FALSE;
2440 utf8_flags = 0; /* Unused, but keep compilers happy */
2442 if (howlen == e_star) len = fromlen;
2443 field_len = (len+7)/8;
2444 GROWING(utf8, cat, start, cur, field_len);
2445 if (len > (SSize_t)fromlen) len = fromlen;
2448 if (datumtype == 'B')
2452 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2454 } else bits |= *str++ & 1;
2455 if (l & 7) bits <<= 1;
2457 PUSH_BYTE(utf8, cur, bits);
2462 /* datumtype == 'b' */
2466 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2467 if (val & 1) bits |= 0x80;
2468 } else if (*str++ & 1)
2470 if (l & 7) bits >>= 1;
2472 PUSH_BYTE(utf8, cur, bits);
2478 if (datumtype == 'B')
2479 bits <<= 7 - (l & 7);
2481 bits >>= 7 - (l & 7);
2482 PUSH_BYTE(utf8, cur, bits);
2485 /* Determine how many chars are left in the requested field */
2487 if (howlen == e_star) field_len = 0;
2488 else field_len -= l;
2489 Zero(cur, field_len, char);
2495 const char *str, *end;
2496 SSize_t l, field_len;
2502 str = SvPV_const(fromstr, fromlen);
2503 end = str + fromlen;
2504 if (DO_UTF8(fromstr)) {
2506 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2508 utf8_source = FALSE;
2509 utf8_flags = 0; /* Unused, but keep compilers happy */
2511 if (howlen == e_star) len = fromlen;
2512 field_len = (len+1)/2;
2513 GROWING(utf8, cat, start, cur, field_len);
2514 if (!utf8_source && len > (SSize_t)fromlen) len = fromlen;
2517 if (datumtype == 'H')
2521 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2522 if (val < 256 && isALPHA(val))
2523 bits |= (val + 9) & 0xf;
2526 } else if (isALPHA(*str))
2527 bits |= (*str++ + 9) & 0xf;
2529 bits |= *str++ & 0xf;
2530 if (l & 1) bits <<= 4;
2532 PUSH_BYTE(utf8, cur, bits);
2540 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2541 if (val < 256 && isALPHA(val))
2542 bits |= ((val + 9) & 0xf) << 4;
2544 bits |= (val & 0xf) << 4;
2545 } else if (isALPHA(*str))
2546 bits |= ((*str++ + 9) & 0xf) << 4;
2548 bits |= (*str++ & 0xf) << 4;
2549 if (l & 1) bits >>= 4;
2551 PUSH_BYTE(utf8, cur, bits);
2557 PUSH_BYTE(utf8, cur, bits);
2560 /* Determine how many chars are left in the requested field */
2562 if (howlen == e_star) field_len = 0;
2563 else field_len -= l;
2564 Zero(cur, field_len, char);
2572 aiv = SvIV_no_inf(fromstr, datumtype);
2573 if ((-128 > aiv || aiv > 127))
2574 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2575 "Character in 'c' format wrapped in pack");
2576 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2581 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2587 aiv = SvIV_no_inf(fromstr, datumtype);
2588 if ((0 > aiv || aiv > 0xff))
2589 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2590 "Character in 'C' format wrapped in pack");
2591 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2596 U8 in_bytes = (U8)IN_BYTES;
2598 end = start+SvLEN(cat)-1;
2599 if (utf8) end -= UTF8_MAXLEN-1;
2603 auv = SvUV_no_inf(fromstr, datumtype);
2604 if (in_bytes) auv = auv % 0x100;
2609 SvCUR_set(cat, cur - start);
2611 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2612 end = start+SvLEN(cat)-UTF8_MAXLEN;
2614 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, 0);
2619 SvCUR_set(cat, cur - start);
2620 marked_upgrade(aTHX_ cat, symptr);
2621 lookahead.flags |= FLAG_DO_UTF8;
2622 lookahead.strbeg = symptr->strbeg;
2625 cur = start + SvCUR(cat);
2626 end = start+SvLEN(cat)-UTF8_MAXLEN;
2629 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2630 "Character in 'W' format wrapped in pack");
2635 SvCUR_set(cat, cur - start);
2636 GROWING(0, cat, start, cur, len+1);
2637 end = start+SvLEN(cat)-1;
2639 *(U8 *) cur++ = (U8)auv;
2648 if (!(symptr->flags & FLAG_DO_UTF8)) {
2649 marked_upgrade(aTHX_ cat, symptr);
2650 lookahead.flags |= FLAG_DO_UTF8;
2651 lookahead.strbeg = symptr->strbeg;
2657 end = start+SvLEN(cat);
2658 if (!utf8) end -= UTF8_MAXLEN;
2662 auv = SvUV_no_inf(fromstr, datumtype);
2664 U8 buffer[UTF8_MAXLEN+1], *endb;
2665 endb = uvchr_to_utf8_flags(buffer, UNI_TO_NATIVE(auv), 0);
2666 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2668 SvCUR_set(cat, cur - start);
2669 GROWING(0, cat, start, cur,
2670 len+(endb-buffer)*UTF8_EXPAND);
2671 end = start+SvLEN(cat);
2673 cur = my_bytes_to_utf8(buffer, endb-buffer, cur, 0);
2677 SvCUR_set(cat, cur - start);
2678 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2679 end = start+SvLEN(cat)-UTF8_MAXLEN;
2681 cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
2688 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2694 anv = SvNV(fromstr);
2695 # if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
2696 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2697 * on Alpha; fake it if we don't have them.
2701 else if (anv < -FLT_MAX)
2703 else afloat = (float)anv;
2705 # if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2707 afloat = (float)NV_NAN;
2711 /* a simple cast to float is undefined if outside
2712 * the range of values that can be represented */
2713 afloat = (float)(anv > FLT_MAX ? NV_INF :
2714 anv < -FLT_MAX ? -NV_INF : anv);
2717 PUSH_VAR(utf8, cur, afloat, needs_swap);
2725 anv = SvNV(fromstr);
2726 # if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
2727 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2728 * on Alpha; fake it if we don't have them.
2732 else if (anv < -DBL_MAX)
2734 else adouble = (double)anv;
2736 adouble = (double)anv;
2738 PUSH_VAR(utf8, cur, adouble, needs_swap);
2743 Zero(&anv, 1, NV); /* can be long double with unused bits */
2747 /* to work round a gcc/x86 bug; don't use SvNV */
2748 anv.nv = sv_2nv(fromstr);
2749 # if defined(LONGDOUBLE_X86_80_BIT) && defined(USE_LONG_DOUBLE) \
2750 && LONG_DOUBLESIZE > 10
2751 /* GCC sometimes overwrites the padding in the
2753 Zero(anv.bytes+10, sizeof(anv.bytes) - 10, U8);
2756 anv.nv = SvNV(fromstr);
2758 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
2762 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2765 /* long doubles can have unused bits, which may be nonzero */
2766 Zero(&aldouble, 1, long double);
2770 /* to work round a gcc/x86 bug; don't use SvNV */
2771 aldouble.ld = (long double)sv_2nv(fromstr);
2772 # if defined(LONGDOUBLE_X86_80_BIT) && LONG_DOUBLESIZE > 10
2773 /* GCC sometimes overwrites the padding in the
2775 Zero(aldouble.bytes+10, sizeof(aldouble.bytes) - 10, U8);
2778 aldouble.ld = (long double)SvNV(fromstr);
2780 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2786 case 'n' | TYPE_IS_SHRIEKING:
2791 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2792 ai16 = PerlSock_htons(ai16);
2793 PUSH16(utf8, cur, &ai16, FALSE);
2796 case 'v' | TYPE_IS_SHRIEKING:
2801 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2803 PUSH16(utf8, cur, &ai16, FALSE);
2806 case 'S' | TYPE_IS_SHRIEKING:
2807 #if SHORTSIZE != SIZE16
2809 unsigned short aushort;
2811 aushort = SvUV_no_inf(fromstr, datumtype);
2812 PUSH_VAR(utf8, cur, aushort, needs_swap);
2822 au16 = (U16)SvUV_no_inf(fromstr, datumtype);
2823 PUSH16(utf8, cur, &au16, needs_swap);
2826 case 's' | TYPE_IS_SHRIEKING:
2827 #if SHORTSIZE != SIZE16
2831 ashort = SvIV_no_inf(fromstr, datumtype);
2832 PUSH_VAR(utf8, cur, ashort, needs_swap);
2842 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2843 PUSH16(utf8, cur, &ai16, needs_swap);
2847 case 'I' | TYPE_IS_SHRIEKING:
2851 auint = SvUV_no_inf(fromstr, datumtype);
2852 PUSH_VAR(utf8, cur, auint, needs_swap);
2859 aiv = SvIV_no_inf(fromstr, datumtype);
2860 PUSH_VAR(utf8, cur, aiv, needs_swap);
2867 auv = SvUV_no_inf(fromstr, datumtype);
2868 PUSH_VAR(utf8, cur, auv, needs_swap);
2875 S_sv_check_infnan(aTHX_ fromstr, datumtype);
2876 anv = SvNV_nomg(fromstr);
2880 SvCUR_set(cat, cur - start);
2881 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
2884 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2885 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2886 any negative IVs will have already been got by the croak()
2887 above. IOK is untrue for fractions, so we test them
2888 against UV_MAX_P1. */
2889 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2890 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
2891 char *in = buf + sizeof(buf);
2892 UV auv = SvUV_nomg(fromstr);
2895 *--in = (char)((auv & 0x7f) | 0x80);
2898 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2899 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2900 in, (buf + sizeof(buf)) - in);
2901 } else if (SvPOKp(fromstr))
2903 else if (SvNOKp(fromstr)) {
2904 /* 10**NV_MAX_10_EXP is the largest power of 10
2905 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
2906 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2907 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2908 And with that many bytes only Inf can overflow.
2909 Some C compilers are strict about integral constant
2910 expressions so we conservatively divide by a slightly
2911 smaller integer instead of multiplying by the exact
2912 floating-point value.
2914 #ifdef NV_MAX_10_EXP
2915 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
2916 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
2918 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
2919 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
2921 char *in = buf + sizeof(buf);
2923 anv = Perl_floor(anv);
2925 const NV next = Perl_floor(anv / 128);
2926 if (in <= buf) /* this cannot happen ;-) */
2927 Perl_croak(aTHX_ "Cannot compress integer in pack");
2928 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
2931 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2932 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2933 in, (buf + sizeof(buf)) - in);
2942 /* Copy string and check for compliance */
2943 from = SvPV_nomg_const(fromstr, len);
2944 if ((norm = is_an_int(from, len)) == NULL)
2945 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
2947 Newx(result, len, char);
2950 while (!done) *--in = div128(norm, &done) | 0x80;
2951 result[len - 1] &= 0x7F; /* clear continue bit */
2952 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2953 in, (result + len) - in);
2955 SvREFCNT_dec(norm); /* free norm */
2960 case 'i' | TYPE_IS_SHRIEKING:
2964 aint = SvIV_no_inf(fromstr, datumtype);
2965 PUSH_VAR(utf8, cur, aint, needs_swap);
2968 case 'N' | TYPE_IS_SHRIEKING:
2973 au32 = SvUV_no_inf(fromstr, datumtype);
2974 au32 = PerlSock_htonl(au32);
2975 PUSH32(utf8, cur, &au32, FALSE);
2978 case 'V' | TYPE_IS_SHRIEKING:
2983 au32 = SvUV_no_inf(fromstr, datumtype);
2985 PUSH32(utf8, cur, &au32, FALSE);
2988 case 'L' | TYPE_IS_SHRIEKING:
2989 #if LONGSIZE != SIZE32
2991 unsigned long aulong;
2993 aulong = SvUV_no_inf(fromstr, datumtype);
2994 PUSH_VAR(utf8, cur, aulong, needs_swap);
3004 au32 = SvUV_no_inf(fromstr, datumtype);
3005 PUSH32(utf8, cur, &au32, needs_swap);
3008 case 'l' | TYPE_IS_SHRIEKING:
3009 #if LONGSIZE != SIZE32
3013 along = SvIV_no_inf(fromstr, datumtype);
3014 PUSH_VAR(utf8, cur, along, needs_swap);
3024 ai32 = SvIV_no_inf(fromstr, datumtype);
3025 PUSH32(utf8, cur, &ai32, needs_swap);
3028 #if defined(HAS_QUAD) && IVSIZE >= 8
3033 auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype);
3034 PUSH_VAR(utf8, cur, auquad, needs_swap);
3041 aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype);
3042 PUSH_VAR(utf8, cur, aquad, needs_swap);
3047 len = 1; /* assume SV is correct length */
3048 GROWING(utf8, cat, start, cur, sizeof(char *));
3055 SvGETMAGIC(fromstr);
3056 if (!SvOK(fromstr)) aptr = NULL;
3058 /* XXX better yet, could spirit away the string to
3059 * a safe spot and hang on to it until the result
3060 * of pack() (and all copies of the result) are
3063 if (((SvTEMP(fromstr) && SvREFCNT(fromstr) == 1)
3064 || (SvPADTMP(fromstr) &&
3065 !SvREADONLY(fromstr)))) {
3066 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3067 "Attempt to pack pointer to temporary value");
3069 if (SvPOK(fromstr) || SvNIOK(fromstr))
3070 aptr = SvPV_nomg_const_nolen(fromstr);
3072 aptr = SvPV_force_flags_nolen(fromstr, 0);
3074 PUSH_VAR(utf8, cur, aptr, needs_swap);
3078 const char *aptr, *aend;
3082 if (len <= 2) len = 45;
3083 else len = len / 3 * 3;
3085 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3086 "Field too wide in 'u' format in pack");
3089 aptr = SvPV_const(fromstr, fromlen);
3090 from_utf8 = DO_UTF8(fromstr);
3092 aend = aptr + fromlen;
3093 fromlen = sv_len_utf8_nomg(fromstr);
3094 } else aend = NULL; /* Unused, but keep compilers happy */
3095 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3096 while (fromlen > 0) {
3099 U8 hunk[1+63/3*4+1];
3101 if ((SSize_t)fromlen > len)
3107 if (!S_utf8_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3108 'u' | TYPE_IS_PACK)) {
3110 SvCUR_set(cat, cur - start);
3111 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3112 "aptr=%p, aend=%p, buffer=%p, todo=%zd",
3113 aptr, aend, buffer, todo);
3115 end = doencodes(hunk, (const U8 *)buffer, todo);
3117 end = doencodes(hunk, (const U8 *)aptr, todo);
3120 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
3127 SvCUR_set(cat, cur - start);
3129 *symptr = lookahead;
3138 dSP; dMARK; dORIGMARK; dTARGET;
3141 SV *pat_sv = *++MARK;
3142 const char *pat = SvPV_const(pat_sv, fromlen);
3143 const char *patend = pat + fromlen;
3149 packlist(cat, pat, patend, MARK, SP + 1);
3153 const char * result = SvPV_nomg(cat, result_len);
3154 const U8 * error_pos;
3156 if (! is_utf8_string_loc((U8 *) result, result_len, &error_pos)) {
3157 _force_out_malformed_utf8_message(error_pos,
3158 (U8 *) result + result_len,
3162 NOT_REACHED; /* NOTREACHED */
3173 * ex: set ts=8 sts=4 sw=4 et: