3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
32 #define PERL_IN_PP_PACK_C
35 /* Types used by pack/unpack */
37 e_no_len, /* no length */
38 e_number, /* number, [] */
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 U32 flags; /* /=4, comma=2, pack=1 */
49 /* and group modifiers */
50 SSize_t length; /* length/repeat count */
51 howlen_t howlen; /* how length is given */
52 int level; /* () nesting level */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
57 #define TEMPSYM_INIT(symptr, p, e, f) \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
78 #if defined(HAS_LONG_DOUBLE)
81 U8 bytes[sizeof(long double)];
88 /* Maximum number of bytes to which a byte can grow due to upgrade */
92 * Offset for integer pack/unpack.
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
112 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
115 #if U16SIZE <= SIZE16 && U32SIZE <= SIZE32
116 # define OFF16(p) ((char *) (p))
117 # define OFF32(p) ((char *) (p))
118 #elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
119 # define OFF16(p) ((char*)(p))
120 # define OFF32(p) ((char*)(p))
121 #elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
122 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
123 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
125 # error "bad cray byte order"
128 #define PUSH16(utf8, cur, p, needs_swap) \
129 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
130 #define PUSH32(utf8, cur, p, needs_swap) \
131 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
133 #if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
134 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
135 #elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
136 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
138 # error "Unsupported byteorder"
139 /* Need to add code here to re-instate mixed endian support.
140 NEEDS_SWAP would need to hold a flag indicating which action to
141 take, and S_reverse_copy and the code in S_utf8_to_bytes would need
142 logic adding to deal with any mixed-endian transformations needed.
146 /* Only to be used inside a loop (see the break) */
147 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
149 if (UNLIKELY(utf8)) { \
150 if (!S_utf8_to_bytes(aTHX_ &s, strend, \
151 (char *) (buf), len, datumtype)) break; \
153 if (UNLIKELY(needs_swap)) \
154 S_reverse_copy(s, (char *) (buf), len); \
156 Copy(s, (char *) (buf), len, char); \
161 #define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
162 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
164 #define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
165 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
167 #define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
168 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
170 #define PUSH_VAR(utf8, aptr, var, needs_swap) \
171 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
173 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
174 #define MAX_SUB_TEMPLATE_LEVEL 100
176 /* flags (note that type modifiers can also be used as flags!) */
177 #define FLAG_WAS_UTF8 0x40
178 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
179 #define FLAG_UNPACK_ONLY_ONE 0x10
180 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
181 #define FLAG_SLASH 0x04
182 #define FLAG_COMMA 0x02
183 #define FLAG_PACK 0x01
186 S_mul128(pTHX_ SV *sv, U8 m)
189 char *s = SvPV(sv, len);
192 PERL_ARGS_ASSERT_MUL128;
194 if (! memBEGINs(s, len, "0000")) { /* need to grow sv */
195 SV * const tmpNew = newSVpvs("0000000000");
197 sv_catsv(tmpNew, sv);
198 SvREFCNT_dec(sv); /* free old sv */
203 while (!*t) /* trailing '\0'? */
206 const U32 i = ((*t - '0') << 7) + m;
207 *(t--) = '0' + (char)(i % 10);
213 /* Explosives and implosives. */
215 #define ISUUCHAR(ch) inRANGE(NATIVE_TO_LATIN1(ch), \
216 NATIVE_TO_LATIN1(' '), \
217 NATIVE_TO_LATIN1('a') - 1)
220 #define TYPE_IS_SHRIEKING 0x100
221 #define TYPE_IS_BIG_ENDIAN 0x200
222 #define TYPE_IS_LITTLE_ENDIAN 0x400
223 #define TYPE_IS_PACK 0x800
224 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
225 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
226 #define TYPE_NO_MODIFIERS(t) ((U8) (t))
228 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
229 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
231 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
233 #define PACK_SIZE_CANNOT_CSUM 0x80
234 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
235 #define PACK_SIZE_MASK 0x3F
237 #include "packsizetables.inc"
240 S_reverse_copy(const char *src, char *dest, STRLEN len)
248 utf8_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
256 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
257 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
258 if (retlen == (STRLEN) -1)
260 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
261 (int) TYPE_NO_MODIFIERS(datumtype));
263 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
264 "Character in '%c' format wrapped in unpack",
265 (int) TYPE_NO_MODIFIERS(datumtype));
272 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
273 utf8_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
277 S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, SSize_t buf_len, I32 datumtype)
281 const char *from = *s;
283 const U32 flags = ckWARN(WARN_UTF8) ?
284 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
285 const bool needs_swap = NEEDS_SWAP(datumtype);
287 if (UNLIKELY(needs_swap))
290 for (;buf_len > 0; buf_len--) {
291 if (from >= end) return FALSE;
292 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
293 if (retlen == (STRLEN) -1) {
294 from += UTF8_SAFE_SKIP(from, end);
296 } else from += retlen;
301 if (UNLIKELY(needs_swap))
302 *(U8 *)--buf = (U8)val;
304 *(U8 *)buf++ = (U8)val;
306 /* We have enough characters for the buffer. Did we have problems ? */
309 /* Rewalk the string fragment while warning */
311 const U32 flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
312 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
313 if (ptr >= end) break;
314 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
316 if (from > end) from = end;
319 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
320 WARN_PACK : WARN_UNPACK),
321 "Character(s) in '%c' format wrapped in %s",
322 (int) TYPE_NO_MODIFIERS(datumtype),
323 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
330 S_my_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
331 PERL_ARGS_ASSERT_MY_BYTES_TO_UTF8;
333 if (UNLIKELY(needs_swap)) {
334 const U8 *p = start + len;
335 while (p-- > start) {
336 append_utf8_from_native_byte(*p, (U8 **) & dest);
339 const U8 * const end = start + len;
340 while (start < end) {
341 append_utf8_from_native_byte(*start, (U8 **) & dest);
348 #define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
350 if (UNLIKELY(utf8)) \
351 (cur) = my_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap); \
353 if (UNLIKELY(needs_swap)) \
354 S_reverse_copy((char *)(buf), cur, len); \
356 Copy(buf, cur, len, char); \
361 #define SAFE_UTF8_EXPAND(var) \
363 if ((var) > SSize_t_MAX / UTF8_EXPAND) \
364 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
365 (var) = (var) * UTF8_EXPAND; \
368 #define GROWING2(utf8, cat, start, cur, item_size, item_count) \
370 if (SSize_t_MAX / (item_size) < (item_count)) \
371 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
372 GROWING((utf8), (cat), (start), (cur), (item_size) * (item_count)); \
375 #define GROWING(utf8, cat, start, cur, in_len) \
377 STRLEN glen = (in_len); \
378 STRLEN catcur = (STRLEN)((cur) - (start)); \
379 if (utf8) SAFE_UTF8_EXPAND(glen); \
380 if (SSize_t_MAX - glen < catcur) \
381 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
382 if (catcur + glen >= SvLEN(cat)) { \
383 (start) = sv_exp_grow(cat, glen); \
384 (cur) = (start) + SvCUR(cat); \
388 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
390 const STRLEN glen = (in_len); \
392 if (utf8) SAFE_UTF8_EXPAND(gl); \
393 if ((cur) + gl >= (start) + SvLEN(cat)) { \
395 SvCUR_set((cat), (cur) - (start)); \
396 (start) = sv_exp_grow(cat, gl); \
397 (cur) = (start) + SvCUR(cat); \
399 PUSH_BYTES(utf8, cur, buf, glen, 0); \
402 #define PUSH_BYTE(utf8, s, byte) \
405 const U8 au8 = (byte); \
406 (s) = my_bytes_to_utf8(&au8, 1, (s), 0);\
407 } else *(U8 *)(s)++ = (byte); \
410 /* Only to be used inside a loop (see the break) */
411 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
414 if (str >= end) break; \
415 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
416 if (retlen == (STRLEN) -1) { \
418 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
423 static const char *_action( const tempsym_t* symptr )
425 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
428 /* Returns the sizeof() struct described by pat */
430 S_measure_struct(pTHX_ tempsym_t* symptr)
434 PERL_ARGS_ASSERT_MEASURE_STRUCT;
436 while (next_symbol(symptr)) {
439 switch (symptr->howlen) {
441 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
445 /* e_no_len and e_number */
446 len = symptr->length;
450 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
453 /* endianness doesn't influence the size of a type */
454 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
456 /* diag_listed_as: Invalid type '%s' in %s */
457 Perl_croak(aTHX_ "Invalid type '%c' in %s",
458 (int)TYPE_NO_MODIFIERS(symptr->code),
460 case '.' | TYPE_IS_SHRIEKING:
461 case '@' | TYPE_IS_SHRIEKING:
465 case 'U': /* XXXX Is it correct? */
468 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
469 (int) TYPE_NO_MODIFIERS(symptr->code),
476 tempsym_t savsym = *symptr;
477 symptr->patptr = savsym.grpbeg;
478 symptr->patend = savsym.grpend;
479 /* XXXX Theoretically, we need to measure many times at
480 different positions, since the subexpression may contain
481 alignment commands, but be not of aligned length.
482 Need to detect this and croak(). */
483 size = measure_struct(symptr);
487 case 'X' | TYPE_IS_SHRIEKING:
488 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
490 if (!len) /* Avoid division by 0 */
492 len = total % len; /* Assumed: the start is aligned. */
497 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
499 case 'x' | TYPE_IS_SHRIEKING:
500 if (!len) /* Avoid division by 0 */
502 star = total % len; /* Assumed: the start is aligned. */
503 if (star) /* Other portable ways? */
527 size = sizeof(char*);
537 /* locate matching closing parenthesis or bracket
538 * returns char pointer to char after match, or NULL
541 S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
543 PERL_ARGS_ASSERT_GROUP_END;
544 Size_t opened = 0; /* number of pending opened brackets */
546 while (patptr < patend) {
547 const char c = *patptr++;
549 if (opened == 0 && c == ender)
552 while (patptr < patend && *patptr != '\n')
555 } else if (c == '(' || c == '[')
557 else if (c == ')' || c == ']') {
559 Perl_croak(aTHX_ "Mismatched brackets in template");
563 Perl_croak(aTHX_ "No group ending character '%c' found in template",
565 NOT_REACHED; /* NOTREACHED */
569 /* Convert unsigned decimal number to binary.
570 * Expects a pointer to the first digit and address of length variable
571 * Advances char pointer to 1st non-digit char and returns number
574 S_get_num(pTHX_ const char *patptr, SSize_t *lenptr )
576 SSize_t len = *patptr++ - '0';
578 PERL_ARGS_ASSERT_GET_NUM;
580 while (isDIGIT(*patptr)) {
581 SSize_t nlen = (len * 10) + (*patptr++ - '0');
582 if (nlen < 0 || nlen/10 != len)
583 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
590 /* The marvellous template parsing routine: Using state stored in *symptr,
591 * locates next template code and count
594 S_next_symbol(pTHX_ tempsym_t* symptr )
596 const char* patptr = symptr->patptr;
597 const char* const patend = symptr->patend;
599 PERL_ARGS_ASSERT_NEXT_SYMBOL;
601 symptr->flags &= ~FLAG_SLASH;
603 while (patptr < patend) {
604 if (isSPACE(*patptr))
606 else if (*patptr == '#') {
608 while (patptr < patend && *patptr != '\n')
613 /* We should have found a template code */
614 I32 code = (U8) *patptr++;
615 U32 inherited_modifiers = 0;
617 /* unrecognised characters in pack/unpack formats were made fatal in
618 * 5.004, with an exception added in 5.004_04 for ',' to "just" warn: */
620 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
621 symptr->flags |= FLAG_COMMA;
622 /* diag_listed_as: Invalid type '%s' in %s */
623 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
624 "Invalid type ',' in %s", _action( symptr ) );
629 /* for '(', skip to ')' */
631 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
632 Perl_croak(aTHX_ "()-group starts with a count in %s",
634 symptr->grpbeg = patptr;
635 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
636 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
637 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
641 /* look for group modifiers to inherit */
642 if (TYPE_ENDIANNESS(symptr->flags)) {
643 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
644 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
647 /* look for modifiers */
648 while (patptr < patend) {
653 modifier = TYPE_IS_SHRIEKING;
654 allowed = "sSiIlLxXnNvV@.";
657 modifier = TYPE_IS_BIG_ENDIAN;
658 allowed = ENDIANNESS_ALLOWED_TYPES;
661 modifier = TYPE_IS_LITTLE_ENDIAN;
662 allowed = ENDIANNESS_ALLOWED_TYPES;
673 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
674 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
675 allowed, _action( symptr ) );
677 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
678 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
679 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
680 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
681 TYPE_ENDIANNESS_MASK)
682 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
683 *patptr, _action( symptr ) );
685 if ((code & modifier)) {
686 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
687 "Duplicate modifier '%c' after '%c' in %s",
688 *patptr, (int) TYPE_NO_MODIFIERS(code),
696 /* inherit modifiers */
697 code |= inherited_modifiers;
699 /* look for count and/or / */
700 if (patptr < patend) {
701 if (isDIGIT(*patptr)) {
702 patptr = get_num( patptr, &symptr->length );
703 symptr->howlen = e_number;
705 } else if (*patptr == '*') {
707 symptr->howlen = e_star;
709 } else if (*patptr == '[') {
710 const char* lenptr = ++patptr;
711 symptr->howlen = e_number;
712 patptr = group_end( patptr, patend, ']' ) + 1;
713 /* what kind of [] is it? */
714 if (isDIGIT(*lenptr)) {
715 lenptr = get_num( lenptr, &symptr->length );
717 Perl_croak(aTHX_ "Malformed integer in [] in %s",
720 tempsym_t savsym = *symptr;
721 symptr->patend = patptr-1;
722 symptr->patptr = lenptr;
723 savsym.length = measure_struct(symptr);
727 symptr->howlen = e_no_len;
732 while (patptr < patend) {
733 if (isSPACE(*patptr))
735 else if (*patptr == '#') {
737 while (patptr < patend && *patptr != '\n')
742 if (*patptr == '/') {
743 symptr->flags |= FLAG_SLASH;
745 if (patptr < patend &&
746 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
747 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
754 /* at end - no count, no / */
755 symptr->howlen = e_no_len;
760 symptr->patptr = patptr;
764 symptr->patptr = patptr;
769 There is no way to cleanly handle the case where we should process the
770 string per byte in its upgraded form while it's really in downgraded form
771 (e.g. estimates like strend-s as an upper bound for the number of
772 characters left wouldn't work). So if we foresee the need of this
773 (pattern starts with U or contains U0), we want to work on the encoded
774 version of the string. Users are advised to upgrade their pack string
775 themselves if they need to do a lot of unpacks like this on it
778 need_utf8(const char *pat, const char *patend)
782 PERL_ARGS_ASSERT_NEED_UTF8;
784 while (pat < patend) {
787 pat = (const char *) memchr(pat, '\n', patend-pat);
788 if (!pat) return FALSE;
789 } else if (pat[0] == 'U') {
790 if (first || pat[1] == '0') return TRUE;
791 } else first = FALSE;
798 first_symbol(const char *pat, const char *patend) {
799 PERL_ARGS_ASSERT_FIRST_SYMBOL;
801 while (pat < patend) {
802 if (pat[0] != '#') return pat[0];
804 pat = (const char *) memchr(pat, '\n', patend-pat);
813 =for apidoc unpackstring
815 The engine implementing the C<unpack()> Perl function.
817 Using the template C<pat..patend>, this function unpacks the string
818 C<s..strend> into a number of mortal SVs, which it pushes onto the perl
819 argument (C<@_>) stack (so you will need to issue a C<PUTBACK> before and
820 C<SPAGAIN> after the call to this function). It returns the number of
823 The C<strend> and C<patend> pointers should point to the byte following the
824 last character of each string.
826 Although this function returns its values on the perl argument stack, it
827 doesn't take any parameters from that stack (and thus in particular
828 there's no need to do a C<PUSHMARK> before calling it, unlike L</call_pv> for
834 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
838 PERL_ARGS_ASSERT_UNPACKSTRING;
840 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
841 else if (need_utf8(pat, patend)) {
842 /* We probably should try to avoid this in case a scalar context call
843 wouldn't get to the "U0" */
844 STRLEN len = strend - s;
845 s = (char *) bytes_to_utf8((U8 *) s, &len);
848 flags |= FLAG_DO_UTF8;
851 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
852 flags |= FLAG_PARSE_UTF8;
854 TEMPSYM_INIT(&sym, pat, patend, flags);
856 return unpack_rec(&sym, s, s, strend, NULL );
860 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
864 const SSize_t start_sp_offset = SP - PL_stack_base;
866 SSize_t checksum = 0;
869 const SSize_t bits_in_uv = CHAR_BIT * sizeof(cuv);
871 bool explicit_length;
872 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
873 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
875 PERL_ARGS_ASSERT_UNPACK_REC;
877 symptr->strbeg = s - strbeg;
879 while (next_symbol(symptr)) {
882 I32 datumtype = symptr->code;
884 /* do first one only unless in list context
885 / is implemented by unpacking the count, then popping it from the
886 stack, so must check that we're not in the middle of a / */
888 && (SP - PL_stack_base == start_sp_offset + 1)
889 && (datumtype != '/') ) /* XXX can this be omitted */
892 switch (howlen = symptr->howlen) {
894 len = strend - strbeg; /* long enough */
897 /* e_no_len and e_number */
898 len = symptr->length;
902 explicit_length = TRUE;
904 beyond = s >= strend;
906 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
908 /* props nonzero means we can process this letter. */
909 const SSize_t size = props & PACK_SIZE_MASK;
910 const SSize_t howmany = (strend - s) / size;
914 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
915 if (len && unpack_only_one) len = 1;
921 needs_swap = NEEDS_SWAP(datumtype);
923 switch(TYPE_NO_ENDIANNESS(datumtype)) {
925 /* diag_listed_as: Invalid type '%s' in %s */
926 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
929 if (howlen == e_no_len)
930 len = 16; /* len is not specified */
938 tempsym_t savsym = *symptr;
939 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
940 symptr->flags |= group_modifiers;
941 symptr->patend = savsym.grpend;
942 symptr->previous = &savsym;
945 if (len && unpack_only_one) len = 1;
947 symptr->patptr = savsym.grpbeg;
948 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
949 else symptr->flags &= ~FLAG_PARSE_UTF8;
950 unpack_rec(symptr, s, strbeg, strend, &s);
951 if (s == strend && savsym.howlen == e_star)
952 break; /* No way to continue */
955 savsym.flags = symptr->flags & ~group_modifiers;
959 case '.' | TYPE_IS_SHRIEKING:
963 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
964 if (howlen == e_star) from = strbeg;
965 else if (len <= 0) from = s;
967 tempsym_t *group = symptr;
969 while (--len && group) group = group->previous;
970 from = group ? strbeg + group->strbeg : strbeg;
973 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
974 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
978 case '@' | TYPE_IS_SHRIEKING:
980 s = strbeg + symptr->strbeg;
981 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
985 Perl_croak(aTHX_ "'@' outside of string in unpack");
990 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
993 Perl_croak(aTHX_ "'@' outside of string in unpack");
997 case 'X' | TYPE_IS_SHRIEKING:
998 if (!len) /* Avoid division by 0 */
1001 const char *hop, *last;
1003 hop = last = strbeg;
1005 hop += UTF8SKIP(hop);
1012 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1016 len = (s - strbeg) % len;
1022 Perl_croak(aTHX_ "'X' outside of string in unpack");
1023 while (--s, UTF8_IS_CONTINUATION(*s)) {
1025 Perl_croak(aTHX_ "'X' outside of string in unpack");
1030 if (len > s - strbeg)
1031 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1035 case 'x' | TYPE_IS_SHRIEKING: {
1037 if (!len) /* Avoid division by 0 */
1039 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1040 else ai32 = (s - strbeg) % len;
1041 if (ai32 == 0) break;
1049 Perl_croak(aTHX_ "'x' outside of string in unpack");
1054 if (len > strend - s)
1055 Perl_croak(aTHX_ "'x' outside of string in unpack");
1060 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1066 /* Preliminary length estimate is assumed done in 'W' */
1067 if (len > strend - s) len = strend - s;
1073 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1074 if (hop >= strend) {
1076 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1081 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1083 } else if (len > strend - s)
1086 if (datumtype == 'Z') {
1087 /* 'Z' strips stuff after first null */
1088 const char *ptr, *end;
1090 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1091 sv = newSVpvn(s, ptr-s);
1092 if (howlen == e_star) /* exact for 'Z*' */
1093 len = ptr-s + (ptr != strend ? 1 : 0);
1094 } else if (datumtype == 'A') {
1095 /* 'A' strips both nulls and spaces */
1097 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1098 for (ptr = s+len-1; ptr >= s; ptr--) {
1100 && !UTF8_IS_CONTINUATION(*ptr)
1101 && !isSPACE_utf8_safe(ptr, strend))
1106 if (ptr >= s) ptr += UTF8SKIP(ptr);
1109 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1111 for (ptr = s+len-1; ptr >= s; ptr--)
1112 if (*ptr != 0 && !isSPACE(*ptr)) break;
1115 sv = newSVpvn(s, ptr-s);
1116 } else sv = newSVpvn(s, len);
1120 /* Undo any upgrade done due to need_utf8() */
1121 if (!(symptr->flags & FLAG_WAS_UTF8))
1122 sv_utf8_downgrade(sv, 0);
1130 if (howlen == e_star || len > (strend - s) * 8)
1131 len = (strend - s) * 8;
1134 while (len >= 8 && s < strend) {
1135 cuv += PL_bitcount[utf8_to_byte(aTHX_ &s, strend, datumtype)];
1140 cuv += PL_bitcount[*(U8 *)s++];
1143 if (len && s < strend) {
1145 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1146 if (datumtype == 'b')
1148 if (bits & 1) cuv++;
1153 if (bits & 0x80) cuv++;
1160 sv = sv_2mortal(newSV(len ? len : 1));
1163 if (datumtype == 'b') {
1165 const SSize_t ai32 = len;
1166 for (len = 0; len < ai32; len++) {
1167 if (len & 7) bits >>= 1;
1169 if (s >= strend) break;
1170 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1171 } else bits = *(U8 *) s++;
1172 *str++ = bits & 1 ? '1' : '0';
1176 const SSize_t ai32 = len;
1177 for (len = 0; len < ai32; len++) {
1178 if (len & 7) bits <<= 1;
1180 if (s >= strend) break;
1181 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1182 } else bits = *(U8 *) s++;
1183 *str++ = bits & 0x80 ? '1' : '0';
1187 SvCUR_set(sv, str - SvPVX_const(sv));
1194 /* Preliminary length estimate, acceptable for utf8 too */
1195 if (howlen == e_star || len > (strend - s) * 2)
1196 len = (strend - s) * 2;
1198 sv = sv_2mortal(newSV(len ? len : 1));
1202 if (datumtype == 'h') {
1205 for (len = 0; len < ai32; len++) {
1206 if (len & 1) bits >>= 4;
1208 if (s >= strend) break;
1209 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1210 } else bits = * (U8 *) s++;
1212 *str++ = PL_hexdigit[bits & 15];
1216 const SSize_t ai32 = len;
1217 for (len = 0; len < ai32; len++) {
1218 if (len & 1) bits <<= 4;
1220 if (s >= strend) break;
1221 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1222 } else bits = *(U8 *) s++;
1224 *str++ = PL_hexdigit[(bits >> 4) & 15];
1229 SvCUR_set(sv, str - SvPVX_const(sv));
1236 if (explicit_length)
1237 /* Switch to "character" mode */
1238 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1243 while (len-- > 0 && s < strend) {
1248 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1249 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1250 if (retlen == (STRLEN) -1)
1251 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1255 aint = *(U8 *)(s)++;
1256 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1260 else if (checksum > bits_in_uv)
1261 cdouble += (NV)aint;
1269 while (len-- > 0 && s < strend) {
1271 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1272 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1273 if (retlen == (STRLEN) -1)
1274 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1278 else if (checksum > bits_in_uv)
1279 cdouble += (NV) val;
1283 } else if (!checksum)
1285 const U8 ch = *(U8 *) s++;
1288 else if (checksum > bits_in_uv)
1289 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1291 while (len-- > 0) cuv += *(U8 *) s++;
1295 if (explicit_length && howlen != e_star) {
1296 /* Switch to "bytes in UTF-8" mode */
1297 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1299 /* Should be impossible due to the need_utf8() test */
1300 Perl_croak(aTHX_ "U0 mode on a byte string");
1304 if (len > strend - s) len = strend - s;
1306 if (len && unpack_only_one) len = 1;
1310 while (len-- > 0 && s < strend) {
1314 U8 result[UTF8_MAXLEN+1];
1315 const char *ptr = s;
1317 /* Bug: warns about bad utf8 even if we are short on bytes
1318 and will break out of the loop */
1319 if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1322 len = UTF8SKIP(result);
1323 if (!S_utf8_to_bytes(aTHX_ &ptr, strend,
1324 (char *) &result[1], len-1, 'U')) break;
1325 auv = utf8n_to_uvchr(result, len, &retlen,
1326 UTF8_ALLOW_DEFAULT);
1329 auv = utf8n_to_uvchr((U8*)s, strend - s, &retlen,
1330 UTF8_ALLOW_DEFAULT);
1331 if (retlen == (STRLEN) -1)
1332 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1337 else if (checksum > bits_in_uv)
1338 cdouble += (NV) auv;
1343 case 's' | TYPE_IS_SHRIEKING:
1344 #if SHORTSIZE != SIZE16
1347 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
1350 else if (checksum > bits_in_uv)
1351 cdouble += (NV)ashort;
1363 #if U16SIZE > SIZE16
1366 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1367 #if U16SIZE > SIZE16
1373 else if (checksum > bits_in_uv)
1374 cdouble += (NV)ai16;
1379 case 'S' | TYPE_IS_SHRIEKING:
1380 #if SHORTSIZE != SIZE16
1382 unsigned short aushort;
1383 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap);
1386 else if (checksum > bits_in_uv)
1387 cdouble += (NV)aushort;
1400 #if U16SIZE > SIZE16
1403 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
1404 if (datumtype == 'n')
1405 au16 = PerlSock_ntohs(au16);
1406 if (datumtype == 'v')
1410 else if (checksum > bits_in_uv)
1411 cdouble += (NV) au16;
1416 case 'v' | TYPE_IS_SHRIEKING:
1417 case 'n' | TYPE_IS_SHRIEKING:
1420 # if U16SIZE > SIZE16
1423 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1424 /* There should never be any byte-swapping here. */
1425 assert(!TYPE_ENDIANNESS(datumtype));
1426 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1427 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1428 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1429 ai16 = (I16) vtohs((U16) ai16);
1432 else if (checksum > bits_in_uv)
1433 cdouble += (NV) ai16;
1439 case 'i' | TYPE_IS_SHRIEKING:
1442 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
1445 else if (checksum > bits_in_uv)
1446 cdouble += (NV)aint;
1452 case 'I' | TYPE_IS_SHRIEKING:
1455 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
1458 else if (checksum > bits_in_uv)
1459 cdouble += (NV)auint;
1467 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
1470 else if (checksum > bits_in_uv)
1479 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
1482 else if (checksum > bits_in_uv)
1488 case 'l' | TYPE_IS_SHRIEKING:
1489 #if LONGSIZE != SIZE32
1492 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
1495 else if (checksum > bits_in_uv)
1496 cdouble += (NV)along;
1507 #if U32SIZE > SIZE32
1510 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1511 #if U32SIZE > SIZE32
1512 if (ai32 > 2147483647) ai32 -= 4294967296;
1516 else if (checksum > bits_in_uv)
1517 cdouble += (NV)ai32;
1522 case 'L' | TYPE_IS_SHRIEKING:
1523 #if LONGSIZE != SIZE32
1525 unsigned long aulong;
1526 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
1529 else if (checksum > bits_in_uv)
1530 cdouble += (NV)aulong;
1543 #if U32SIZE > SIZE32
1546 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
1547 if (datumtype == 'N')
1548 au32 = PerlSock_ntohl(au32);
1549 if (datumtype == 'V')
1553 else if (checksum > bits_in_uv)
1554 cdouble += (NV)au32;
1559 case 'V' | TYPE_IS_SHRIEKING:
1560 case 'N' | TYPE_IS_SHRIEKING:
1563 #if U32SIZE > SIZE32
1566 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1567 /* There should never be any byte swapping here. */
1568 assert(!TYPE_ENDIANNESS(datumtype));
1569 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1570 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1571 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1572 ai32 = (I32)vtohl((U32)ai32);
1575 else if (checksum > bits_in_uv)
1576 cdouble += (NV)ai32;
1584 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1585 /* newSVpv generates undef if aptr is NULL */
1586 mPUSHs(newSVpv(aptr, 0));
1594 while (len > 0 && s < strend) {
1596 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1597 auv = (auv << 7) | (ch & 0x7f);
1598 /* UTF8_IS_XXXXX not right here because this is a BER, not
1599 * UTF-8 format - using constant 0x80 */
1607 if (++bytes >= sizeof(UV)) { /* promote to string */
1610 sv = Perl_newSVpvf(aTHX_ "%.*" UVuf,
1611 (int)TYPE_DIGITS(UV), auv);
1612 while (s < strend) {
1613 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1614 sv = mul128(sv, (U8)(ch & 0x7f));
1620 t = SvPV_nolen_const(sv);
1629 if ((s >= strend) && bytes)
1630 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1634 if (symptr->howlen == e_star)
1635 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1637 if (s + sizeof(char*) <= strend) {
1639 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1640 /* newSVpvn generates undef if aptr is NULL */
1641 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1644 #if defined(HAS_QUAD) && IVSIZE >= 8
1648 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
1650 mPUSHs(newSViv((IV)aquad));
1651 else if (checksum > bits_in_uv)
1652 cdouble += (NV)aquad;
1660 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
1662 mPUSHs(newSVuv((UV)auquad));
1663 else if (checksum > bits_in_uv)
1664 cdouble += (NV)auquad;
1670 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1674 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
1684 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
1694 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1695 datumtype, needs_swap);
1702 #if defined(HAS_LONG_DOUBLE)
1706 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1707 sizeof(aldouble.bytes), datumtype, needs_swap);
1708 /* The most common long double format, the x86 80-bit
1709 * extended precision, has either 2 or 6 unused bytes,
1710 * which may contain garbage, which may contain
1711 * unintentional data. While we do zero the bytes of
1712 * the long double data in pack(), here in unpack() we
1713 * don't, because it's really hard to envision that
1714 * reading the long double off aldouble would be
1715 * affected by the unused bytes.
1717 * Note that trying to unpack 'long doubles' of 'long
1718 * doubles' packed in another system is in the general
1719 * case doomed without having more detail. */
1721 mPUSHn(aldouble.ld);
1723 cdouble += aldouble.ld;
1729 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1730 sv = sv_2mortal(newSV(l));
1737 /* Note that all legal uuencoded strings are ASCII printables, so
1738 * have the same representation under UTF-8 vs not. This means we
1739 * can ignore UTF8ness on legal input. For illegal we stop at the
1740 * first failure, and don't report where/what that is, so again we
1741 * can ignore UTF8ness */
1743 while (s < strend && *s != ' ' && ISUUCHAR(*s)) {
1747 len = PL_uudmap[*(U8*)s++] & 077;
1749 if (s < strend && ISUUCHAR(*s))
1750 a = PL_uudmap[*(U8*)s++] & 077;
1753 if (s < strend && ISUUCHAR(*s))
1754 b = PL_uudmap[*(U8*)s++] & 077;
1757 if (s < strend && ISUUCHAR(*s))
1758 c = PL_uudmap[*(U8*)s++] & 077;
1761 if (s < strend && ISUUCHAR(*s))
1762 d = PL_uudmap[*(U8*)s++] & 077;
1765 hunk[0] = (char)((a << 2) | (b >> 4));
1766 hunk[1] = (char)((b << 4) | (c >> 2));
1767 hunk[2] = (char)((c << 6) | d);
1769 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1774 else /* possible checksum byte */
1775 if (s + 1 < strend && s[1] == '\n')
1781 } /* End of switch */
1784 if (memCHRs("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1785 (checksum > bits_in_uv &&
1786 memCHRs("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1789 anv = (NV) (1 << (checksum & 15));
1790 while (checksum >= 16) {
1794 while (cdouble < 0.0)
1796 cdouble = Perl_modf(cdouble / anv, &trouble);
1797 #ifdef LONGDOUBLE_DOUBLEDOUBLE
1798 /* Workaround for powerpc doubledouble modfl bug:
1799 * close to 1.0L and -1.0L cdouble is 0, and trouble
1800 * is cdouble / anv. */
1801 if (trouble != Perl_ceil(trouble)) {
1803 if (cdouble > 1.0L) cdouble -= 1.0L;
1804 if (cdouble < -1.0L) cdouble += 1.0L;
1808 sv = newSVnv(cdouble);
1811 if (checksum < bits_in_uv) {
1812 UV mask = nBIT_MASK(checksum);
1821 if (symptr->flags & FLAG_SLASH){
1822 if (SP - PL_stack_base - start_sp_offset <= 0)
1824 if( next_symbol(symptr) ){
1825 if( symptr->howlen == e_number )
1826 Perl_croak(aTHX_ "Count after length/code in unpack" );
1828 /* ...end of char buffer then no decent length available */
1829 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1831 /* take top of stack (hope it's numeric) */
1834 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1837 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1839 datumtype = symptr->code;
1840 explicit_length = FALSE;
1848 return SP - PL_stack_base - start_sp_offset;
1858 const char *pat = SvPV_const(left, llen);
1859 const char *s = SvPV_const(right, rlen);
1860 const char *strend = s + rlen;
1861 const char *patend = pat + llen;
1865 cnt = unpackstring(pat, patend, s, strend,
1866 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
1867 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
1870 if ( !cnt && gimme == G_SCALAR )
1871 PUSHs(&PL_sv_undef);
1876 doencodes(U8 *h, const U8 *s, SSize_t len)
1878 *h++ = PL_uuemap[len];
1880 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1881 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1882 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1883 *h++ = PL_uuemap[(077 & (s[2] & 077))];
1888 const U8 r = (len > 1 ? s[1] : '\0');
1889 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1890 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1891 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1892 *h++ = PL_uuemap[0];
1899 S_is_an_int(pTHX_ const char *s, STRLEN l)
1901 SV *result = newSVpvn(s, l);
1902 char *const result_c = SvPV_nolen(result); /* convenience */
1903 char *out = result_c;
1907 PERL_ARGS_ASSERT_IS_AN_INT;
1915 SvREFCNT_dec(result);
1938 SvREFCNT_dec(result);
1944 SvCUR_set(result, out - result_c);
1948 /* pnum must be '\0' terminated */
1950 S_div128(pTHX_ SV *pnum, bool *done)
1953 char * const s = SvPV(pnum, len);
1957 PERL_ARGS_ASSERT_DIV128;
1961 const int i = m * 10 + (*t - '0');
1962 const int r = (i >> 7); /* r < 10 */
1970 SvCUR_set(pnum, (STRLEN) (t - s));
1975 =for apidoc packlist
1977 The engine implementing C<pack()> Perl function.
1983 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
1987 PERL_ARGS_ASSERT_PACKLIST;
1989 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
1991 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1992 Also make sure any UTF8 flag is loaded */
1993 SvPV_force_nolen(cat);
1995 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
1997 (void)pack_rec( cat, &sym, beglist, endlist );
2000 /* like sv_utf8_upgrade, but also repoint the group start markers */
2002 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2005 const char *from_ptr, *from_start, *from_end, **marks, **m;
2006 char *to_start, *to_ptr;
2008 if (SvUTF8(sv)) return;
2010 from_start = SvPVX_const(sv);
2011 from_end = from_start + SvCUR(sv);
2012 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2013 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
2014 if (from_ptr == from_end) {
2015 /* Simple case: no character needs to be changed */
2020 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2021 Newx(to_start, len, char);
2022 Copy(from_start, to_start, from_ptr-from_start, char);
2023 to_ptr = to_start + (from_ptr-from_start);
2025 Newx(marks, sym_ptr->level+2, const char *);
2026 for (group=sym_ptr; group; group = group->previous)
2027 marks[group->level] = from_start + group->strbeg;
2028 marks[sym_ptr->level+1] = from_end+1;
2029 for (m = marks; *m < from_ptr; m++)
2030 *m = to_start + (*m-from_start);
2032 for (;from_ptr < from_end; from_ptr++) {
2033 while (*m == from_ptr) *m++ = to_ptr;
2034 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2038 while (*m == from_ptr) *m++ = to_ptr;
2039 if (m != marks + sym_ptr->level+1) {
2042 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2043 "level=%d", m, marks, sym_ptr->level);
2045 for (group=sym_ptr; group; group = group->previous)
2046 group->strbeg = marks[group->level] - to_start;
2051 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2052 from_start -= SvIVX(sv);
2055 SvFLAGS(sv) &= ~SVf_OOK;
2058 Safefree(from_start);
2059 SvPV_set(sv, to_start);
2060 SvCUR_set(sv, to_ptr - to_start);
2065 /* Exponential string grower. Makes string extension effectively O(n)
2066 needed says how many extra bytes we need (not counting the final '\0')
2067 Only grows the string if there is an actual lack of space
2070 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2071 const STRLEN cur = SvCUR(sv);
2072 const STRLEN len = SvLEN(sv);
2075 PERL_ARGS_ASSERT_SV_EXP_GROW;
2077 if (len - cur > needed) return SvPVX(sv);
2078 extend = needed > len ? needed : len;
2079 return SvGROW(sv, len+extend+1);
2083 S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
2086 if (UNLIKELY(SvAMAGIC(sv)))
2088 if (UNLIKELY(isinfnansv(sv))) {
2089 const I32 c = TYPE_NO_MODIFIERS(datumtype);
2090 const NV nv = SvNV_nomg(sv);
2092 Perl_croak(aTHX_ "Cannot compress %" NVgf " in pack", nv);
2094 Perl_croak(aTHX_ "Cannot pack %" NVgf " with '%c'", nv, (int) c);
2099 #define SvIV_no_inf(sv,d) \
2100 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
2101 #define SvUV_no_inf(sv,d) \
2102 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
2106 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2108 tempsym_t lookahead;
2109 SSize_t items = endlist - beglist;
2110 bool found = next_symbol(symptr);
2111 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2112 bool warn_utf8 = ckWARN(WARN_UTF8);
2115 PERL_ARGS_ASSERT_PACK_REC;
2117 if (symptr->level == 0 && found && symptr->code == 'U') {
2118 marked_upgrade(aTHX_ cat, symptr);
2119 symptr->flags |= FLAG_DO_UTF8;
2122 symptr->strbeg = SvCUR(cat);
2128 SV *lengthcode = NULL;
2129 I32 datumtype = symptr->code;
2130 howlen_t howlen = symptr->howlen;
2131 char *start = SvPVX(cat);
2132 char *cur = start + SvCUR(cat);
2135 #define NEXTFROM (lengthcode ? lengthcode : items > 0 ? (--items, *beglist++) : &PL_sv_no)
2136 #define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
2140 len = memCHRs("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2144 /* e_no_len and e_number */
2145 len = symptr->length;
2150 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2152 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2153 /* We can process this letter. */
2154 STRLEN size = props & PACK_SIZE_MASK;
2155 GROWING2(utf8, cat, start, cur, size, (STRLEN)len);
2159 /* Look ahead for next symbol. Do we have code/code? */
2160 lookahead = *symptr;
2161 found = next_symbol(&lookahead);
2162 if (symptr->flags & FLAG_SLASH) {
2164 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2165 if (memCHRs("aAZ", lookahead.code)) {
2166 if (lookahead.howlen == e_number) count = lookahead.length;
2169 count = sv_len_utf8(*beglist);
2172 if (lookahead.code == 'Z') count++;
2175 if (lookahead.howlen == e_number && lookahead.length < items)
2176 count = lookahead.length;
2179 lookahead.howlen = e_number;
2180 lookahead.length = count;
2181 lengthcode = sv_2mortal(newSViv(count));
2184 needs_swap = NEEDS_SWAP(datumtype);
2186 /* Code inside the switch must take care to properly update
2187 cat (CUR length and '\0' termination) if it updated *cur and
2188 doesn't simply leave using break */
2189 switch (TYPE_NO_ENDIANNESS(datumtype)) {
2191 /* diag_listed_as: Invalid type '%s' in %s */
2192 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2193 (int) TYPE_NO_MODIFIERS(datumtype));
2195 Perl_croak(aTHX_ "'%%' may not be used in pack");
2197 case '.' | TYPE_IS_SHRIEKING:
2199 if (howlen == e_star) from = start;
2200 else if (len == 0) from = cur;
2202 tempsym_t *group = symptr;
2204 while (--len && group) group = group->previous;
2205 from = group ? start + group->strbeg : start;
2208 len = SvIV_no_inf(fromstr, datumtype);
2210 case '@' | TYPE_IS_SHRIEKING:
2212 from = start + symptr->strbeg;
2214 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2216 while (len && from < cur) {
2217 from += UTF8SKIP(from);
2221 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2223 /* Here we know from == cur */
2225 GROWING(0, cat, start, cur, len);
2226 Zero(cur, len, char);
2228 } else if (from < cur) {
2231 } else goto no_change;
2239 if (len > 0) goto grow;
2240 if (len == 0) goto no_change;
2247 tempsym_t savsym = *symptr;
2248 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2249 symptr->flags |= group_modifiers;
2250 symptr->patend = savsym.grpend;
2252 symptr->previous = &lookahead;
2255 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2256 else symptr->flags &= ~FLAG_PARSE_UTF8;
2257 was_utf8 = SvUTF8(cat);
2258 symptr->patptr = savsym.grpbeg;
2259 beglist = pack_rec(cat, symptr, beglist, endlist);
2260 if (SvUTF8(cat) != was_utf8)
2261 /* This had better be an upgrade while in utf8==0 mode */
2264 if (savsym.howlen == e_star && beglist == endlist)
2265 break; /* No way to continue */
2267 items = endlist - beglist;
2268 lookahead.flags = symptr->flags & ~group_modifiers;
2271 case 'X' | TYPE_IS_SHRIEKING:
2272 if (!len) /* Avoid division by 0 */
2279 hop += UTF8SKIP(hop);
2286 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2290 len = (cur-start) % len;
2294 if (len < 1) goto no_change;
2298 Perl_croak(aTHX_ "'%c' outside of string in pack",
2299 (int) TYPE_NO_MODIFIERS(datumtype));
2300 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2302 Perl_croak(aTHX_ "'%c' outside of string in pack",
2303 (int) TYPE_NO_MODIFIERS(datumtype));
2309 if (cur - start < len)
2310 Perl_croak(aTHX_ "'%c' outside of string in pack",
2311 (int) TYPE_NO_MODIFIERS(datumtype));
2314 if (cur < start+symptr->strbeg) {
2315 /* Make sure group starts don't point into the void */
2317 const STRLEN length = cur-start;
2318 for (group = symptr;
2319 group && length < group->strbeg;
2320 group = group->previous) group->strbeg = length;
2321 lookahead.strbeg = length;
2324 case 'x' | TYPE_IS_SHRIEKING: {
2326 if (!len) /* Avoid division by 0 */
2328 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2329 else ai32 = (cur - start) % len;
2330 if (ai32 == 0) goto no_change;
2342 aptr = SvPV_const(fromstr, fromlen);
2343 if (DO_UTF8(fromstr)) {
2344 const char *end, *s;
2346 if (!utf8 && !SvUTF8(cat)) {
2347 marked_upgrade(aTHX_ cat, symptr);
2348 lookahead.flags |= FLAG_DO_UTF8;
2349 lookahead.strbeg = symptr->strbeg;
2352 cur = start + SvCUR(cat);
2354 if (howlen == e_star) {
2355 if (utf8) goto string_copy;
2359 end = aptr + fromlen;
2360 fromlen = datumtype == 'Z' ? len-1 : len;
2361 while ((SSize_t) fromlen > 0 && s < end) {
2366 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2369 if (datumtype == 'Z') len++;
2375 fromlen = len - fromlen;
2376 if (datumtype == 'Z') fromlen--;
2377 if (howlen == e_star) {
2379 if (datumtype == 'Z') len++;
2381 GROWING(0, cat, start, cur, len);
2382 if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2383 datumtype | TYPE_IS_PACK))
2384 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2385 "for '%c', aptr=%p end=%p cur=%p, fromlen=%zu",
2386 (int)datumtype, aptr, end, cur, fromlen);
2390 if (howlen == e_star) {
2392 if (datumtype == 'Z') len++;
2394 if (len <= (SSize_t) fromlen) {
2396 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2398 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2400 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2401 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2403 while (fromlen > 0) {
2404 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2410 if (howlen == e_star) {
2412 if (datumtype == 'Z') len++;
2414 if (len <= (SSize_t) fromlen) {
2416 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2418 GROWING(0, cat, start, cur, len);
2419 Copy(aptr, cur, fromlen, char);
2423 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2430 const char *str, *end;
2431 SSize_t l, field_len;
2437 str = SvPV_const(fromstr, fromlen);
2438 end = str + fromlen;
2439 if (DO_UTF8(fromstr)) {
2441 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2443 utf8_source = FALSE;
2444 utf8_flags = 0; /* Unused, but keep compilers happy */
2446 if (howlen == e_star) len = fromlen;
2447 field_len = (len+7)/8;
2448 GROWING(utf8, cat, start, cur, field_len);
2449 if (len > (SSize_t)fromlen) len = fromlen;
2452 if (datumtype == 'B')
2456 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2458 } else bits |= *str++ & 1;
2459 if (l & 7) bits <<= 1;
2461 PUSH_BYTE(utf8, cur, bits);
2466 /* datumtype == 'b' */
2470 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2471 if (val & 1) bits |= 0x80;
2472 } else if (*str++ & 1)
2474 if (l & 7) bits >>= 1;
2476 PUSH_BYTE(utf8, cur, bits);
2482 if (datumtype == 'B')
2483 bits <<= 7 - (l & 7);
2485 bits >>= 7 - (l & 7);
2486 PUSH_BYTE(utf8, cur, bits);
2489 /* Determine how many chars are left in the requested field */
2491 if (howlen == e_star) field_len = 0;
2492 else field_len -= l;
2493 Zero(cur, field_len, char);
2499 const char *str, *end;
2500 SSize_t l, field_len;
2506 str = SvPV_const(fromstr, fromlen);
2507 end = str + fromlen;
2508 if (DO_UTF8(fromstr)) {
2510 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2512 utf8_source = FALSE;
2513 utf8_flags = 0; /* Unused, but keep compilers happy */
2515 if (howlen == e_star) len = fromlen;
2516 field_len = (len+1)/2;
2517 GROWING(utf8, cat, start, cur, field_len);
2518 if (!utf8_source && len > (SSize_t)fromlen) len = fromlen;
2521 if (datumtype == 'H')
2525 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2526 if (val < 256 && isALPHA(val))
2527 bits |= (val + 9) & 0xf;
2530 } else if (isALPHA(*str))
2531 bits |= (*str++ + 9) & 0xf;
2533 bits |= *str++ & 0xf;
2534 if (l & 1) bits <<= 4;
2536 PUSH_BYTE(utf8, cur, bits);
2544 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2545 if (val < 256 && isALPHA(val))
2546 bits |= ((val + 9) & 0xf) << 4;
2548 bits |= (val & 0xf) << 4;
2549 } else if (isALPHA(*str))
2550 bits |= ((*str++ + 9) & 0xf) << 4;
2552 bits |= (*str++ & 0xf) << 4;
2553 if (l & 1) bits >>= 4;
2555 PUSH_BYTE(utf8, cur, bits);
2561 PUSH_BYTE(utf8, cur, bits);
2564 /* Determine how many chars are left in the requested field */
2566 if (howlen == e_star) field_len = 0;
2567 else field_len -= l;
2568 Zero(cur, field_len, char);
2576 aiv = SvIV_no_inf(fromstr, datumtype);
2577 if ((-128 > aiv || aiv > 127))
2578 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2579 "Character in 'c' format wrapped in pack");
2580 PUSH_BYTE(utf8, cur, (U8)aiv);
2585 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2591 aiv = SvIV_no_inf(fromstr, datumtype);
2592 if ((0 > aiv || aiv > 0xff))
2593 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2594 "Character in 'C' format wrapped in pack");
2595 PUSH_BYTE(utf8, cur, (U8)aiv);
2600 U8 in_bytes = (U8)IN_BYTES;
2602 end = start+SvLEN(cat)-1;
2603 if (utf8) end -= UTF8_MAXLEN-1;
2607 auv = SvUV_no_inf(fromstr, datumtype);
2608 if (in_bytes) auv = auv % 0x100;
2613 SvCUR_set(cat, cur - start);
2615 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2616 end = start+SvLEN(cat)-UTF8_MAXLEN;
2618 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, 0);
2623 SvCUR_set(cat, cur - start);
2624 marked_upgrade(aTHX_ cat, symptr);
2625 lookahead.flags |= FLAG_DO_UTF8;
2626 lookahead.strbeg = symptr->strbeg;
2629 cur = start + SvCUR(cat);
2630 end = start+SvLEN(cat)-UTF8_MAXLEN;
2633 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2634 "Character in 'W' format wrapped in pack");
2639 SvCUR_set(cat, cur - start);
2640 GROWING(0, cat, start, cur, len+1);
2641 end = start+SvLEN(cat)-1;
2643 *(U8 *) cur++ = (U8)auv;
2652 if (!(symptr->flags & FLAG_DO_UTF8)) {
2653 marked_upgrade(aTHX_ cat, symptr);
2654 lookahead.flags |= FLAG_DO_UTF8;
2655 lookahead.strbeg = symptr->strbeg;
2661 end = start+SvLEN(cat);
2662 if (!utf8) end -= UTF8_MAXLEN;
2666 auv = SvUV_no_inf(fromstr, datumtype);
2668 U8 buffer[UTF8_MAXLEN+1], *endb;
2669 endb = uvchr_to_utf8_flags(buffer, auv, 0);
2670 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2672 SvCUR_set(cat, cur - start);
2673 GROWING(0, cat, start, cur,
2674 len+(endb-buffer)*UTF8_EXPAND);
2675 end = start+SvLEN(cat);
2677 cur = my_bytes_to_utf8(buffer, endb-buffer, cur, 0);
2681 SvCUR_set(cat, cur - start);
2682 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2683 end = start+SvLEN(cat)-UTF8_MAXLEN;
2685 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, 0);
2690 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2696 anv = SvNV(fromstr);
2697 # if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
2698 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2699 * on Alpha; fake it if we don't have them.
2703 else if (anv < -FLT_MAX)
2705 else afloat = (float)anv;
2707 # if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2709 afloat = (float)NV_NAN;
2713 /* a simple cast to float is undefined if outside
2714 * the range of values that can be represented */
2715 afloat = (float)(anv > FLT_MAX ? NV_INF :
2716 anv < -FLT_MAX ? -NV_INF : anv);
2719 PUSH_VAR(utf8, cur, afloat, needs_swap);
2727 anv = SvNV(fromstr);
2728 # if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
2729 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2730 * on Alpha; fake it if we don't have them.
2734 else if (anv < -DBL_MAX)
2736 else adouble = (double)anv;
2738 adouble = (double)anv;
2740 PUSH_VAR(utf8, cur, adouble, needs_swap);
2745 Zero(&anv, 1, NV); /* can be long double with unused bits */
2749 /* to work round a gcc/x86 bug; don't use SvNV */
2750 anv.nv = sv_2nv(fromstr);
2751 # if defined(LONGDOUBLE_X86_80_BIT) && defined(USE_LONG_DOUBLE) \
2752 && LONG_DOUBLESIZE > 10
2753 /* GCC sometimes overwrites the padding in the
2755 Zero(anv.bytes+10, sizeof(anv.bytes) - 10, U8);
2758 anv.nv = SvNV(fromstr);
2760 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
2764 #if defined(HAS_LONG_DOUBLE)
2767 /* long doubles can have unused bits, which may be nonzero */
2768 Zero(&aldouble, 1, long double);
2772 /* to work round a gcc/x86 bug; don't use SvNV */
2773 aldouble.ld = (long double)sv_2nv(fromstr);
2774 # if defined(LONGDOUBLE_X86_80_BIT) && LONG_DOUBLESIZE > 10
2775 /* GCC sometimes overwrites the padding in the
2777 Zero(aldouble.bytes+10, sizeof(aldouble.bytes) - 10, U8);
2780 aldouble.ld = (long double)SvNV(fromstr);
2782 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2788 case 'n' | TYPE_IS_SHRIEKING:
2793 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2794 ai16 = PerlSock_htons(ai16);
2795 PUSH16(utf8, cur, &ai16, FALSE);
2798 case 'v' | TYPE_IS_SHRIEKING:
2803 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2805 PUSH16(utf8, cur, &ai16, FALSE);
2808 case 'S' | TYPE_IS_SHRIEKING:
2809 #if SHORTSIZE != SIZE16
2811 unsigned short aushort;
2813 aushort = SvUV_no_inf(fromstr, datumtype);
2814 PUSH_VAR(utf8, cur, aushort, needs_swap);
2824 au16 = (U16)SvUV_no_inf(fromstr, datumtype);
2825 PUSH16(utf8, cur, &au16, needs_swap);
2828 case 's' | TYPE_IS_SHRIEKING:
2829 #if SHORTSIZE != SIZE16
2833 ashort = SvIV_no_inf(fromstr, datumtype);
2834 PUSH_VAR(utf8, cur, ashort, needs_swap);
2844 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2845 PUSH16(utf8, cur, &ai16, needs_swap);
2849 case 'I' | TYPE_IS_SHRIEKING:
2853 auint = SvUV_no_inf(fromstr, datumtype);
2854 PUSH_VAR(utf8, cur, auint, needs_swap);
2861 aiv = SvIV_no_inf(fromstr, datumtype);
2862 PUSH_VAR(utf8, cur, aiv, needs_swap);
2869 auv = SvUV_no_inf(fromstr, datumtype);
2870 PUSH_VAR(utf8, cur, auv, needs_swap);
2877 S_sv_check_infnan(aTHX_ fromstr, datumtype);
2878 anv = SvNV_nomg(fromstr);
2882 SvCUR_set(cat, cur - start);
2883 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
2886 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2887 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2888 any negative IVs will have already been got by the croak()
2889 above. IOK is untrue for fractions, so we test them
2890 against UV_MAX_P1. */
2891 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2892 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
2893 char *in = buf + sizeof(buf);
2894 UV auv = SvUV_nomg(fromstr);
2897 *--in = (char)((auv & 0x7f) | 0x80);
2900 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2901 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2902 in, (buf + sizeof(buf)) - in);
2903 } else if (SvPOKp(fromstr))
2905 else if (SvNOKp(fromstr)) {
2906 /* 10**NV_MAX_10_EXP is the largest power of 10
2907 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
2908 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2909 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2910 And with that many bytes only Inf can overflow.
2911 Some C compilers are strict about integral constant
2912 expressions so we conservatively divide by a slightly
2913 smaller integer instead of multiplying by the exact
2914 floating-point value.
2916 #ifdef NV_MAX_10_EXP
2917 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
2918 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
2920 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
2921 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
2923 char *in = buf + sizeof(buf);
2925 anv = Perl_floor(anv);
2927 const NV next = Perl_floor(anv / 128);
2928 if (in <= buf) /* this cannot happen ;-) */
2929 Perl_croak(aTHX_ "Cannot compress integer in pack");
2930 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
2933 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2934 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2935 in, (buf + sizeof(buf)) - in);
2944 /* Copy string and check for compliance */
2945 from = SvPV_nomg_const(fromstr, len);
2946 if ((norm = is_an_int(from, len)) == NULL)
2947 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
2949 Newx(result, len, char);
2952 while (!done) *--in = div128(norm, &done) | 0x80;
2953 result[len - 1] &= 0x7F; /* clear continue bit */
2954 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2955 in, (result + len) - in);
2957 SvREFCNT_dec(norm); /* free norm */
2962 case 'i' | TYPE_IS_SHRIEKING:
2966 aint = SvIV_no_inf(fromstr, datumtype);
2967 PUSH_VAR(utf8, cur, aint, needs_swap);
2970 case 'N' | TYPE_IS_SHRIEKING:
2975 au32 = SvUV_no_inf(fromstr, datumtype);
2976 au32 = PerlSock_htonl(au32);
2977 PUSH32(utf8, cur, &au32, FALSE);
2980 case 'V' | TYPE_IS_SHRIEKING:
2985 au32 = SvUV_no_inf(fromstr, datumtype);
2987 PUSH32(utf8, cur, &au32, FALSE);
2990 case 'L' | TYPE_IS_SHRIEKING:
2991 #if LONGSIZE != SIZE32
2993 unsigned long aulong;
2995 aulong = SvUV_no_inf(fromstr, datumtype);
2996 PUSH_VAR(utf8, cur, aulong, needs_swap);
3006 au32 = SvUV_no_inf(fromstr, datumtype);
3007 PUSH32(utf8, cur, &au32, needs_swap);
3010 case 'l' | TYPE_IS_SHRIEKING:
3011 #if LONGSIZE != SIZE32
3015 along = SvIV_no_inf(fromstr, datumtype);
3016 PUSH_VAR(utf8, cur, along, needs_swap);
3026 ai32 = SvIV_no_inf(fromstr, datumtype);
3027 PUSH32(utf8, cur, &ai32, needs_swap);
3030 #if defined(HAS_QUAD) && IVSIZE >= 8
3035 auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype);
3036 PUSH_VAR(utf8, cur, auquad, needs_swap);
3043 aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype);
3044 PUSH_VAR(utf8, cur, aquad, needs_swap);
3049 len = 1; /* assume SV is correct length */
3050 GROWING(utf8, cat, start, cur, sizeof(char *));
3057 SvGETMAGIC(fromstr);
3058 if (!SvOK(fromstr)) aptr = NULL;
3060 /* XXX better yet, could spirit away the string to
3061 * a safe spot and hang on to it until the result
3062 * of pack() (and all copies of the result) are
3065 if (((SvTEMP(fromstr) && SvREFCNT(fromstr) == 1)
3066 || (SvPADTMP(fromstr) &&
3067 !SvREADONLY(fromstr)))) {
3068 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3069 "Attempt to pack pointer to temporary value");
3071 if (SvPOK(fromstr) || SvNIOK(fromstr))
3072 aptr = SvPV_nomg_const_nolen(fromstr);
3074 aptr = SvPV_force_flags_nolen(fromstr, 0);
3076 PUSH_VAR(utf8, cur, aptr, needs_swap);
3080 const char *aptr, *aend;
3084 if (len <= 2) len = 45;
3085 else len = len / 3 * 3;
3087 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3088 "Field too wide in 'u' format in pack");
3091 aptr = SvPV_const(fromstr, fromlen);
3092 from_utf8 = DO_UTF8(fromstr);
3094 aend = aptr + fromlen;
3095 fromlen = sv_len_utf8_nomg(fromstr);
3096 } else aend = NULL; /* Unused, but keep compilers happy */
3097 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3098 while (fromlen > 0) {
3101 U8 hunk[1+63/3*4+1];
3103 if ((SSize_t)fromlen > len)
3109 if (!S_utf8_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3110 'u' | TYPE_IS_PACK)) {
3112 SvCUR_set(cat, cur - start);
3113 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3114 "aptr=%p, aend=%p, buffer=%p, todo=%zd",
3115 aptr, aend, buffer, todo);
3117 end = doencodes(hunk, (const U8 *)buffer, todo);
3119 end = doencodes(hunk, (const U8 *)aptr, todo);
3122 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
3129 SvCUR_set(cat, cur - start);
3131 *symptr = lookahead;
3140 dSP; dMARK; dORIGMARK; dTARGET;
3143 SV *pat_sv = *++MARK;
3144 const char *pat = SvPV_const(pat_sv, fromlen);
3145 const char *patend = pat + fromlen;
3151 packlist(cat, pat, patend, MARK, SP + 1);
3155 const char * result = SvPV_nomg(cat, result_len);
3156 const U8 * error_pos;
3158 if (! is_utf8_string_loc((U8 *) result, result_len, &error_pos)) {
3159 _force_out_malformed_utf8_message(error_pos,
3160 (U8 *) result + result_len,
3164 NOT_REACHED; /* NOTREACHED */
3175 * ex: set ts=8 sts=4 sw=4 et: