3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
32 #define PERL_IN_PP_PACK_C
35 /* Types used by pack/unpack */
37 e_no_len, /* no length */
38 e_number, /* number, [] */
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 U32 flags; /* /=4, comma=2, pack=1 */
49 /* and group modifiers */
50 SSize_t length; /* length/repeat count */
51 howlen_t howlen; /* how length is given */
52 int level; /* () nesting level */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
57 #define TEMPSYM_INIT(symptr, p, e, f) \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
81 U8 bytes[sizeof(long double)];
88 /* Maximum number of bytes to which a byte can grow due to upgrade */
92 * Offset for integer pack/unpack.
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
112 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
115 #if U16SIZE <= SIZE16 && U32SIZE <= SIZE32
116 # define OFF16(p) ((char *) (p))
117 # define OFF32(p) ((char *) (p))
118 #elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
119 # define OFF16(p) ((char*)(p))
120 # define OFF32(p) ((char*)(p))
121 #elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
122 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
123 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
125 # error "bad cray byte order"
128 #define PUSH16(utf8, cur, p, needs_swap) \
129 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
130 #define PUSH32(utf8, cur, p, needs_swap) \
131 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
133 #if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
134 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
135 #elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
136 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
138 # error "Unsupported byteorder"
139 /* Need to add code here to re-instate mixed endian support.
140 NEEDS_SWAP would need to hold a flag indicating which action to
141 take, and S_reverse_copy and the code in S_utf8_to_bytes would need
142 logic adding to deal with any mixed-endian transformations needed.
146 /* Only to be used inside a loop (see the break) */
147 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
149 if (UNLIKELY(utf8)) { \
150 if (!S_utf8_to_bytes(aTHX_ &s, strend, \
151 (char *) (buf), len, datumtype)) break; \
153 if (UNLIKELY(needs_swap)) \
154 S_reverse_copy(s, (char *) (buf), len); \
156 Copy(s, (char *) (buf), len, char); \
161 #define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
162 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
164 #define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
165 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
167 #define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
168 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
170 #define PUSH_VAR(utf8, aptr, var, needs_swap) \
171 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
173 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
174 #define MAX_SUB_TEMPLATE_LEVEL 100
176 /* flags (note that type modifiers can also be used as flags!) */
177 #define FLAG_WAS_UTF8 0x40
178 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
179 #define FLAG_UNPACK_ONLY_ONE 0x10
180 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
181 #define FLAG_SLASH 0x04
182 #define FLAG_COMMA 0x02
183 #define FLAG_PACK 0x01
186 S_mul128(pTHX_ SV *sv, U8 m)
189 char *s = SvPV(sv, len);
192 PERL_ARGS_ASSERT_MUL128;
194 if (! memBEGINs(s, len, "0000")) { /* need to grow sv */
195 SV * const tmpNew = newSVpvs("0000000000");
197 sv_catsv(tmpNew, sv);
198 SvREFCNT_dec(sv); /* free old sv */
203 while (!*t) /* trailing '\0'? */
206 const U32 i = ((*t - '0') << 7) + m;
207 *(t--) = '0' + (char)(i % 10);
213 /* Explosives and implosives. */
215 #define ISUUCHAR(ch) (NATIVE_TO_LATIN1(ch) >= NATIVE_TO_LATIN1(' ') \
216 && NATIVE_TO_LATIN1(ch) < NATIVE_TO_LATIN1('a'))
219 #define TYPE_IS_SHRIEKING 0x100
220 #define TYPE_IS_BIG_ENDIAN 0x200
221 #define TYPE_IS_LITTLE_ENDIAN 0x400
222 #define TYPE_IS_PACK 0x800
223 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
224 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
225 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
227 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
228 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
230 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
232 #define PACK_SIZE_CANNOT_CSUM 0x80
233 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
234 #define PACK_SIZE_MASK 0x3F
236 #include "packsizetables.inc"
239 S_reverse_copy(const char *src, char *dest, STRLEN len)
247 utf8_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
255 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
256 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
257 if (retlen == (STRLEN) -1)
259 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
260 (int) TYPE_NO_MODIFIERS(datumtype));
262 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
263 "Character in '%c' format wrapped in unpack",
264 (int) TYPE_NO_MODIFIERS(datumtype));
271 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
272 utf8_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
276 S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, SSize_t buf_len, I32 datumtype)
280 const char *from = *s;
282 const U32 flags = ckWARN(WARN_UTF8) ?
283 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
284 const bool needs_swap = NEEDS_SWAP(datumtype);
286 if (UNLIKELY(needs_swap))
289 for (;buf_len > 0; buf_len--) {
290 if (from >= end) return FALSE;
291 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
292 if (retlen == (STRLEN) -1) {
293 from += UTF8_SAFE_SKIP(from, end);
295 } else from += retlen;
300 if (UNLIKELY(needs_swap))
301 *(U8 *)--buf = (U8)val;
303 *(U8 *)buf++ = (U8)val;
305 /* We have enough characters for the buffer. Did we have problems ? */
308 /* Rewalk the string fragment while warning */
310 const U32 flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
311 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
312 if (ptr >= end) break;
313 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
315 if (from > end) from = end;
318 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
319 WARN_PACK : WARN_UNPACK),
320 "Character(s) in '%c' format wrapped in %s",
321 (int) TYPE_NO_MODIFIERS(datumtype),
322 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
329 S_my_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
330 PERL_ARGS_ASSERT_MY_BYTES_TO_UTF8;
332 if (UNLIKELY(needs_swap)) {
333 const U8 *p = start + len;
334 while (p-- > start) {
335 append_utf8_from_native_byte(*p, (U8 **) & dest);
338 const U8 * const end = start + len;
339 while (start < end) {
340 append_utf8_from_native_byte(*start, (U8 **) & dest);
347 #define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
349 if (UNLIKELY(utf8)) \
350 (cur) = my_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap); \
352 if (UNLIKELY(needs_swap)) \
353 S_reverse_copy((char *)(buf), cur, len); \
355 Copy(buf, cur, len, char); \
360 #define SAFE_UTF8_EXPAND(var) \
362 if ((var) > SSize_t_MAX / UTF8_EXPAND) \
363 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
364 (var) = (var) * UTF8_EXPAND; \
367 #define GROWING2(utf8, cat, start, cur, item_size, item_count) \
369 if (SSize_t_MAX / (item_size) < (item_count)) \
370 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
371 GROWING((utf8), (cat), (start), (cur), (item_size) * (item_count)); \
374 #define GROWING(utf8, cat, start, cur, in_len) \
376 STRLEN glen = (in_len); \
377 STRLEN catcur = (STRLEN)((cur) - (start)); \
378 if (utf8) SAFE_UTF8_EXPAND(glen); \
379 if (SSize_t_MAX - glen < catcur) \
380 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
381 if (catcur + glen >= SvLEN(cat)) { \
382 (start) = sv_exp_grow(cat, glen); \
383 (cur) = (start) + SvCUR(cat); \
387 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
389 const STRLEN glen = (in_len); \
391 if (utf8) SAFE_UTF8_EXPAND(gl); \
392 if ((cur) + gl >= (start) + SvLEN(cat)) { \
394 SvCUR_set((cat), (cur) - (start)); \
395 (start) = sv_exp_grow(cat, gl); \
396 (cur) = (start) + SvCUR(cat); \
398 PUSH_BYTES(utf8, cur, buf, glen, 0); \
401 #define PUSH_BYTE(utf8, s, byte) \
404 const U8 au8 = (byte); \
405 (s) = my_bytes_to_utf8(&au8, 1, (s), 0);\
406 } else *(U8 *)(s)++ = (byte); \
409 /* Only to be used inside a loop (see the break) */
410 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
413 if (str >= end) break; \
414 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
415 if (retlen == (STRLEN) -1) { \
417 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
422 static const char *_action( const tempsym_t* symptr )
424 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
427 /* Returns the sizeof() struct described by pat */
429 S_measure_struct(pTHX_ tempsym_t* symptr)
433 PERL_ARGS_ASSERT_MEASURE_STRUCT;
435 while (next_symbol(symptr)) {
438 switch (symptr->howlen) {
440 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
444 /* e_no_len and e_number */
445 len = symptr->length;
449 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
452 /* endianness doesn't influence the size of a type */
453 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
455 Perl_croak(aTHX_ "Invalid type '%c' in %s",
456 (int)TYPE_NO_MODIFIERS(symptr->code),
458 case '.' | TYPE_IS_SHRIEKING:
459 case '@' | TYPE_IS_SHRIEKING:
463 case 'U': /* XXXX Is it correct? */
466 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
467 (int) TYPE_NO_MODIFIERS(symptr->code),
474 tempsym_t savsym = *symptr;
475 symptr->patptr = savsym.grpbeg;
476 symptr->patend = savsym.grpend;
477 /* XXXX Theoretically, we need to measure many times at
478 different positions, since the subexpression may contain
479 alignment commands, but be not of aligned length.
480 Need to detect this and croak(). */
481 size = measure_struct(symptr);
485 case 'X' | TYPE_IS_SHRIEKING:
486 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
488 if (!len) /* Avoid division by 0 */
490 len = total % len; /* Assumed: the start is aligned. */
495 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
497 case 'x' | TYPE_IS_SHRIEKING:
498 if (!len) /* Avoid division by 0 */
500 star = total % len; /* Assumed: the start is aligned. */
501 if (star) /* Other portable ways? */
525 size = sizeof(char*);
535 /* locate matching closing parenthesis or bracket
536 * returns char pointer to char after match, or NULL
539 S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
541 PERL_ARGS_ASSERT_GROUP_END;
543 while (patptr < patend) {
544 const char c = *patptr++;
551 while (patptr < patend && *patptr != '\n')
555 patptr = group_end(patptr, patend, ')') + 1;
557 patptr = group_end(patptr, patend, ']') + 1;
559 Perl_croak(aTHX_ "No group ending character '%c' found in template",
561 NOT_REACHED; /* NOTREACHED */
565 /* Convert unsigned decimal number to binary.
566 * Expects a pointer to the first digit and address of length variable
567 * Advances char pointer to 1st non-digit char and returns number
570 S_get_num(pTHX_ const char *patptr, SSize_t *lenptr )
572 SSize_t len = *patptr++ - '0';
574 PERL_ARGS_ASSERT_GET_NUM;
576 while (isDIGIT(*patptr)) {
577 SSize_t nlen = (len * 10) + (*patptr++ - '0');
578 if (nlen < 0 || nlen/10 != len)
579 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
586 /* The marvellous template parsing routine: Using state stored in *symptr,
587 * locates next template code and count
590 S_next_symbol(pTHX_ tempsym_t* symptr )
592 const char* patptr = symptr->patptr;
593 const char* const patend = symptr->patend;
595 PERL_ARGS_ASSERT_NEXT_SYMBOL;
597 symptr->flags &= ~FLAG_SLASH;
599 while (patptr < patend) {
600 if (isSPACE(*patptr))
602 else if (*patptr == '#') {
604 while (patptr < patend && *patptr != '\n')
609 /* We should have found a template code */
610 I32 code = *patptr++ & 0xFF;
611 U32 inherited_modifiers = 0;
613 if (code == ','){ /* grandfather in commas but with a warning */
614 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
615 symptr->flags |= FLAG_COMMA;
616 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
617 "Invalid type ',' in %s", _action( symptr ) );
622 /* for '(', skip to ')' */
624 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
625 Perl_croak(aTHX_ "()-group starts with a count in %s",
627 symptr->grpbeg = patptr;
628 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
629 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
630 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
634 /* look for group modifiers to inherit */
635 if (TYPE_ENDIANNESS(symptr->flags)) {
636 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
637 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
640 /* look for modifiers */
641 while (patptr < patend) {
646 modifier = TYPE_IS_SHRIEKING;
647 allowed = "sSiIlLxXnNvV@.";
650 modifier = TYPE_IS_BIG_ENDIAN;
651 allowed = ENDIANNESS_ALLOWED_TYPES;
654 modifier = TYPE_IS_LITTLE_ENDIAN;
655 allowed = ENDIANNESS_ALLOWED_TYPES;
666 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
667 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
668 allowed, _action( symptr ) );
670 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
671 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
672 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
673 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
674 TYPE_ENDIANNESS_MASK)
675 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
676 *patptr, _action( symptr ) );
678 if ((code & modifier)) {
679 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
680 "Duplicate modifier '%c' after '%c' in %s",
681 *patptr, (int) TYPE_NO_MODIFIERS(code),
689 /* inherit modifiers */
690 code |= inherited_modifiers;
692 /* look for count and/or / */
693 if (patptr < patend) {
694 if (isDIGIT(*patptr)) {
695 patptr = get_num( patptr, &symptr->length );
696 symptr->howlen = e_number;
698 } else if (*patptr == '*') {
700 symptr->howlen = e_star;
702 } else if (*patptr == '[') {
703 const char* lenptr = ++patptr;
704 symptr->howlen = e_number;
705 patptr = group_end( patptr, patend, ']' ) + 1;
706 /* what kind of [] is it? */
707 if (isDIGIT(*lenptr)) {
708 lenptr = get_num( lenptr, &symptr->length );
710 Perl_croak(aTHX_ "Malformed integer in [] in %s",
713 tempsym_t savsym = *symptr;
714 symptr->patend = patptr-1;
715 symptr->patptr = lenptr;
716 savsym.length = measure_struct(symptr);
720 symptr->howlen = e_no_len;
725 while (patptr < patend) {
726 if (isSPACE(*patptr))
728 else if (*patptr == '#') {
730 while (patptr < patend && *patptr != '\n')
735 if (*patptr == '/') {
736 symptr->flags |= FLAG_SLASH;
738 if (patptr < patend &&
739 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
740 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
747 /* at end - no count, no / */
748 symptr->howlen = e_no_len;
753 symptr->patptr = patptr;
757 symptr->patptr = patptr;
762 There is no way to cleanly handle the case where we should process the
763 string per byte in its upgraded form while it's really in downgraded form
764 (e.g. estimates like strend-s as an upper bound for the number of
765 characters left wouldn't work). So if we foresee the need of this
766 (pattern starts with U or contains U0), we want to work on the encoded
767 version of the string. Users are advised to upgrade their pack string
768 themselves if they need to do a lot of unpacks like this on it
771 need_utf8(const char *pat, const char *patend)
775 PERL_ARGS_ASSERT_NEED_UTF8;
777 while (pat < patend) {
780 pat = (const char *) memchr(pat, '\n', patend-pat);
781 if (!pat) return FALSE;
782 } else if (pat[0] == 'U') {
783 if (first || pat[1] == '0') return TRUE;
784 } else first = FALSE;
791 first_symbol(const char *pat, const char *patend) {
792 PERL_ARGS_ASSERT_FIRST_SYMBOL;
794 while (pat < patend) {
795 if (pat[0] != '#') return pat[0];
797 pat = (const char *) memchr(pat, '\n', patend-pat);
806 =head1 Pack and Unpack
808 =for apidoc unpackstring
810 The engine implementing the C<unpack()> Perl function.
812 Using the template C<pat..patend>, this function unpacks the string
813 C<s..strend> into a number of mortal SVs, which it pushes onto the perl
814 argument (C<@_>) stack (so you will need to issue a C<PUTBACK> before and
815 C<SPAGAIN> after the call to this function). It returns the number of
818 The C<strend> and C<patend> pointers should point to the byte following the
819 last character of each string.
821 Although this function returns its values on the perl argument stack, it
822 doesn't take any parameters from that stack (and thus in particular
823 there's no need to do a C<PUSHMARK> before calling it, unlike L</call_pv> for
829 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
833 PERL_ARGS_ASSERT_UNPACKSTRING;
835 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
836 else if (need_utf8(pat, patend)) {
837 /* We probably should try to avoid this in case a scalar context call
838 wouldn't get to the "U0" */
839 STRLEN len = strend - s;
840 s = (char *) bytes_to_utf8((U8 *) s, &len);
843 flags |= FLAG_DO_UTF8;
846 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
847 flags |= FLAG_PARSE_UTF8;
849 TEMPSYM_INIT(&sym, pat, patend, flags);
851 return unpack_rec(&sym, s, s, strend, NULL );
855 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
859 const SSize_t start_sp_offset = SP - PL_stack_base;
861 SSize_t checksum = 0;
864 const SSize_t bits_in_uv = CHAR_BIT * sizeof(cuv);
866 bool explicit_length;
867 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
868 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
870 PERL_ARGS_ASSERT_UNPACK_REC;
872 symptr->strbeg = s - strbeg;
874 while (next_symbol(symptr)) {
877 I32 datumtype = symptr->code;
879 /* do first one only unless in list context
880 / is implemented by unpacking the count, then popping it from the
881 stack, so must check that we're not in the middle of a / */
883 && (SP - PL_stack_base == start_sp_offset + 1)
884 && (datumtype != '/') ) /* XXX can this be omitted */
887 switch (howlen = symptr->howlen) {
889 len = strend - strbeg; /* long enough */
892 /* e_no_len and e_number */
893 len = symptr->length;
897 explicit_length = TRUE;
899 beyond = s >= strend;
901 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
903 /* props nonzero means we can process this letter. */
904 const SSize_t size = props & PACK_SIZE_MASK;
905 const SSize_t howmany = (strend - s) / size;
909 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
910 if (len && unpack_only_one) len = 1;
916 needs_swap = NEEDS_SWAP(datumtype);
918 switch(TYPE_NO_ENDIANNESS(datumtype)) {
920 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
923 if (howlen == e_no_len)
924 len = 16; /* len is not specified */
932 tempsym_t savsym = *symptr;
933 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
934 symptr->flags |= group_modifiers;
935 symptr->patend = savsym.grpend;
936 symptr->previous = &savsym;
939 if (len && unpack_only_one) len = 1;
941 symptr->patptr = savsym.grpbeg;
942 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
943 else symptr->flags &= ~FLAG_PARSE_UTF8;
944 unpack_rec(symptr, s, strbeg, strend, &s);
945 if (s == strend && savsym.howlen == e_star)
946 break; /* No way to continue */
949 savsym.flags = symptr->flags & ~group_modifiers;
953 case '.' | TYPE_IS_SHRIEKING:
957 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
958 if (howlen == e_star) from = strbeg;
959 else if (len <= 0) from = s;
961 tempsym_t *group = symptr;
963 while (--len && group) group = group->previous;
964 from = group ? strbeg + group->strbeg : strbeg;
967 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
968 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
972 case '@' | TYPE_IS_SHRIEKING:
974 s = strbeg + symptr->strbeg;
975 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
979 Perl_croak(aTHX_ "'@' outside of string in unpack");
984 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
987 Perl_croak(aTHX_ "'@' outside of string in unpack");
991 case 'X' | TYPE_IS_SHRIEKING:
992 if (!len) /* Avoid division by 0 */
995 const char *hop, *last;
999 hop += UTF8SKIP(hop);
1006 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1010 len = (s - strbeg) % len;
1016 Perl_croak(aTHX_ "'X' outside of string in unpack");
1017 while (--s, UTF8_IS_CONTINUATION(*s)) {
1019 Perl_croak(aTHX_ "'X' outside of string in unpack");
1024 if (len > s - strbeg)
1025 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1029 case 'x' | TYPE_IS_SHRIEKING: {
1031 if (!len) /* Avoid division by 0 */
1033 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1034 else ai32 = (s - strbeg) % len;
1035 if (ai32 == 0) break;
1043 Perl_croak(aTHX_ "'x' outside of string in unpack");
1048 if (len > strend - s)
1049 Perl_croak(aTHX_ "'x' outside of string in unpack");
1054 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1060 /* Preliminary length estimate is assumed done in 'W' */
1061 if (len > strend - s) len = strend - s;
1067 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1068 if (hop >= strend) {
1070 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1075 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1077 } else if (len > strend - s)
1080 if (datumtype == 'Z') {
1081 /* 'Z' strips stuff after first null */
1082 const char *ptr, *end;
1084 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1085 sv = newSVpvn(s, ptr-s);
1086 if (howlen == e_star) /* exact for 'Z*' */
1087 len = ptr-s + (ptr != strend ? 1 : 0);
1088 } else if (datumtype == 'A') {
1089 /* 'A' strips both nulls and spaces */
1091 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1092 for (ptr = s+len-1; ptr >= s; ptr--) {
1094 && !UTF8_IS_CONTINUATION(*ptr)
1095 && !isSPACE_utf8_safe(ptr, strend))
1100 if (ptr >= s) ptr += UTF8SKIP(ptr);
1103 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1105 for (ptr = s+len-1; ptr >= s; ptr--)
1106 if (*ptr != 0 && !isSPACE(*ptr)) break;
1109 sv = newSVpvn(s, ptr-s);
1110 } else sv = newSVpvn(s, len);
1114 /* Undo any upgrade done due to need_utf8() */
1115 if (!(symptr->flags & FLAG_WAS_UTF8))
1116 sv_utf8_downgrade(sv, 0);
1124 if (howlen == e_star || len > (strend - s) * 8)
1125 len = (strend - s) * 8;
1128 while (len >= 8 && s < strend) {
1129 cuv += PL_bitcount[utf8_to_byte(aTHX_ &s, strend, datumtype)];
1134 cuv += PL_bitcount[*(U8 *)s++];
1137 if (len && s < strend) {
1139 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1140 if (datumtype == 'b')
1142 if (bits & 1) cuv++;
1147 if (bits & 0x80) cuv++;
1154 sv = sv_2mortal(newSV(len ? len : 1));
1157 if (datumtype == 'b') {
1159 const SSize_t ai32 = len;
1160 for (len = 0; len < ai32; len++) {
1161 if (len & 7) bits >>= 1;
1163 if (s >= strend) break;
1164 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1165 } else bits = *(U8 *) s++;
1166 *str++ = bits & 1 ? '1' : '0';
1170 const SSize_t ai32 = len;
1171 for (len = 0; len < ai32; len++) {
1172 if (len & 7) bits <<= 1;
1174 if (s >= strend) break;
1175 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1176 } else bits = *(U8 *) s++;
1177 *str++ = bits & 0x80 ? '1' : '0';
1181 SvCUR_set(sv, str - SvPVX_const(sv));
1188 /* Preliminary length estimate, acceptable for utf8 too */
1189 if (howlen == e_star || len > (strend - s) * 2)
1190 len = (strend - s) * 2;
1192 sv = sv_2mortal(newSV(len ? len : 1));
1196 if (datumtype == 'h') {
1199 for (len = 0; len < ai32; len++) {
1200 if (len & 1) bits >>= 4;
1202 if (s >= strend) break;
1203 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1204 } else bits = * (U8 *) s++;
1206 *str++ = PL_hexdigit[bits & 15];
1210 const SSize_t ai32 = len;
1211 for (len = 0; len < ai32; len++) {
1212 if (len & 1) bits <<= 4;
1214 if (s >= strend) break;
1215 bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1216 } else bits = *(U8 *) s++;
1218 *str++ = PL_hexdigit[(bits >> 4) & 15];
1223 SvCUR_set(sv, str - SvPVX_const(sv));
1230 if (explicit_length)
1231 /* Switch to "character" mode */
1232 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1237 while (len-- > 0 && s < strend) {
1242 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1243 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1244 if (retlen == (STRLEN) -1)
1245 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1249 aint = *(U8 *)(s)++;
1250 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1254 else if (checksum > bits_in_uv)
1255 cdouble += (NV)aint;
1263 while (len-- > 0 && s < strend) {
1265 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1266 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1267 if (retlen == (STRLEN) -1)
1268 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1272 else if (checksum > bits_in_uv)
1273 cdouble += (NV) val;
1277 } else if (!checksum)
1279 const U8 ch = *(U8 *) s++;
1282 else if (checksum > bits_in_uv)
1283 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1285 while (len-- > 0) cuv += *(U8 *) s++;
1289 if (explicit_length && howlen != e_star) {
1290 /* Switch to "bytes in UTF-8" mode */
1291 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1293 /* Should be impossible due to the need_utf8() test */
1294 Perl_croak(aTHX_ "U0 mode on a byte string");
1298 if (len > strend - s) len = strend - s;
1300 if (len && unpack_only_one) len = 1;
1304 while (len-- > 0 && s < strend) {
1308 U8 result[UTF8_MAXLEN+1];
1309 const char *ptr = s;
1311 /* Bug: warns about bad utf8 even if we are short on bytes
1312 and will break out of the loop */
1313 if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1316 len = UTF8SKIP(result);
1317 if (!S_utf8_to_bytes(aTHX_ &ptr, strend,
1318 (char *) &result[1], len-1, 'U')) break;
1319 auv = NATIVE_TO_UNI(utf8n_to_uvchr(result,
1322 UTF8_ALLOW_DEFAULT));
1325 auv = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s,
1328 UTF8_ALLOW_DEFAULT));
1329 if (retlen == (STRLEN) -1)
1330 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1335 else if (checksum > bits_in_uv)
1336 cdouble += (NV) auv;
1341 case 's' | TYPE_IS_SHRIEKING:
1342 #if SHORTSIZE != SIZE16
1345 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
1348 else if (checksum > bits_in_uv)
1349 cdouble += (NV)ashort;
1361 #if U16SIZE > SIZE16
1364 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1365 #if U16SIZE > SIZE16
1371 else if (checksum > bits_in_uv)
1372 cdouble += (NV)ai16;
1377 case 'S' | TYPE_IS_SHRIEKING:
1378 #if SHORTSIZE != SIZE16
1380 unsigned short aushort;
1381 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap);
1384 else if (checksum > bits_in_uv)
1385 cdouble += (NV)aushort;
1398 #if U16SIZE > SIZE16
1401 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
1402 if (datumtype == 'n')
1403 au16 = PerlSock_ntohs(au16);
1404 if (datumtype == 'v')
1408 else if (checksum > bits_in_uv)
1409 cdouble += (NV) au16;
1414 case 'v' | TYPE_IS_SHRIEKING:
1415 case 'n' | TYPE_IS_SHRIEKING:
1418 # if U16SIZE > SIZE16
1421 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1422 /* There should never be any byte-swapping here. */
1423 assert(!TYPE_ENDIANNESS(datumtype));
1424 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1425 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1426 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1427 ai16 = (I16) vtohs((U16) ai16);
1430 else if (checksum > bits_in_uv)
1431 cdouble += (NV) ai16;
1437 case 'i' | TYPE_IS_SHRIEKING:
1440 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
1443 else if (checksum > bits_in_uv)
1444 cdouble += (NV)aint;
1450 case 'I' | TYPE_IS_SHRIEKING:
1453 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
1456 else if (checksum > bits_in_uv)
1457 cdouble += (NV)auint;
1465 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
1468 else if (checksum > bits_in_uv)
1477 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
1480 else if (checksum > bits_in_uv)
1486 case 'l' | TYPE_IS_SHRIEKING:
1487 #if LONGSIZE != SIZE32
1490 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
1493 else if (checksum > bits_in_uv)
1494 cdouble += (NV)along;
1505 #if U32SIZE > SIZE32
1508 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1509 #if U32SIZE > SIZE32
1510 if (ai32 > 2147483647) ai32 -= 4294967296;
1514 else if (checksum > bits_in_uv)
1515 cdouble += (NV)ai32;
1520 case 'L' | TYPE_IS_SHRIEKING:
1521 #if LONGSIZE != SIZE32
1523 unsigned long aulong;
1524 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
1527 else if (checksum > bits_in_uv)
1528 cdouble += (NV)aulong;
1541 #if U32SIZE > SIZE32
1544 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
1545 if (datumtype == 'N')
1546 au32 = PerlSock_ntohl(au32);
1547 if (datumtype == 'V')
1551 else if (checksum > bits_in_uv)
1552 cdouble += (NV)au32;
1557 case 'V' | TYPE_IS_SHRIEKING:
1558 case 'N' | TYPE_IS_SHRIEKING:
1561 #if U32SIZE > SIZE32
1564 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1565 /* There should never be any byte swapping here. */
1566 assert(!TYPE_ENDIANNESS(datumtype));
1567 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1568 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1569 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1570 ai32 = (I32)vtohl((U32)ai32);
1573 else if (checksum > bits_in_uv)
1574 cdouble += (NV)ai32;
1582 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1583 /* newSVpv generates undef if aptr is NULL */
1584 mPUSHs(newSVpv(aptr, 0));
1592 while (len > 0 && s < strend) {
1594 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1595 auv = (auv << 7) | (ch & 0x7f);
1596 /* UTF8_IS_XXXXX not right here because this is a BER, not
1597 * UTF-8 format - using constant 0x80 */
1605 if (++bytes >= sizeof(UV)) { /* promote to string */
1608 sv = Perl_newSVpvf(aTHX_ "%.*" UVuf,
1609 (int)TYPE_DIGITS(UV), auv);
1610 while (s < strend) {
1611 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1612 sv = mul128(sv, (U8)(ch & 0x7f));
1618 t = SvPV_nolen_const(sv);
1627 if ((s >= strend) && bytes)
1628 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1632 if (symptr->howlen == e_star)
1633 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1635 if (s + sizeof(char*) <= strend) {
1637 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1638 /* newSVpvn generates undef if aptr is NULL */
1639 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1642 #if defined(HAS_QUAD) && IVSIZE >= 8
1646 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
1648 mPUSHs(newSViv((IV)aquad));
1649 else if (checksum > bits_in_uv)
1650 cdouble += (NV)aquad;
1658 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
1660 mPUSHs(newSVuv((UV)auquad));
1661 else if (checksum > bits_in_uv)
1662 cdouble += (NV)auquad;
1668 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1672 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
1682 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
1692 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1693 datumtype, needs_swap);
1700 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1704 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1705 sizeof(aldouble.bytes), datumtype, needs_swap);
1706 /* The most common long double format, the x86 80-bit
1707 * extended precision, has either 2 or 6 unused bytes,
1708 * which may contain garbage, which may contain
1709 * unintentional data. While we do zero the bytes of
1710 * the long double data in pack(), here in unpack() we
1711 * don't, because it's really hard to envision that
1712 * reading the long double off aldouble would be
1713 * affected by the unused bytes.
1715 * Note that trying to unpack 'long doubles' of 'long
1716 * doubles' packed in another system is in the general
1717 * case doomed without having more detail. */
1719 mPUSHn(aldouble.ld);
1721 cdouble += aldouble.ld;
1727 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1728 sv = sv_2mortal(newSV(l));
1735 /* Note that all legal uuencoded strings are ASCII printables, so
1736 * have the same representation under UTF-8 vs not. This means we
1737 * can ignore UTF8ness on legal input. For illegal we stop at the
1738 * first failure, and don't report where/what that is, so again we
1739 * can ignore UTF8ness */
1741 while (s < strend && *s != ' ' && ISUUCHAR(*s)) {
1745 len = PL_uudmap[*(U8*)s++] & 077;
1747 if (s < strend && ISUUCHAR(*s))
1748 a = PL_uudmap[*(U8*)s++] & 077;
1751 if (s < strend && ISUUCHAR(*s))
1752 b = PL_uudmap[*(U8*)s++] & 077;
1755 if (s < strend && ISUUCHAR(*s))
1756 c = PL_uudmap[*(U8*)s++] & 077;
1759 if (s < strend && ISUUCHAR(*s))
1760 d = PL_uudmap[*(U8*)s++] & 077;
1763 hunk[0] = (char)((a << 2) | (b >> 4));
1764 hunk[1] = (char)((b << 4) | (c >> 2));
1765 hunk[2] = (char)((c << 6) | d);
1767 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1772 else /* possible checksum byte */
1773 if (s + 1 < strend && s[1] == '\n')
1779 } /* End of switch */
1782 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1783 (checksum > bits_in_uv &&
1784 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1787 anv = (NV) (1 << (checksum & 15));
1788 while (checksum >= 16) {
1792 while (cdouble < 0.0)
1794 cdouble = Perl_modf(cdouble / anv, &trouble);
1795 #ifdef LONGDOUBLE_DOUBLEDOUBLE
1796 /* Workaround for powerpc doubledouble modfl bug:
1797 * close to 1.0L and -1.0L cdouble is 0, and trouble
1798 * is cdouble / anv. */
1799 if (trouble != Perl_ceil(trouble)) {
1801 if (cdouble > 1.0L) cdouble -= 1.0L;
1802 if (cdouble < -1.0L) cdouble += 1.0L;
1806 sv = newSVnv(cdouble);
1809 if (checksum < bits_in_uv) {
1810 UV mask = ((UV)1 << checksum) - 1;
1819 if (symptr->flags & FLAG_SLASH){
1820 if (SP - PL_stack_base - start_sp_offset <= 0)
1822 if( next_symbol(symptr) ){
1823 if( symptr->howlen == e_number )
1824 Perl_croak(aTHX_ "Count after length/code in unpack" );
1826 /* ...end of char buffer then no decent length available */
1827 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1829 /* take top of stack (hope it's numeric) */
1832 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1835 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1837 datumtype = symptr->code;
1838 explicit_length = FALSE;
1846 return SP - PL_stack_base - start_sp_offset;
1856 const char *pat = SvPV_const(left, llen);
1857 const char *s = SvPV_const(right, rlen);
1858 const char *strend = s + rlen;
1859 const char *patend = pat + llen;
1863 cnt = unpackstring(pat, patend, s, strend,
1864 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
1865 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
1868 if ( !cnt && gimme == G_SCALAR )
1869 PUSHs(&PL_sv_undef);
1874 doencodes(U8 *h, const U8 *s, SSize_t len)
1876 *h++ = PL_uuemap[len];
1878 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1879 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1880 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1881 *h++ = PL_uuemap[(077 & (s[2] & 077))];
1886 const U8 r = (len > 1 ? s[1] : '\0');
1887 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1888 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1889 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1890 *h++ = PL_uuemap[0];
1897 S_is_an_int(pTHX_ const char *s, STRLEN l)
1899 SV *result = newSVpvn(s, l);
1900 char *const result_c = SvPV_nolen(result); /* convenience */
1901 char *out = result_c;
1905 PERL_ARGS_ASSERT_IS_AN_INT;
1913 SvREFCNT_dec(result);
1936 SvREFCNT_dec(result);
1942 SvCUR_set(result, out - result_c);
1946 /* pnum must be '\0' terminated */
1948 S_div128(pTHX_ SV *pnum, bool *done)
1951 char * const s = SvPV(pnum, len);
1955 PERL_ARGS_ASSERT_DIV128;
1959 const int i = m * 10 + (*t - '0');
1960 const int r = (i >> 7); /* r < 10 */
1968 SvCUR_set(pnum, (STRLEN) (t - s));
1973 =for apidoc packlist
1975 The engine implementing C<pack()> Perl function.
1981 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
1985 PERL_ARGS_ASSERT_PACKLIST;
1987 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
1989 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1990 Also make sure any UTF8 flag is loaded */
1991 SvPV_force_nolen(cat);
1993 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
1995 (void)pack_rec( cat, &sym, beglist, endlist );
1998 /* like sv_utf8_upgrade, but also repoint the group start markers */
2000 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2003 const char *from_ptr, *from_start, *from_end, **marks, **m;
2004 char *to_start, *to_ptr;
2006 if (SvUTF8(sv)) return;
2008 from_start = SvPVX_const(sv);
2009 from_end = from_start + SvCUR(sv);
2010 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2011 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
2012 if (from_ptr == from_end) {
2013 /* Simple case: no character needs to be changed */
2018 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2019 Newx(to_start, len, char);
2020 Copy(from_start, to_start, from_ptr-from_start, char);
2021 to_ptr = to_start + (from_ptr-from_start);
2023 Newx(marks, sym_ptr->level+2, const char *);
2024 for (group=sym_ptr; group; group = group->previous)
2025 marks[group->level] = from_start + group->strbeg;
2026 marks[sym_ptr->level+1] = from_end+1;
2027 for (m = marks; *m < from_ptr; m++)
2028 *m = to_start + (*m-from_start);
2030 for (;from_ptr < from_end; from_ptr++) {
2031 while (*m == from_ptr) *m++ = to_ptr;
2032 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2036 while (*m == from_ptr) *m++ = to_ptr;
2037 if (m != marks + sym_ptr->level+1) {
2040 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2041 "level=%d", m, marks, sym_ptr->level);
2043 for (group=sym_ptr; group; group = group->previous)
2044 group->strbeg = marks[group->level] - to_start;
2049 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2050 from_start -= SvIVX(sv);
2053 SvFLAGS(sv) &= ~SVf_OOK;
2056 Safefree(from_start);
2057 SvPV_set(sv, to_start);
2058 SvCUR_set(sv, to_ptr - to_start);
2063 /* Exponential string grower. Makes string extension effectively O(n)
2064 needed says how many extra bytes we need (not counting the final '\0')
2065 Only grows the string if there is an actual lack of space
2068 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2069 const STRLEN cur = SvCUR(sv);
2070 const STRLEN len = SvLEN(sv);
2073 PERL_ARGS_ASSERT_SV_EXP_GROW;
2075 if (len - cur > needed) return SvPVX(sv);
2076 extend = needed > len ? needed : len;
2077 return SvGROW(sv, len+extend+1);
2081 S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
2084 if (UNLIKELY(SvAMAGIC(sv)))
2086 if (UNLIKELY(isinfnansv(sv))) {
2087 const I32 c = TYPE_NO_MODIFIERS(datumtype);
2088 const NV nv = SvNV_nomg(sv);
2090 Perl_croak(aTHX_ "Cannot compress %" NVgf " in pack", nv);
2092 Perl_croak(aTHX_ "Cannot pack %" NVgf " with '%c'", nv, (int) c);
2097 #define SvIV_no_inf(sv,d) \
2098 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
2099 #define SvUV_no_inf(sv,d) \
2100 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
2104 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2106 tempsym_t lookahead;
2107 SSize_t items = endlist - beglist;
2108 bool found = next_symbol(symptr);
2109 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2110 bool warn_utf8 = ckWARN(WARN_UTF8);
2113 PERL_ARGS_ASSERT_PACK_REC;
2115 if (symptr->level == 0 && found && symptr->code == 'U') {
2116 marked_upgrade(aTHX_ cat, symptr);
2117 symptr->flags |= FLAG_DO_UTF8;
2120 symptr->strbeg = SvCUR(cat);
2126 SV *lengthcode = NULL;
2127 I32 datumtype = symptr->code;
2128 howlen_t howlen = symptr->howlen;
2129 char *start = SvPVX(cat);
2130 char *cur = start + SvCUR(cat);
2133 #define NEXTFROM (lengthcode ? lengthcode : items > 0 ? (--items, *beglist++) : &PL_sv_no)
2134 #define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
2138 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2142 /* e_no_len and e_number */
2143 len = symptr->length;
2148 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2150 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2151 /* We can process this letter. */
2152 STRLEN size = props & PACK_SIZE_MASK;
2153 GROWING2(utf8, cat, start, cur, size, (STRLEN)len);
2157 /* Look ahead for next symbol. Do we have code/code? */
2158 lookahead = *symptr;
2159 found = next_symbol(&lookahead);
2160 if (symptr->flags & FLAG_SLASH) {
2162 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2163 if (strchr("aAZ", lookahead.code)) {
2164 if (lookahead.howlen == e_number) count = lookahead.length;
2167 count = sv_len_utf8(*beglist);
2170 if (lookahead.code == 'Z') count++;
2173 if (lookahead.howlen == e_number && lookahead.length < items)
2174 count = lookahead.length;
2177 lookahead.howlen = e_number;
2178 lookahead.length = count;
2179 lengthcode = sv_2mortal(newSViv(count));
2182 needs_swap = NEEDS_SWAP(datumtype);
2184 /* Code inside the switch must take care to properly update
2185 cat (CUR length and '\0' termination) if it updated *cur and
2186 doesn't simply leave using break */
2187 switch (TYPE_NO_ENDIANNESS(datumtype)) {
2189 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2190 (int) TYPE_NO_MODIFIERS(datumtype));
2192 Perl_croak(aTHX_ "'%%' may not be used in pack");
2194 case '.' | TYPE_IS_SHRIEKING:
2196 if (howlen == e_star) from = start;
2197 else if (len == 0) from = cur;
2199 tempsym_t *group = symptr;
2201 while (--len && group) group = group->previous;
2202 from = group ? start + group->strbeg : start;
2205 len = SvIV_no_inf(fromstr, datumtype);
2207 case '@' | TYPE_IS_SHRIEKING:
2209 from = start + symptr->strbeg;
2211 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2213 while (len && from < cur) {
2214 from += UTF8SKIP(from);
2218 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2220 /* Here we know from == cur */
2222 GROWING(0, cat, start, cur, len);
2223 Zero(cur, len, char);
2225 } else if (from < cur) {
2228 } else goto no_change;
2236 if (len > 0) goto grow;
2237 if (len == 0) goto no_change;
2244 tempsym_t savsym = *symptr;
2245 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2246 symptr->flags |= group_modifiers;
2247 symptr->patend = savsym.grpend;
2249 symptr->previous = &lookahead;
2252 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2253 else symptr->flags &= ~FLAG_PARSE_UTF8;
2254 was_utf8 = SvUTF8(cat);
2255 symptr->patptr = savsym.grpbeg;
2256 beglist = pack_rec(cat, symptr, beglist, endlist);
2257 if (SvUTF8(cat) != was_utf8)
2258 /* This had better be an upgrade while in utf8==0 mode */
2261 if (savsym.howlen == e_star && beglist == endlist)
2262 break; /* No way to continue */
2264 items = endlist - beglist;
2265 lookahead.flags = symptr->flags & ~group_modifiers;
2268 case 'X' | TYPE_IS_SHRIEKING:
2269 if (!len) /* Avoid division by 0 */
2276 hop += UTF8SKIP(hop);
2283 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2287 len = (cur-start) % len;
2291 if (len < 1) goto no_change;
2295 Perl_croak(aTHX_ "'%c' outside of string in pack",
2296 (int) TYPE_NO_MODIFIERS(datumtype));
2297 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2299 Perl_croak(aTHX_ "'%c' outside of string in pack",
2300 (int) TYPE_NO_MODIFIERS(datumtype));
2306 if (cur - start < len)
2307 Perl_croak(aTHX_ "'%c' outside of string in pack",
2308 (int) TYPE_NO_MODIFIERS(datumtype));
2311 if (cur < start+symptr->strbeg) {
2312 /* Make sure group starts don't point into the void */
2314 const STRLEN length = cur-start;
2315 for (group = symptr;
2316 group && length < group->strbeg;
2317 group = group->previous) group->strbeg = length;
2318 lookahead.strbeg = length;
2321 case 'x' | TYPE_IS_SHRIEKING: {
2323 if (!len) /* Avoid division by 0 */
2325 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2326 else ai32 = (cur - start) % len;
2327 if (ai32 == 0) goto no_change;
2339 aptr = SvPV_const(fromstr, fromlen);
2340 if (DO_UTF8(fromstr)) {
2341 const char *end, *s;
2343 if (!utf8 && !SvUTF8(cat)) {
2344 marked_upgrade(aTHX_ cat, symptr);
2345 lookahead.flags |= FLAG_DO_UTF8;
2346 lookahead.strbeg = symptr->strbeg;
2349 cur = start + SvCUR(cat);
2351 if (howlen == e_star) {
2352 if (utf8) goto string_copy;
2356 end = aptr + fromlen;
2357 fromlen = datumtype == 'Z' ? len-1 : len;
2358 while ((SSize_t) fromlen > 0 && s < end) {
2363 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2366 if (datumtype == 'Z') len++;
2372 fromlen = len - fromlen;
2373 if (datumtype == 'Z') fromlen--;
2374 if (howlen == e_star) {
2376 if (datumtype == 'Z') len++;
2378 GROWING(0, cat, start, cur, len);
2379 if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2380 datumtype | TYPE_IS_PACK))
2381 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2382 "for '%c', aptr=%p end=%p cur=%p, fromlen=%zu",
2383 (int)datumtype, aptr, end, cur, fromlen);
2387 if (howlen == e_star) {
2389 if (datumtype == 'Z') len++;
2391 if (len <= (SSize_t) fromlen) {
2393 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2395 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2397 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2398 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2400 while (fromlen > 0) {
2401 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2407 if (howlen == e_star) {
2409 if (datumtype == 'Z') len++;
2411 if (len <= (SSize_t) fromlen) {
2413 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2415 GROWING(0, cat, start, cur, len);
2416 Copy(aptr, cur, fromlen, char);
2420 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2427 const char *str, *end;
2428 SSize_t l, field_len;
2434 str = SvPV_const(fromstr, fromlen);
2435 end = str + fromlen;
2436 if (DO_UTF8(fromstr)) {
2438 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2440 utf8_source = FALSE;
2441 utf8_flags = 0; /* Unused, but keep compilers happy */
2443 if (howlen == e_star) len = fromlen;
2444 field_len = (len+7)/8;
2445 GROWING(utf8, cat, start, cur, field_len);
2446 if (len > (SSize_t)fromlen) len = fromlen;
2449 if (datumtype == 'B')
2453 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2455 } else bits |= *str++ & 1;
2456 if (l & 7) bits <<= 1;
2458 PUSH_BYTE(utf8, cur, bits);
2463 /* datumtype == 'b' */
2467 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2468 if (val & 1) bits |= 0x80;
2469 } else if (*str++ & 1)
2471 if (l & 7) bits >>= 1;
2473 PUSH_BYTE(utf8, cur, bits);
2479 if (datumtype == 'B')
2480 bits <<= 7 - (l & 7);
2482 bits >>= 7 - (l & 7);
2483 PUSH_BYTE(utf8, cur, bits);
2486 /* Determine how many chars are left in the requested field */
2488 if (howlen == e_star) field_len = 0;
2489 else field_len -= l;
2490 Zero(cur, field_len, char);
2496 const char *str, *end;
2497 SSize_t l, field_len;
2503 str = SvPV_const(fromstr, fromlen);
2504 end = str + fromlen;
2505 if (DO_UTF8(fromstr)) {
2507 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2509 utf8_source = FALSE;
2510 utf8_flags = 0; /* Unused, but keep compilers happy */
2512 if (howlen == e_star) len = fromlen;
2513 field_len = (len+1)/2;
2514 GROWING(utf8, cat, start, cur, field_len);
2515 if (!utf8_source && len > (SSize_t)fromlen) len = fromlen;
2518 if (datumtype == 'H')
2522 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2523 if (val < 256 && isALPHA(val))
2524 bits |= (val + 9) & 0xf;
2527 } else if (isALPHA(*str))
2528 bits |= (*str++ + 9) & 0xf;
2530 bits |= *str++ & 0xf;
2531 if (l & 1) bits <<= 4;
2533 PUSH_BYTE(utf8, cur, bits);
2541 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2542 if (val < 256 && isALPHA(val))
2543 bits |= ((val + 9) & 0xf) << 4;
2545 bits |= (val & 0xf) << 4;
2546 } else if (isALPHA(*str))
2547 bits |= ((*str++ + 9) & 0xf) << 4;
2549 bits |= (*str++ & 0xf) << 4;
2550 if (l & 1) bits >>= 4;
2552 PUSH_BYTE(utf8, cur, bits);
2558 PUSH_BYTE(utf8, cur, bits);
2561 /* Determine how many chars are left in the requested field */
2563 if (howlen == e_star) field_len = 0;
2564 else field_len -= l;
2565 Zero(cur, field_len, char);
2573 aiv = SvIV_no_inf(fromstr, datumtype);
2574 if ((-128 > aiv || aiv > 127))
2575 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2576 "Character in 'c' format wrapped in pack");
2577 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2582 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2588 aiv = SvIV_no_inf(fromstr, datumtype);
2589 if ((0 > aiv || aiv > 0xff))
2590 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2591 "Character in 'C' format wrapped in pack");
2592 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2597 U8 in_bytes = (U8)IN_BYTES;
2599 end = start+SvLEN(cat)-1;
2600 if (utf8) end -= UTF8_MAXLEN-1;
2604 auv = SvUV_no_inf(fromstr, datumtype);
2605 if (in_bytes) auv = auv % 0x100;
2610 SvCUR_set(cat, cur - start);
2612 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2613 end = start+SvLEN(cat)-UTF8_MAXLEN;
2615 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, 0);
2620 SvCUR_set(cat, cur - start);
2621 marked_upgrade(aTHX_ cat, symptr);
2622 lookahead.flags |= FLAG_DO_UTF8;
2623 lookahead.strbeg = symptr->strbeg;
2626 cur = start + SvCUR(cat);
2627 end = start+SvLEN(cat)-UTF8_MAXLEN;
2630 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2631 "Character in 'W' format wrapped in pack");
2636 SvCUR_set(cat, cur - start);
2637 GROWING(0, cat, start, cur, len+1);
2638 end = start+SvLEN(cat)-1;
2640 *(U8 *) cur++ = (U8)auv;
2649 if (!(symptr->flags & FLAG_DO_UTF8)) {
2650 marked_upgrade(aTHX_ cat, symptr);
2651 lookahead.flags |= FLAG_DO_UTF8;
2652 lookahead.strbeg = symptr->strbeg;
2658 end = start+SvLEN(cat);
2659 if (!utf8) end -= UTF8_MAXLEN;
2663 auv = SvUV_no_inf(fromstr, datumtype);
2665 U8 buffer[UTF8_MAXLEN+1], *endb;
2666 endb = uvchr_to_utf8_flags(buffer, UNI_TO_NATIVE(auv), 0);
2667 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2669 SvCUR_set(cat, cur - start);
2670 GROWING(0, cat, start, cur,
2671 len+(endb-buffer)*UTF8_EXPAND);
2672 end = start+SvLEN(cat);
2674 cur = my_bytes_to_utf8(buffer, endb-buffer, cur, 0);
2678 SvCUR_set(cat, cur - start);
2679 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2680 end = start+SvLEN(cat)-UTF8_MAXLEN;
2682 cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
2689 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2695 anv = SvNV(fromstr);
2696 # if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
2697 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2698 * on Alpha; fake it if we don't have them.
2702 else if (anv < -FLT_MAX)
2704 else afloat = (float)anv;
2706 # if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2708 afloat = (float)NV_NAN;
2712 /* a simple cast to float is undefined if outside
2713 * the range of values that can be represented */
2714 afloat = (float)(anv > FLT_MAX ? NV_INF :
2715 anv < -FLT_MAX ? -NV_INF : anv);
2718 PUSH_VAR(utf8, cur, afloat, needs_swap);
2726 anv = SvNV(fromstr);
2727 # if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
2728 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2729 * on Alpha; fake it if we don't have them.
2733 else if (anv < -DBL_MAX)
2735 else adouble = (double)anv;
2737 adouble = (double)anv;
2739 PUSH_VAR(utf8, cur, adouble, needs_swap);
2744 Zero(&anv, 1, NV); /* can be long double with unused bits */
2748 /* to work round a gcc/x86 bug; don't use SvNV */
2749 anv.nv = sv_2nv(fromstr);
2750 # if defined(LONGDOUBLE_X86_80_BIT) && defined(USE_LONG_DOUBLE) \
2751 && LONG_DOUBLESIZE > 10
2752 /* GCC sometimes overwrites the padding in the
2754 Zero(anv.bytes+10, sizeof(anv.bytes) - 10, U8);
2757 anv.nv = SvNV(fromstr);
2759 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
2763 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2766 /* long doubles can have unused bits, which may be nonzero */
2767 Zero(&aldouble, 1, long double);
2771 /* to work round a gcc/x86 bug; don't use SvNV */
2772 aldouble.ld = (long double)sv_2nv(fromstr);
2773 # if defined(LONGDOUBLE_X86_80_BIT) && LONG_DOUBLESIZE > 10
2774 /* GCC sometimes overwrites the padding in the
2776 Zero(aldouble.bytes+10, sizeof(aldouble.bytes) - 10, U8);
2779 aldouble.ld = (long double)SvNV(fromstr);
2781 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2787 case 'n' | TYPE_IS_SHRIEKING:
2792 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2793 ai16 = PerlSock_htons(ai16);
2794 PUSH16(utf8, cur, &ai16, FALSE);
2797 case 'v' | TYPE_IS_SHRIEKING:
2802 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2804 PUSH16(utf8, cur, &ai16, FALSE);
2807 case 'S' | TYPE_IS_SHRIEKING:
2808 #if SHORTSIZE != SIZE16
2810 unsigned short aushort;
2812 aushort = SvUV_no_inf(fromstr, datumtype);
2813 PUSH_VAR(utf8, cur, aushort, needs_swap);
2823 au16 = (U16)SvUV_no_inf(fromstr, datumtype);
2824 PUSH16(utf8, cur, &au16, needs_swap);
2827 case 's' | TYPE_IS_SHRIEKING:
2828 #if SHORTSIZE != SIZE16
2832 ashort = SvIV_no_inf(fromstr, datumtype);
2833 PUSH_VAR(utf8, cur, ashort, needs_swap);
2843 ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2844 PUSH16(utf8, cur, &ai16, needs_swap);
2848 case 'I' | TYPE_IS_SHRIEKING:
2852 auint = SvUV_no_inf(fromstr, datumtype);
2853 PUSH_VAR(utf8, cur, auint, needs_swap);
2860 aiv = SvIV_no_inf(fromstr, datumtype);
2861 PUSH_VAR(utf8, cur, aiv, needs_swap);
2868 auv = SvUV_no_inf(fromstr, datumtype);
2869 PUSH_VAR(utf8, cur, auv, needs_swap);
2876 S_sv_check_infnan(aTHX_ fromstr, datumtype);
2877 anv = SvNV_nomg(fromstr);
2881 SvCUR_set(cat, cur - start);
2882 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
2885 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2886 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2887 any negative IVs will have already been got by the croak()
2888 above. IOK is untrue for fractions, so we test them
2889 against UV_MAX_P1. */
2890 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2891 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
2892 char *in = buf + sizeof(buf);
2893 UV auv = SvUV_nomg(fromstr);
2896 *--in = (char)((auv & 0x7f) | 0x80);
2899 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2900 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2901 in, (buf + sizeof(buf)) - in);
2902 } else if (SvPOKp(fromstr))
2904 else if (SvNOKp(fromstr)) {
2905 /* 10**NV_MAX_10_EXP is the largest power of 10
2906 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
2907 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2908 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2909 And with that many bytes only Inf can overflow.
2910 Some C compilers are strict about integral constant
2911 expressions so we conservatively divide by a slightly
2912 smaller integer instead of multiplying by the exact
2913 floating-point value.
2915 #ifdef NV_MAX_10_EXP
2916 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
2917 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
2919 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
2920 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
2922 char *in = buf + sizeof(buf);
2924 anv = Perl_floor(anv);
2926 const NV next = Perl_floor(anv / 128);
2927 if (in <= buf) /* this cannot happen ;-) */
2928 Perl_croak(aTHX_ "Cannot compress integer in pack");
2929 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
2932 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2933 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2934 in, (buf + sizeof(buf)) - in);
2943 /* Copy string and check for compliance */
2944 from = SvPV_nomg_const(fromstr, len);
2945 if ((norm = is_an_int(from, len)) == NULL)
2946 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
2948 Newx(result, len, char);
2951 while (!done) *--in = div128(norm, &done) | 0x80;
2952 result[len - 1] &= 0x7F; /* clear continue bit */
2953 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2954 in, (result + len) - in);
2956 SvREFCNT_dec(norm); /* free norm */
2961 case 'i' | TYPE_IS_SHRIEKING:
2965 aint = SvIV_no_inf(fromstr, datumtype);
2966 PUSH_VAR(utf8, cur, aint, needs_swap);
2969 case 'N' | TYPE_IS_SHRIEKING:
2974 au32 = SvUV_no_inf(fromstr, datumtype);
2975 au32 = PerlSock_htonl(au32);
2976 PUSH32(utf8, cur, &au32, FALSE);
2979 case 'V' | TYPE_IS_SHRIEKING:
2984 au32 = SvUV_no_inf(fromstr, datumtype);
2986 PUSH32(utf8, cur, &au32, FALSE);
2989 case 'L' | TYPE_IS_SHRIEKING:
2990 #if LONGSIZE != SIZE32
2992 unsigned long aulong;
2994 aulong = SvUV_no_inf(fromstr, datumtype);
2995 PUSH_VAR(utf8, cur, aulong, needs_swap);
3005 au32 = SvUV_no_inf(fromstr, datumtype);
3006 PUSH32(utf8, cur, &au32, needs_swap);
3009 case 'l' | TYPE_IS_SHRIEKING:
3010 #if LONGSIZE != SIZE32
3014 along = SvIV_no_inf(fromstr, datumtype);
3015 PUSH_VAR(utf8, cur, along, needs_swap);
3025 ai32 = SvIV_no_inf(fromstr, datumtype);
3026 PUSH32(utf8, cur, &ai32, needs_swap);
3029 #if defined(HAS_QUAD) && IVSIZE >= 8
3034 auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype);
3035 PUSH_VAR(utf8, cur, auquad, needs_swap);
3042 aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype);
3043 PUSH_VAR(utf8, cur, aquad, needs_swap);
3048 len = 1; /* assume SV is correct length */
3049 GROWING(utf8, cat, start, cur, sizeof(char *));
3056 SvGETMAGIC(fromstr);
3057 if (!SvOK(fromstr)) aptr = NULL;
3059 /* XXX better yet, could spirit away the string to
3060 * a safe spot and hang on to it until the result
3061 * of pack() (and all copies of the result) are
3064 if (((SvTEMP(fromstr) && SvREFCNT(fromstr) == 1)
3065 || (SvPADTMP(fromstr) &&
3066 !SvREADONLY(fromstr)))) {
3067 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3068 "Attempt to pack pointer to temporary value");
3070 if (SvPOK(fromstr) || SvNIOK(fromstr))
3071 aptr = SvPV_nomg_const_nolen(fromstr);
3073 aptr = SvPV_force_flags_nolen(fromstr, 0);
3075 PUSH_VAR(utf8, cur, aptr, needs_swap);
3079 const char *aptr, *aend;
3083 if (len <= 2) len = 45;
3084 else len = len / 3 * 3;
3086 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3087 "Field too wide in 'u' format in pack");
3090 aptr = SvPV_const(fromstr, fromlen);
3091 from_utf8 = DO_UTF8(fromstr);
3093 aend = aptr + fromlen;
3094 fromlen = sv_len_utf8_nomg(fromstr);
3095 } else aend = NULL; /* Unused, but keep compilers happy */
3096 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3097 while (fromlen > 0) {
3100 U8 hunk[1+63/3*4+1];
3102 if ((SSize_t)fromlen > len)
3108 if (!S_utf8_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3109 'u' | TYPE_IS_PACK)) {
3111 SvCUR_set(cat, cur - start);
3112 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3113 "aptr=%p, aend=%p, buffer=%p, todo=%zd",
3114 aptr, aend, buffer, todo);
3116 end = doencodes(hunk, (const U8 *)buffer, todo);
3118 end = doencodes(hunk, (const U8 *)aptr, todo);
3121 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
3128 SvCUR_set(cat, cur - start);
3130 *symptr = lookahead;
3139 dSP; dMARK; dORIGMARK; dTARGET;
3142 SV *pat_sv = *++MARK;
3143 const char *pat = SvPV_const(pat_sv, fromlen);
3144 const char *patend = pat + fromlen;
3150 packlist(cat, pat, patend, MARK, SP + 1);
3154 const char * result = SvPV_nomg(cat, result_len);
3155 const U8 * error_pos;
3157 if (! is_utf8_string_loc((U8 *) result, result_len, &error_pos)) {
3158 _force_out_malformed_utf8_message(error_pos,
3159 (U8 *) result + result_len,
3163 NOT_REACHED; /* NOTREACHED */
3174 * ex: set ts=8 sts=4 sw=4 et: