3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
32 #define PERL_IN_PP_PACK_C
35 /* Types used by pack/unpack */
37 e_no_len, /* no length */
38 e_number, /* number, [] */
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
57 #define TEMPSYM_INIT(symptr, p, e, f) \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
81 U8 bytes[sizeof(long double)];
88 /* Maximum number of bytes to which a byte can grow due to upgrade */
92 * Offset for integer pack/unpack.
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
112 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
115 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
116 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
117 # define OFF16(p) ((char*)(p))
118 # define OFF32(p) ((char*)(p))
120 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
121 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
124 ++++ bad cray byte order
128 # define OFF16(p) ((char *) (p))
129 # define OFF32(p) ((char *) (p))
132 #define PUSH16(utf8, cur, p, needs_swap) \
133 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
134 #define PUSH32(utf8, cur, p, needs_swap) \
135 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
137 #if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
138 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
139 #elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
140 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
142 # error "Unsupported byteorder"
143 /* Need to add code here to re-instate mixed endian support.
144 NEEDS_SWAP would need to hold a flag indicating which action to
145 take, and S_reverse_copy and the code in uni_to_bytes would need
146 logic adding to deal with any mixed-endian transformations needed.
150 /* Only to be used inside a loop (see the break) */
151 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
154 if (!uni_to_bytes(aTHX_ &s, strend, \
155 (char *) (buf), len, datumtype)) break; \
158 S_reverse_copy(s, (char *) (buf), len); \
160 Copy(s, (char *) (buf), len, char); \
165 #define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
166 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
168 #define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
169 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
171 #define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
172 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
174 #define PUSH_VAR(utf8, aptr, var, needs_swap) \
175 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
177 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
178 #define MAX_SUB_TEMPLATE_LEVEL 100
180 /* flags (note that type modifiers can also be used as flags!) */
181 #define FLAG_WAS_UTF8 0x40
182 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
183 #define FLAG_UNPACK_ONLY_ONE 0x10
184 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
185 #define FLAG_SLASH 0x04
186 #define FLAG_COMMA 0x02
187 #define FLAG_PACK 0x01
190 S_mul128(pTHX_ SV *sv, U8 m)
193 char *s = SvPV(sv, len);
196 PERL_ARGS_ASSERT_MUL128;
198 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
199 SV * const tmpNew = newSVpvs("0000000000");
201 sv_catsv(tmpNew, sv);
202 SvREFCNT_dec(sv); /* free old sv */
207 while (!*t) /* trailing '\0'? */
210 const U32 i = ((*t - '0') << 7) + m;
211 *(t--) = '0' + (char)(i % 10);
217 /* Explosives and implosives. */
219 #if 'I' == 73 && 'J' == 74
220 /* On an ASCII/ISO kind of system */
221 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
224 Some other sort of character set - use memchr() so we don't match
227 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
231 #define TYPE_IS_SHRIEKING 0x100
232 #define TYPE_IS_BIG_ENDIAN 0x200
233 #define TYPE_IS_LITTLE_ENDIAN 0x400
234 #define TYPE_IS_PACK 0x800
235 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
236 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
237 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
239 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
240 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
242 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
244 #define PACK_SIZE_CANNOT_CSUM 0x80
245 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
246 #define PACK_SIZE_MASK 0x3F
248 #include "packsizetables.c"
251 S_reverse_copy(const char *src, char *dest, STRLEN len)
259 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
262 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
263 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
264 /* We try to process malformed UTF-8 as much as possible (preferably with
265 warnings), but these two mean we make no progress in the string and
266 might enter an infinite loop */
267 if (retlen == (STRLEN) -1 || retlen == 0)
268 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
269 (int) TYPE_NO_MODIFIERS(datumtype));
271 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
272 "Character in '%c' format wrapped in unpack",
273 (int) TYPE_NO_MODIFIERS(datumtype));
280 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
281 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
285 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
289 const char *from = *s;
291 const U32 flags = ckWARN(WARN_UTF8) ?
292 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
293 const bool needs_swap = NEEDS_SWAP(datumtype);
298 for (;buf_len > 0; buf_len--) {
299 if (from >= end) return FALSE;
300 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
301 if (retlen == (STRLEN) -1 || retlen == 0) {
302 from += UTF8SKIP(from);
304 } else from += retlen;
310 *(U8 *)--buf = (U8)val;
312 *(U8 *)buf++ = (U8)val;
314 /* We have enough characters for the buffer. Did we have problems ? */
317 /* Rewalk the string fragment while warning */
319 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
320 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
321 if (ptr >= end) break;
322 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
324 if (from > end) from = end;
327 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
328 WARN_PACK : WARN_UNPACK),
329 "Character(s) in '%c' format wrapped in %s",
330 (int) TYPE_NO_MODIFIERS(datumtype),
331 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
338 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
342 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
343 if (val >= 0x100 || !ISUUCHAR(val) ||
344 retlen == (STRLEN) -1 || retlen == 0) {
348 *out = PL_uudmap[val] & 077;
354 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
355 PERL_ARGS_ASSERT_BYTES_TO_UNI;
358 const U8 *p = start + len;
359 while (p-- > start) {
360 const UV uv = NATIVE_TO_ASCII(*p);
361 if (UNI_IS_INVARIANT(uv))
362 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
364 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
365 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
369 const U8 * const end = start + len;
370 while (start < end) {
371 const UV uv = NATIVE_TO_ASCII(*start);
372 if (UNI_IS_INVARIANT(uv))
373 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
375 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
376 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
384 #define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
387 (cur) = S_bytes_to_uni((U8 *) buf, len, (cur), needs_swap); \
390 S_reverse_copy((char *)(buf), cur, len); \
392 Copy(buf, cur, len, char); \
397 #define GROWING(utf8, cat, start, cur, in_len) \
399 STRLEN glen = (in_len); \
400 if (utf8) glen *= UTF8_EXPAND; \
401 if ((cur) + glen >= (start) + SvLEN(cat)) { \
402 (start) = sv_exp_grow(cat, glen); \
403 (cur) = (start) + SvCUR(cat); \
407 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
409 const STRLEN glen = (in_len); \
411 if (utf8) gl *= UTF8_EXPAND; \
412 if ((cur) + gl >= (start) + SvLEN(cat)) { \
414 SvCUR_set((cat), (cur) - (start)); \
415 (start) = sv_exp_grow(cat, gl); \
416 (cur) = (start) + SvCUR(cat); \
418 PUSH_BYTES(utf8, cur, buf, glen, 0); \
421 #define PUSH_BYTE(utf8, s, byte) \
424 const U8 au8 = (byte); \
425 (s) = S_bytes_to_uni(&au8, 1, (s), 0); \
426 } else *(U8 *)(s)++ = (byte); \
429 /* Only to be used inside a loop (see the break) */
430 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
433 if (str >= end) break; \
434 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
435 if (retlen == (STRLEN) -1 || retlen == 0) { \
437 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
442 static const char *_action( const tempsym_t* symptr )
444 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
447 /* Returns the sizeof() struct described by pat */
449 S_measure_struct(pTHX_ tempsym_t* symptr)
453 PERL_ARGS_ASSERT_MEASURE_STRUCT;
455 while (next_symbol(symptr)) {
459 switch (symptr->howlen) {
461 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
465 /* e_no_len and e_number */
466 len = symptr->length;
470 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
473 /* endianness doesn't influence the size of a type */
474 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
476 Perl_croak(aTHX_ "Invalid type '%c' in %s",
477 (int)TYPE_NO_MODIFIERS(symptr->code),
479 case '.' | TYPE_IS_SHRIEKING:
480 case '@' | TYPE_IS_SHRIEKING:
484 case 'U': /* XXXX Is it correct? */
487 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
488 (int) TYPE_NO_MODIFIERS(symptr->code),
495 tempsym_t savsym = *symptr;
496 symptr->patptr = savsym.grpbeg;
497 symptr->patend = savsym.grpend;
498 /* XXXX Theoretically, we need to measure many times at
499 different positions, since the subexpression may contain
500 alignment commands, but be not of aligned length.
501 Need to detect this and croak(). */
502 size = measure_struct(symptr);
506 case 'X' | TYPE_IS_SHRIEKING:
507 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
509 if (!len) /* Avoid division by 0 */
511 len = total % len; /* Assumed: the start is aligned. */
516 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
518 case 'x' | TYPE_IS_SHRIEKING:
519 if (!len) /* Avoid division by 0 */
521 star = total % len; /* Assumed: the start is aligned. */
522 if (star) /* Other portable ways? */
546 size = sizeof(char*);
556 /* locate matching closing parenthesis or bracket
557 * returns char pointer to char after match, or NULL
560 S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
562 PERL_ARGS_ASSERT_GROUP_END;
564 while (patptr < patend) {
565 const char c = *patptr++;
572 while (patptr < patend && *patptr != '\n')
576 patptr = group_end(patptr, patend, ')') + 1;
578 patptr = group_end(patptr, patend, ']') + 1;
580 Perl_croak(aTHX_ "No group ending character '%c' found in template",
586 /* Convert unsigned decimal number to binary.
587 * Expects a pointer to the first digit and address of length variable
588 * Advances char pointer to 1st non-digit char and returns number
591 S_get_num(pTHX_ const char *patptr, I32 *lenptr )
593 I32 len = *patptr++ - '0';
595 PERL_ARGS_ASSERT_GET_NUM;
597 while (isDIGIT(*patptr)) {
598 if (len >= 0x7FFFFFFF/10)
599 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
600 len = (len * 10) + (*patptr++ - '0');
606 /* The marvellous template parsing routine: Using state stored in *symptr,
607 * locates next template code and count
610 S_next_symbol(pTHX_ tempsym_t* symptr )
612 const char* patptr = symptr->patptr;
613 const char* const patend = symptr->patend;
615 PERL_ARGS_ASSERT_NEXT_SYMBOL;
617 symptr->flags &= ~FLAG_SLASH;
619 while (patptr < patend) {
620 if (isSPACE(*patptr))
622 else if (*patptr == '#') {
624 while (patptr < patend && *patptr != '\n')
629 /* We should have found a template code */
630 I32 code = *patptr++ & 0xFF;
631 U32 inherited_modifiers = 0;
633 if (code == ','){ /* grandfather in commas but with a warning */
634 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
635 symptr->flags |= FLAG_COMMA;
636 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
637 "Invalid type ',' in %s", _action( symptr ) );
642 /* for '(', skip to ')' */
644 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
645 Perl_croak(aTHX_ "()-group starts with a count in %s",
647 symptr->grpbeg = patptr;
648 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
649 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
650 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
654 /* look for group modifiers to inherit */
655 if (TYPE_ENDIANNESS(symptr->flags)) {
656 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
657 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
660 /* look for modifiers */
661 while (patptr < patend) {
666 modifier = TYPE_IS_SHRIEKING;
667 allowed = "sSiIlLxXnNvV@.";
670 modifier = TYPE_IS_BIG_ENDIAN;
671 allowed = ENDIANNESS_ALLOWED_TYPES;
674 modifier = TYPE_IS_LITTLE_ENDIAN;
675 allowed = ENDIANNESS_ALLOWED_TYPES;
686 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
687 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
688 allowed, _action( symptr ) );
690 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
691 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
692 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
693 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
694 TYPE_ENDIANNESS_MASK)
695 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
696 *patptr, _action( symptr ) );
698 if ((code & modifier)) {
699 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
700 "Duplicate modifier '%c' after '%c' in %s",
701 *patptr, (int) TYPE_NO_MODIFIERS(code),
709 /* inherit modifiers */
710 code |= inherited_modifiers;
712 /* look for count and/or / */
713 if (patptr < patend) {
714 if (isDIGIT(*patptr)) {
715 patptr = get_num( patptr, &symptr->length );
716 symptr->howlen = e_number;
718 } else if (*patptr == '*') {
720 symptr->howlen = e_star;
722 } else if (*patptr == '[') {
723 const char* lenptr = ++patptr;
724 symptr->howlen = e_number;
725 patptr = group_end( patptr, patend, ']' ) + 1;
726 /* what kind of [] is it? */
727 if (isDIGIT(*lenptr)) {
728 lenptr = get_num( lenptr, &symptr->length );
730 Perl_croak(aTHX_ "Malformed integer in [] in %s",
733 tempsym_t savsym = *symptr;
734 symptr->patend = patptr-1;
735 symptr->patptr = lenptr;
736 savsym.length = measure_struct(symptr);
740 symptr->howlen = e_no_len;
745 while (patptr < patend) {
746 if (isSPACE(*patptr))
748 else if (*patptr == '#') {
750 while (patptr < patend && *patptr != '\n')
755 if (*patptr == '/') {
756 symptr->flags |= FLAG_SLASH;
758 if (patptr < patend &&
759 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
760 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
767 /* at end - no count, no / */
768 symptr->howlen = e_no_len;
773 symptr->patptr = patptr;
777 symptr->patptr = patptr;
782 There is no way to cleanly handle the case where we should process the
783 string per byte in its upgraded form while it's really in downgraded form
784 (e.g. estimates like strend-s as an upper bound for the number of
785 characters left wouldn't work). So if we foresee the need of this
786 (pattern starts with U or contains U0), we want to work on the encoded
787 version of the string. Users are advised to upgrade their pack string
788 themselves if they need to do a lot of unpacks like this on it
791 need_utf8(const char *pat, const char *patend)
795 PERL_ARGS_ASSERT_NEED_UTF8;
797 while (pat < patend) {
800 pat = (const char *) memchr(pat, '\n', patend-pat);
801 if (!pat) return FALSE;
802 } else if (pat[0] == 'U') {
803 if (first || pat[1] == '0') return TRUE;
804 } else first = FALSE;
811 first_symbol(const char *pat, const char *patend) {
812 PERL_ARGS_ASSERT_FIRST_SYMBOL;
814 while (pat < patend) {
815 if (pat[0] != '#') return pat[0];
817 pat = (const char *) memchr(pat, '\n', patend-pat);
825 =for apidoc unpackstring
827 The engine implementing the unpack() Perl function.
829 Using the template pat..patend, this function unpacks the string
830 s..strend into a number of mortal SVs, which it pushes onto the perl
831 argument (@_) stack (so you will need to issue a C<PUTBACK> before and
832 C<SPAGAIN> after the call to this function). It returns the number of
835 The strend and patend pointers should point to the byte following the last
836 character of each string.
838 Although this function returns its values on the perl argument stack, it
839 doesn't take any parameters from that stack (and thus in particular
840 there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
846 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
850 PERL_ARGS_ASSERT_UNPACKSTRING;
852 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
853 else if (need_utf8(pat, patend)) {
854 /* We probably should try to avoid this in case a scalar context call
855 wouldn't get to the "U0" */
856 STRLEN len = strend - s;
857 s = (char *) bytes_to_utf8((U8 *) s, &len);
860 flags |= FLAG_DO_UTF8;
863 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
864 flags |= FLAG_PARSE_UTF8;
866 TEMPSYM_INIT(&sym, pat, patend, flags);
868 return unpack_rec(&sym, s, s, strend, NULL );
872 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
876 const I32 start_sp_offset = SP - PL_stack_base;
881 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
883 bool explicit_length;
884 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
885 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
887 PERL_ARGS_ASSERT_UNPACK_REC;
889 symptr->strbeg = s - strbeg;
891 while (next_symbol(symptr)) {
894 I32 datumtype = symptr->code;
896 /* do first one only unless in list context
897 / is implemented by unpacking the count, then popping it from the
898 stack, so must check that we're not in the middle of a / */
900 && (SP - PL_stack_base == start_sp_offset + 1)
901 && (datumtype != '/') ) /* XXX can this be omitted */
904 switch (howlen = symptr->howlen) {
906 len = strend - strbeg; /* long enough */
909 /* e_no_len and e_number */
910 len = symptr->length;
914 explicit_length = TRUE;
916 beyond = s >= strend;
918 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
920 /* props nonzero means we can process this letter. */
921 const long size = props & PACK_SIZE_MASK;
922 const long howmany = (strend - s) / size;
926 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
927 if (len && unpack_only_one) len = 1;
933 needs_swap = NEEDS_SWAP(datumtype);
935 switch(TYPE_NO_ENDIANNESS(datumtype)) {
937 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
940 if (howlen == e_no_len)
941 len = 16; /* len is not specified */
949 tempsym_t savsym = *symptr;
950 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
951 symptr->flags |= group_modifiers;
952 symptr->patend = savsym.grpend;
953 symptr->previous = &savsym;
956 if (len && unpack_only_one) len = 1;
958 symptr->patptr = savsym.grpbeg;
959 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
960 else symptr->flags &= ~FLAG_PARSE_UTF8;
961 unpack_rec(symptr, s, strbeg, strend, &s);
962 if (s == strend && savsym.howlen == e_star)
963 break; /* No way to continue */
966 savsym.flags = symptr->flags & ~group_modifiers;
970 case '.' | TYPE_IS_SHRIEKING:
974 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
975 if (howlen == e_star) from = strbeg;
976 else if (len <= 0) from = s;
978 tempsym_t *group = symptr;
980 while (--len && group) group = group->previous;
981 from = group ? strbeg + group->strbeg : strbeg;
984 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
985 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
989 case '@' | TYPE_IS_SHRIEKING:
991 s = strbeg + symptr->strbeg;
992 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
996 Perl_croak(aTHX_ "'@' outside of string in unpack");
1001 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1004 Perl_croak(aTHX_ "'@' outside of string in unpack");
1008 case 'X' | TYPE_IS_SHRIEKING:
1009 if (!len) /* Avoid division by 0 */
1012 const char *hop, *last;
1014 hop = last = strbeg;
1016 hop += UTF8SKIP(hop);
1023 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1027 len = (s - strbeg) % len;
1033 Perl_croak(aTHX_ "'X' outside of string in unpack");
1034 while (--s, UTF8_IS_CONTINUATION(*s)) {
1036 Perl_croak(aTHX_ "'X' outside of string in unpack");
1041 if (len > s - strbeg)
1042 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1046 case 'x' | TYPE_IS_SHRIEKING: {
1048 if (!len) /* Avoid division by 0 */
1050 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1051 else ai32 = (s - strbeg) % len;
1052 if (ai32 == 0) break;
1060 Perl_croak(aTHX_ "'x' outside of string in unpack");
1065 if (len > strend - s)
1066 Perl_croak(aTHX_ "'x' outside of string in unpack");
1071 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1077 /* Preliminary length estimate is assumed done in 'W' */
1078 if (len > strend - s) len = strend - s;
1084 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1085 if (hop >= strend) {
1087 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1092 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1094 } else if (len > strend - s)
1097 if (datumtype == 'Z') {
1098 /* 'Z' strips stuff after first null */
1099 const char *ptr, *end;
1101 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1102 sv = newSVpvn(s, ptr-s);
1103 if (howlen == e_star) /* exact for 'Z*' */
1104 len = ptr-s + (ptr != strend ? 1 : 0);
1105 } else if (datumtype == 'A') {
1106 /* 'A' strips both nulls and spaces */
1108 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1109 for (ptr = s+len-1; ptr >= s; ptr--)
1110 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1111 !isSPACE_utf8(ptr)) break;
1112 if (ptr >= s) ptr += UTF8SKIP(ptr);
1115 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1117 for (ptr = s+len-1; ptr >= s; ptr--)
1118 if (*ptr != 0 && !isSPACE(*ptr)) break;
1121 sv = newSVpvn(s, ptr-s);
1122 } else sv = newSVpvn(s, len);
1126 /* Undo any upgrade done due to need_utf8() */
1127 if (!(symptr->flags & FLAG_WAS_UTF8))
1128 sv_utf8_downgrade(sv, 0);
1136 if (howlen == e_star || len > (strend - s) * 8)
1137 len = (strend - s) * 8;
1140 while (len >= 8 && s < strend) {
1141 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1146 cuv += PL_bitcount[*(U8 *)s++];
1149 if (len && s < strend) {
1151 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1152 if (datumtype == 'b')
1154 if (bits & 1) cuv++;
1159 if (bits & 0x80) cuv++;
1166 sv = sv_2mortal(newSV(len ? len : 1));
1169 if (datumtype == 'b') {
1171 const I32 ai32 = len;
1172 for (len = 0; len < ai32; len++) {
1173 if (len & 7) bits >>= 1;
1175 if (s >= strend) break;
1176 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1177 } else bits = *(U8 *) s++;
1178 *str++ = bits & 1 ? '1' : '0';
1182 const I32 ai32 = len;
1183 for (len = 0; len < ai32; len++) {
1184 if (len & 7) bits <<= 1;
1186 if (s >= strend) break;
1187 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1188 } else bits = *(U8 *) s++;
1189 *str++ = bits & 0x80 ? '1' : '0';
1193 SvCUR_set(sv, str - SvPVX_const(sv));
1200 /* Preliminary length estimate, acceptable for utf8 too */
1201 if (howlen == e_star || len > (strend - s) * 2)
1202 len = (strend - s) * 2;
1204 sv = sv_2mortal(newSV(len ? len : 1));
1208 if (datumtype == 'h') {
1211 for (len = 0; len < ai32; len++) {
1212 if (len & 1) bits >>= 4;
1214 if (s >= strend) break;
1215 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1216 } else bits = * (U8 *) s++;
1218 *str++ = PL_hexdigit[bits & 15];
1222 const I32 ai32 = len;
1223 for (len = 0; len < ai32; len++) {
1224 if (len & 1) bits <<= 4;
1226 if (s >= strend) break;
1227 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1228 } else bits = *(U8 *) s++;
1230 *str++ = PL_hexdigit[(bits >> 4) & 15];
1235 SvCUR_set(sv, str - SvPVX_const(sv));
1242 if (explicit_length)
1243 /* Switch to "character" mode */
1244 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1249 while (len-- > 0 && s < strend) {
1254 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1255 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1256 if (retlen == (STRLEN) -1 || retlen == 0)
1257 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1261 aint = *(U8 *)(s)++;
1262 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1266 else if (checksum > bits_in_uv)
1267 cdouble += (NV)aint;
1275 while (len-- > 0 && s < strend) {
1277 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1278 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1279 if (retlen == (STRLEN) -1 || retlen == 0)
1280 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1284 else if (checksum > bits_in_uv)
1285 cdouble += (NV) val;
1289 } else if (!checksum)
1291 const U8 ch = *(U8 *) s++;
1294 else if (checksum > bits_in_uv)
1295 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1297 while (len-- > 0) cuv += *(U8 *) s++;
1301 if (explicit_length && howlen != e_star) {
1302 /* Switch to "bytes in UTF-8" mode */
1303 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1305 /* Should be impossible due to the need_utf8() test */
1306 Perl_croak(aTHX_ "U0 mode on a byte string");
1310 if (len > strend - s) len = strend - s;
1312 if (len && unpack_only_one) len = 1;
1316 while (len-- > 0 && s < strend) {
1320 U8 result[UTF8_MAXLEN];
1321 const char *ptr = s;
1323 /* Bug: warns about bad utf8 even if we are short on bytes
1324 and will break out of the loop */
1325 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1328 len = UTF8SKIP(result);
1329 if (!uni_to_bytes(aTHX_ &ptr, strend,
1330 (char *) &result[1], len-1, 'U')) break;
1331 auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT);
1334 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
1335 if (retlen == (STRLEN) -1 || retlen == 0)
1336 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1341 else if (checksum > bits_in_uv)
1342 cdouble += (NV) auv;
1347 case 's' | TYPE_IS_SHRIEKING:
1348 #if SHORTSIZE != SIZE16
1351 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
1354 else if (checksum > bits_in_uv)
1355 cdouble += (NV)ashort;
1367 #if U16SIZE > SIZE16
1370 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1371 #if U16SIZE > SIZE16
1377 else if (checksum > bits_in_uv)
1378 cdouble += (NV)ai16;
1383 case 'S' | TYPE_IS_SHRIEKING:
1384 #if SHORTSIZE != SIZE16
1386 unsigned short aushort;
1387 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap,
1391 else if (checksum > bits_in_uv)
1392 cdouble += (NV)aushort;
1405 #if U16SIZE > SIZE16
1408 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
1409 if (datumtype == 'n')
1410 au16 = PerlSock_ntohs(au16);
1411 if (datumtype == 'v')
1415 else if (checksum > bits_in_uv)
1416 cdouble += (NV) au16;
1421 case 'v' | TYPE_IS_SHRIEKING:
1422 case 'n' | TYPE_IS_SHRIEKING:
1425 # if U16SIZE > SIZE16
1428 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1429 /* There should never be any byte-swapping here. */
1430 assert(!TYPE_ENDIANNESS(datumtype));
1431 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1432 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1433 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1434 ai16 = (I16) vtohs((U16) ai16);
1437 else if (checksum > bits_in_uv)
1438 cdouble += (NV) ai16;
1444 case 'i' | TYPE_IS_SHRIEKING:
1447 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
1450 else if (checksum > bits_in_uv)
1451 cdouble += (NV)aint;
1457 case 'I' | TYPE_IS_SHRIEKING:
1460 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
1463 else if (checksum > bits_in_uv)
1464 cdouble += (NV)auint;
1472 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
1475 else if (checksum > bits_in_uv)
1484 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
1487 else if (checksum > bits_in_uv)
1493 case 'l' | TYPE_IS_SHRIEKING:
1494 #if LONGSIZE != SIZE32
1497 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
1500 else if (checksum > bits_in_uv)
1501 cdouble += (NV)along;
1512 #if U32SIZE > SIZE32
1515 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1516 #if U32SIZE > SIZE32
1517 if (ai32 > 2147483647) ai32 -= 4294967296;
1521 else if (checksum > bits_in_uv)
1522 cdouble += (NV)ai32;
1527 case 'L' | TYPE_IS_SHRIEKING:
1528 #if LONGSIZE != SIZE32
1530 unsigned long aulong;
1531 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
1534 else if (checksum > bits_in_uv)
1535 cdouble += (NV)aulong;
1548 #if U32SIZE > SIZE32
1551 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
1552 if (datumtype == 'N')
1553 au32 = PerlSock_ntohl(au32);
1554 if (datumtype == 'V')
1558 else if (checksum > bits_in_uv)
1559 cdouble += (NV)au32;
1564 case 'V' | TYPE_IS_SHRIEKING:
1565 case 'N' | TYPE_IS_SHRIEKING:
1568 #if U32SIZE > SIZE32
1571 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1572 /* There should never be any byte swapping here. */
1573 assert(!TYPE_ENDIANNESS(datumtype));
1574 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1575 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1576 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1577 ai32 = (I32)vtohl((U32)ai32);
1580 else if (checksum > bits_in_uv)
1581 cdouble += (NV)ai32;
1589 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1590 /* newSVpv generates undef if aptr is NULL */
1591 mPUSHs(newSVpv(aptr, 0));
1599 while (len > 0 && s < strend) {
1601 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1602 auv = (auv << 7) | (ch & 0x7f);
1603 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1611 if (++bytes >= sizeof(UV)) { /* promote to string */
1614 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
1615 while (s < strend) {
1616 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1617 sv = mul128(sv, (U8)(ch & 0x7f));
1623 t = SvPV_nolen_const(sv);
1632 if ((s >= strend) && bytes)
1633 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1637 if (symptr->howlen == e_star)
1638 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1640 if (s + sizeof(char*) <= strend) {
1642 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1643 /* newSVpvn generates undef if aptr is NULL */
1644 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1651 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
1653 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
1654 newSViv((IV)aquad) : newSVnv((NV)aquad));
1655 else if (checksum > bits_in_uv)
1656 cdouble += (NV)aquad;
1664 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
1666 mPUSHs(auquad <= UV_MAX ?
1667 newSVuv((UV)auquad) : newSVnv((NV)auquad));
1668 else if (checksum > bits_in_uv)
1669 cdouble += (NV)auquad;
1674 #endif /* HAS_QUAD */
1675 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1679 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
1689 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
1699 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1700 datumtype, needs_swap);
1707 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1711 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1712 sizeof(aldouble.bytes), datumtype, needs_swap);
1714 mPUSHn(aldouble.ld);
1716 cdouble += aldouble.ld;
1722 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1723 sv = sv_2mortal(newSV(l));
1724 if (l) SvPOK_on(sv);
1727 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1732 next_uni_uu(aTHX_ &s, strend, &a);
1733 next_uni_uu(aTHX_ &s, strend, &b);
1734 next_uni_uu(aTHX_ &s, strend, &c);
1735 next_uni_uu(aTHX_ &s, strend, &d);
1736 hunk[0] = (char)((a << 2) | (b >> 4));
1737 hunk[1] = (char)((b << 4) | (c >> 2));
1738 hunk[2] = (char)((c << 6) | d);
1740 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1748 /* possible checksum byte */
1749 const char *skip = s+UTF8SKIP(s);
1750 if (skip < strend && *skip == '\n')
1756 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1760 len = PL_uudmap[*(U8*)s++] & 077;
1762 if (s < strend && ISUUCHAR(*s))
1763 a = PL_uudmap[*(U8*)s++] & 077;
1766 if (s < strend && ISUUCHAR(*s))
1767 b = PL_uudmap[*(U8*)s++] & 077;
1770 if (s < strend && ISUUCHAR(*s))
1771 c = PL_uudmap[*(U8*)s++] & 077;
1774 if (s < strend && ISUUCHAR(*s))
1775 d = PL_uudmap[*(U8*)s++] & 077;
1778 hunk[0] = (char)((a << 2) | (b >> 4));
1779 hunk[1] = (char)((b << 4) | (c >> 2));
1780 hunk[2] = (char)((c << 6) | d);
1782 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1787 else /* possible checksum byte */
1788 if (s + 1 < strend && s[1] == '\n')
1798 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1799 (checksum > bits_in_uv &&
1800 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1803 anv = (NV) (1 << (checksum & 15));
1804 while (checksum >= 16) {
1808 while (cdouble < 0.0)
1810 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
1811 sv = newSVnv(cdouble);
1814 if (checksum < bits_in_uv) {
1815 UV mask = ((UV)1 << checksum) - 1;
1824 if (symptr->flags & FLAG_SLASH){
1825 if (SP - PL_stack_base - start_sp_offset <= 0)
1827 if( next_symbol(symptr) ){
1828 if( symptr->howlen == e_number )
1829 Perl_croak(aTHX_ "Count after length/code in unpack" );
1831 /* ...end of char buffer then no decent length available */
1832 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1834 /* take top of stack (hope it's numeric) */
1837 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1840 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1842 datumtype = symptr->code;
1843 explicit_length = FALSE;
1851 return SP - PL_stack_base - start_sp_offset;
1859 I32 gimme = GIMME_V;
1862 const char *pat = SvPV_const(left, llen);
1863 const char *s = SvPV_const(right, rlen);
1864 const char *strend = s + rlen;
1865 const char *patend = pat + llen;
1869 cnt = unpackstring(pat, patend, s, strend,
1870 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
1871 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
1874 if ( !cnt && gimme == G_SCALAR )
1875 PUSHs(&PL_sv_undef);
1880 doencodes(U8 *h, const char *s, I32 len)
1882 *h++ = PL_uuemap[len];
1884 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1885 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1886 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1887 *h++ = PL_uuemap[(077 & (s[2] & 077))];
1892 const char r = (len > 1 ? s[1] : '\0');
1893 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1894 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1895 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1896 *h++ = PL_uuemap[0];
1903 S_is_an_int(pTHX_ const char *s, STRLEN l)
1905 SV *result = newSVpvn(s, l);
1906 char *const result_c = SvPV_nolen(result); /* convenience */
1907 char *out = result_c;
1911 PERL_ARGS_ASSERT_IS_AN_INT;
1919 SvREFCNT_dec(result);
1942 SvREFCNT_dec(result);
1948 SvCUR_set(result, out - result_c);
1952 /* pnum must be '\0' terminated */
1954 S_div128(pTHX_ SV *pnum, bool *done)
1957 char * const s = SvPV(pnum, len);
1961 PERL_ARGS_ASSERT_DIV128;
1965 const int i = m * 10 + (*t - '0');
1966 const int r = (i >> 7); /* r < 10 */
1974 SvCUR_set(pnum, (STRLEN) (t - s));
1979 =for apidoc packlist
1981 The engine implementing pack() Perl function.
1987 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
1992 PERL_ARGS_ASSERT_PACKLIST;
1994 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
1996 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1997 Also make sure any UTF8 flag is loaded */
1998 SvPV_force_nolen(cat);
2000 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2002 (void)pack_rec( cat, &sym, beglist, endlist );
2005 /* like sv_utf8_upgrade, but also repoint the group start markers */
2007 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2010 const char *from_ptr, *from_start, *from_end, **marks, **m;
2011 char *to_start, *to_ptr;
2013 if (SvUTF8(sv)) return;
2015 from_start = SvPVX_const(sv);
2016 from_end = from_start + SvCUR(sv);
2017 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2018 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2019 if (from_ptr == from_end) {
2020 /* Simple case: no character needs to be changed */
2025 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2026 Newx(to_start, len, char);
2027 Copy(from_start, to_start, from_ptr-from_start, char);
2028 to_ptr = to_start + (from_ptr-from_start);
2030 Newx(marks, sym_ptr->level+2, const char *);
2031 for (group=sym_ptr; group; group = group->previous)
2032 marks[group->level] = from_start + group->strbeg;
2033 marks[sym_ptr->level+1] = from_end+1;
2034 for (m = marks; *m < from_ptr; m++)
2035 *m = to_start + (*m-from_start);
2037 for (;from_ptr < from_end; from_ptr++) {
2038 while (*m == from_ptr) *m++ = to_ptr;
2039 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2043 while (*m == from_ptr) *m++ = to_ptr;
2044 if (m != marks + sym_ptr->level+1) {
2047 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2048 "level=%d", m, marks, sym_ptr->level);
2050 for (group=sym_ptr; group; group = group->previous)
2051 group->strbeg = marks[group->level] - to_start;
2056 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2057 from_start -= SvIVX(sv);
2060 SvFLAGS(sv) &= ~SVf_OOK;
2063 Safefree(from_start);
2064 SvPV_set(sv, to_start);
2065 SvCUR_set(sv, to_ptr - to_start);
2070 /* Exponential string grower. Makes string extension effectively O(n)
2071 needed says how many extra bytes we need (not counting the final '\0')
2072 Only grows the string if there is an actual lack of space
2075 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2076 const STRLEN cur = SvCUR(sv);
2077 const STRLEN len = SvLEN(sv);
2080 PERL_ARGS_ASSERT_SV_EXP_GROW;
2082 if (len - cur > needed) return SvPVX(sv);
2083 extend = needed > len ? needed : len;
2084 return SvGROW(sv, len+extend+1);
2089 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2092 tempsym_t lookahead;
2093 I32 items = endlist - beglist;
2094 bool found = next_symbol(symptr);
2095 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2096 bool warn_utf8 = ckWARN(WARN_UTF8);
2098 PERL_ARGS_ASSERT_PACK_REC;
2100 if (symptr->level == 0 && found && symptr->code == 'U') {
2101 marked_upgrade(aTHX_ cat, symptr);
2102 symptr->flags |= FLAG_DO_UTF8;
2105 symptr->strbeg = SvCUR(cat);
2111 SV *lengthcode = NULL;
2112 I32 datumtype = symptr->code;
2113 howlen_t howlen = symptr->howlen;
2114 char *start = SvPVX(cat);
2115 char *cur = start + SvCUR(cat);
2118 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2122 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2126 /* e_no_len and e_number */
2127 len = symptr->length;
2132 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2134 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2135 /* We can process this letter. */
2136 STRLEN size = props & PACK_SIZE_MASK;
2137 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2141 /* Look ahead for next symbol. Do we have code/code? */
2142 lookahead = *symptr;
2143 found = next_symbol(&lookahead);
2144 if (symptr->flags & FLAG_SLASH) {
2146 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2147 if (strchr("aAZ", lookahead.code)) {
2148 if (lookahead.howlen == e_number) count = lookahead.length;
2151 count = sv_len_utf8(*beglist);
2154 if (lookahead.code == 'Z') count++;
2157 if (lookahead.howlen == e_number && lookahead.length < items)
2158 count = lookahead.length;
2161 lookahead.howlen = e_number;
2162 lookahead.length = count;
2163 lengthcode = sv_2mortal(newSViv(count));
2166 needs_swap = NEEDS_SWAP(datumtype);
2168 /* Code inside the switch must take care to properly update
2169 cat (CUR length and '\0' termination) if it updated *cur and
2170 doesn't simply leave using break */
2171 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2173 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2174 (int) TYPE_NO_MODIFIERS(datumtype));
2176 Perl_croak(aTHX_ "'%%' may not be used in pack");
2179 case '.' | TYPE_IS_SHRIEKING:
2181 if (howlen == e_star) from = start;
2182 else if (len == 0) from = cur;
2184 tempsym_t *group = symptr;
2186 while (--len && group) group = group->previous;
2187 from = group ? start + group->strbeg : start;
2190 len = SvIV(fromstr);
2192 case '@' | TYPE_IS_SHRIEKING:
2194 from = start + symptr->strbeg;
2196 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2198 while (len && from < cur) {
2199 from += UTF8SKIP(from);
2203 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2205 /* Here we know from == cur */
2207 GROWING(0, cat, start, cur, len);
2208 Zero(cur, len, char);
2210 } else if (from < cur) {
2213 } else goto no_change;
2221 if (len > 0) goto grow;
2222 if (len == 0) goto no_change;
2229 tempsym_t savsym = *symptr;
2230 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2231 symptr->flags |= group_modifiers;
2232 symptr->patend = savsym.grpend;
2234 symptr->previous = &lookahead;
2237 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2238 else symptr->flags &= ~FLAG_PARSE_UTF8;
2239 was_utf8 = SvUTF8(cat);
2240 symptr->patptr = savsym.grpbeg;
2241 beglist = pack_rec(cat, symptr, beglist, endlist);
2242 if (SvUTF8(cat) != was_utf8)
2243 /* This had better be an upgrade while in utf8==0 mode */
2246 if (savsym.howlen == e_star && beglist == endlist)
2247 break; /* No way to continue */
2249 items = endlist - beglist;
2250 lookahead.flags = symptr->flags & ~group_modifiers;
2253 case 'X' | TYPE_IS_SHRIEKING:
2254 if (!len) /* Avoid division by 0 */
2261 hop += UTF8SKIP(hop);
2268 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2272 len = (cur-start) % len;
2276 if (len < 1) goto no_change;
2280 Perl_croak(aTHX_ "'%c' outside of string in pack",
2281 (int) TYPE_NO_MODIFIERS(datumtype));
2282 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2284 Perl_croak(aTHX_ "'%c' outside of string in pack",
2285 (int) TYPE_NO_MODIFIERS(datumtype));
2291 if (cur - start < len)
2292 Perl_croak(aTHX_ "'%c' outside of string in pack",
2293 (int) TYPE_NO_MODIFIERS(datumtype));
2296 if (cur < start+symptr->strbeg) {
2297 /* Make sure group starts don't point into the void */
2299 const STRLEN length = cur-start;
2300 for (group = symptr;
2301 group && length < group->strbeg;
2302 group = group->previous) group->strbeg = length;
2303 lookahead.strbeg = length;
2306 case 'x' | TYPE_IS_SHRIEKING: {
2308 if (!len) /* Avoid division by 0 */
2310 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2311 else ai32 = (cur - start) % len;
2312 if (ai32 == 0) goto no_change;
2324 aptr = SvPV_const(fromstr, fromlen);
2325 if (DO_UTF8(fromstr)) {
2326 const char *end, *s;
2328 if (!utf8 && !SvUTF8(cat)) {
2329 marked_upgrade(aTHX_ cat, symptr);
2330 lookahead.flags |= FLAG_DO_UTF8;
2331 lookahead.strbeg = symptr->strbeg;
2334 cur = start + SvCUR(cat);
2336 if (howlen == e_star) {
2337 if (utf8) goto string_copy;
2341 end = aptr + fromlen;
2342 fromlen = datumtype == 'Z' ? len-1 : len;
2343 while ((I32) fromlen > 0 && s < end) {
2348 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2351 if (datumtype == 'Z') len++;
2357 fromlen = len - fromlen;
2358 if (datumtype == 'Z') fromlen--;
2359 if (howlen == e_star) {
2361 if (datumtype == 'Z') len++;
2363 GROWING(0, cat, start, cur, len);
2364 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2365 datumtype | TYPE_IS_PACK))
2366 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2367 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2368 (int)datumtype, aptr, end, cur, (UV)fromlen);
2372 if (howlen == e_star) {
2374 if (datumtype == 'Z') len++;
2376 if (len <= (I32) fromlen) {
2378 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2380 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2382 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2383 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2385 while (fromlen > 0) {
2386 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2392 if (howlen == e_star) {
2394 if (datumtype == 'Z') len++;
2396 if (len <= (I32) fromlen) {
2398 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2400 GROWING(0, cat, start, cur, len);
2401 Copy(aptr, cur, fromlen, char);
2405 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2412 const char *str, *end;
2419 str = SvPV_const(fromstr, fromlen);
2420 end = str + fromlen;
2421 if (DO_UTF8(fromstr)) {
2423 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2425 utf8_source = FALSE;
2426 utf8_flags = 0; /* Unused, but keep compilers happy */
2428 if (howlen == e_star) len = fromlen;
2429 field_len = (len+7)/8;
2430 GROWING(utf8, cat, start, cur, field_len);
2431 if (len > (I32)fromlen) len = fromlen;
2434 if (datumtype == 'B')
2438 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2440 } else bits |= *str++ & 1;
2441 if (l & 7) bits <<= 1;
2443 PUSH_BYTE(utf8, cur, bits);
2448 /* datumtype == 'b' */
2452 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2453 if (val & 1) bits |= 0x80;
2454 } else if (*str++ & 1)
2456 if (l & 7) bits >>= 1;
2458 PUSH_BYTE(utf8, cur, bits);
2464 if (datumtype == 'B')
2465 bits <<= 7 - (l & 7);
2467 bits >>= 7 - (l & 7);
2468 PUSH_BYTE(utf8, cur, bits);
2471 /* Determine how many chars are left in the requested field */
2473 if (howlen == e_star) field_len = 0;
2474 else field_len -= l;
2475 Zero(cur, field_len, char);
2481 const char *str, *end;
2488 str = SvPV_const(fromstr, fromlen);
2489 end = str + fromlen;
2490 if (DO_UTF8(fromstr)) {
2492 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2494 utf8_source = FALSE;
2495 utf8_flags = 0; /* Unused, but keep compilers happy */
2497 if (howlen == e_star) len = fromlen;
2498 field_len = (len+1)/2;
2499 GROWING(utf8, cat, start, cur, field_len);
2500 if (!utf8 && len > (I32)fromlen) len = fromlen;
2503 if (datumtype == 'H')
2507 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2508 if (val < 256 && isALPHA(val))
2509 bits |= (val + 9) & 0xf;
2512 } else if (isALPHA(*str))
2513 bits |= (*str++ + 9) & 0xf;
2515 bits |= *str++ & 0xf;
2516 if (l & 1) bits <<= 4;
2518 PUSH_BYTE(utf8, cur, bits);
2526 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2527 if (val < 256 && isALPHA(val))
2528 bits |= ((val + 9) & 0xf) << 4;
2530 bits |= (val & 0xf) << 4;
2531 } else if (isALPHA(*str))
2532 bits |= ((*str++ + 9) & 0xf) << 4;
2534 bits |= (*str++ & 0xf) << 4;
2535 if (l & 1) bits >>= 4;
2537 PUSH_BYTE(utf8, cur, bits);
2543 PUSH_BYTE(utf8, cur, bits);
2546 /* Determine how many chars are left in the requested field */
2548 if (howlen == e_star) field_len = 0;
2549 else field_len -= l;
2550 Zero(cur, field_len, char);
2558 aiv = SvIV(fromstr);
2559 if ((-128 > aiv || aiv > 127))
2560 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2561 "Character in 'c' format wrapped in pack");
2562 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2567 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2573 aiv = SvIV(fromstr);
2574 if ((0 > aiv || aiv > 0xff))
2575 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2576 "Character in 'C' format wrapped in pack");
2577 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2582 U8 in_bytes = (U8)IN_BYTES;
2584 end = start+SvLEN(cat)-1;
2585 if (utf8) end -= UTF8_MAXLEN-1;
2589 auv = SvUV(fromstr);
2590 if (in_bytes) auv = auv % 0x100;
2595 SvCUR_set(cat, cur - start);
2597 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2598 end = start+SvLEN(cat)-UTF8_MAXLEN;
2600 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2603 0 : UNICODE_ALLOW_ANY);
2608 SvCUR_set(cat, cur - start);
2609 marked_upgrade(aTHX_ cat, symptr);
2610 lookahead.flags |= FLAG_DO_UTF8;
2611 lookahead.strbeg = symptr->strbeg;
2614 cur = start + SvCUR(cat);
2615 end = start+SvLEN(cat)-UTF8_MAXLEN;
2618 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2619 "Character in 'W' format wrapped in pack");
2624 SvCUR_set(cat, cur - start);
2625 GROWING(0, cat, start, cur, len+1);
2626 end = start+SvLEN(cat)-1;
2628 *(U8 *) cur++ = (U8)auv;
2637 if (!(symptr->flags & FLAG_DO_UTF8)) {
2638 marked_upgrade(aTHX_ cat, symptr);
2639 lookahead.flags |= FLAG_DO_UTF8;
2640 lookahead.strbeg = symptr->strbeg;
2646 end = start+SvLEN(cat);
2647 if (!utf8) end -= UTF8_MAXLEN;
2651 auv = SvUV(fromstr);
2653 U8 buffer[UTF8_MAXLEN], *endb;
2654 endb = uvuni_to_utf8_flags(buffer, auv,
2656 0 : UNICODE_ALLOW_ANY);
2657 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2659 SvCUR_set(cat, cur - start);
2660 GROWING(0, cat, start, cur,
2661 len+(endb-buffer)*UTF8_EXPAND);
2662 end = start+SvLEN(cat);
2664 cur = S_bytes_to_uni(buffer, endb-buffer, cur, 0);
2668 SvCUR_set(cat, cur - start);
2669 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2670 end = start+SvLEN(cat)-UTF8_MAXLEN;
2672 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
2674 0 : UNICODE_ALLOW_ANY);
2679 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2685 anv = SvNV(fromstr);
2686 # if defined(VMS) && !defined(_IEEE_FP)
2687 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2688 * on Alpha; fake it if we don't have them.
2692 else if (anv < -FLT_MAX)
2694 else afloat = (float)anv;
2696 afloat = (float)anv;
2698 PUSH_VAR(utf8, cur, afloat, needs_swap);
2706 anv = SvNV(fromstr);
2707 # if defined(VMS) && !defined(_IEEE_FP)
2708 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2709 * on Alpha; fake it if we don't have them.
2713 else if (anv < -DBL_MAX)
2715 else adouble = (double)anv;
2717 adouble = (double)anv;
2719 PUSH_VAR(utf8, cur, adouble, needs_swap);
2724 Zero(&anv, 1, NV); /* can be long double with unused bits */
2728 /* to work round a gcc/x86 bug; don't use SvNV */
2729 anv.nv = sv_2nv(fromstr);
2731 anv.nv = SvNV(fromstr);
2733 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
2737 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2740 /* long doubles can have unused bits, which may be nonzero */
2741 Zero(&aldouble, 1, long double);
2745 /* to work round a gcc/x86 bug; don't use SvNV */
2746 aldouble.ld = (long double)sv_2nv(fromstr);
2748 aldouble.ld = (long double)SvNV(fromstr);
2750 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2756 case 'n' | TYPE_IS_SHRIEKING:
2761 ai16 = (I16)SvIV(fromstr);
2762 ai16 = PerlSock_htons(ai16);
2763 PUSH16(utf8, cur, &ai16, FALSE);
2766 case 'v' | TYPE_IS_SHRIEKING:
2771 ai16 = (I16)SvIV(fromstr);
2773 PUSH16(utf8, cur, &ai16, FALSE);
2776 case 'S' | TYPE_IS_SHRIEKING:
2777 #if SHORTSIZE != SIZE16
2779 unsigned short aushort;
2781 aushort = SvUV(fromstr);
2782 PUSH_VAR(utf8, cur, aushort, needs_swap);
2792 au16 = (U16)SvUV(fromstr);
2793 PUSH16(utf8, cur, &au16, needs_swap);
2796 case 's' | TYPE_IS_SHRIEKING:
2797 #if SHORTSIZE != SIZE16
2801 ashort = SvIV(fromstr);
2802 PUSH_VAR(utf8, cur, ashort, needs_swap);
2812 ai16 = (I16)SvIV(fromstr);
2813 PUSH16(utf8, cur, &ai16, needs_swap);
2817 case 'I' | TYPE_IS_SHRIEKING:
2821 auint = SvUV(fromstr);
2822 PUSH_VAR(utf8, cur, auint, needs_swap);
2829 aiv = SvIV(fromstr);
2830 PUSH_VAR(utf8, cur, aiv, needs_swap);
2837 auv = SvUV(fromstr);
2838 PUSH_VAR(utf8, cur, auv, needs_swap);
2845 anv = SvNV(fromstr);
2849 SvCUR_set(cat, cur - start);
2850 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
2853 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2854 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2855 any negative IVs will have already been got by the croak()
2856 above. IOK is untrue for fractions, so we test them
2857 against UV_MAX_P1. */
2858 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2859 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
2860 char *in = buf + sizeof(buf);
2861 UV auv = SvUV(fromstr);
2864 *--in = (char)((auv & 0x7f) | 0x80);
2867 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2868 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2869 in, (buf + sizeof(buf)) - in);
2870 } else if (SvPOKp(fromstr))
2872 else if (SvNOKp(fromstr)) {
2873 /* 10**NV_MAX_10_EXP is the largest power of 10
2874 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
2875 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2876 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2877 And with that many bytes only Inf can overflow.
2878 Some C compilers are strict about integral constant
2879 expressions so we conservatively divide by a slightly
2880 smaller integer instead of multiplying by the exact
2881 floating-point value.
2883 #ifdef NV_MAX_10_EXP
2884 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
2885 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
2887 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
2888 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
2890 char *in = buf + sizeof(buf);
2892 anv = Perl_floor(anv);
2894 const NV next = Perl_floor(anv / 128);
2895 if (in <= buf) /* this cannot happen ;-) */
2896 Perl_croak(aTHX_ "Cannot compress integer in pack");
2897 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
2900 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2901 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2902 in, (buf + sizeof(buf)) - in);
2911 /* Copy string and check for compliance */
2912 from = SvPV_const(fromstr, len);
2913 if ((norm = is_an_int(from, len)) == NULL)
2914 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
2916 Newx(result, len, char);
2919 while (!done) *--in = div128(norm, &done) | 0x80;
2920 result[len - 1] &= 0x7F; /* clear continue bit */
2921 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2922 in, (result + len) - in);
2924 SvREFCNT_dec(norm); /* free norm */
2929 case 'i' | TYPE_IS_SHRIEKING:
2933 aint = SvIV(fromstr);
2934 PUSH_VAR(utf8, cur, aint, needs_swap);
2937 case 'N' | TYPE_IS_SHRIEKING:
2942 au32 = SvUV(fromstr);
2943 au32 = PerlSock_htonl(au32);
2944 PUSH32(utf8, cur, &au32, FALSE);
2947 case 'V' | TYPE_IS_SHRIEKING:
2952 au32 = SvUV(fromstr);
2954 PUSH32(utf8, cur, &au32, FALSE);
2957 case 'L' | TYPE_IS_SHRIEKING:
2958 #if LONGSIZE != SIZE32
2960 unsigned long aulong;
2962 aulong = SvUV(fromstr);
2963 PUSH_VAR(utf8, cur, aulong, needs_swap);
2973 au32 = SvUV(fromstr);
2974 PUSH32(utf8, cur, &au32, needs_swap);
2977 case 'l' | TYPE_IS_SHRIEKING:
2978 #if LONGSIZE != SIZE32
2982 along = SvIV(fromstr);
2983 PUSH_VAR(utf8, cur, along, needs_swap);
2993 ai32 = SvIV(fromstr);
2994 PUSH32(utf8, cur, &ai32, needs_swap);
3002 auquad = (Uquad_t) SvUV(fromstr);
3003 PUSH_VAR(utf8, cur, auquad, needs_swap);
3010 aquad = (Quad_t)SvIV(fromstr);
3011 PUSH_VAR(utf8, cur, aquad, needs_swap);
3014 #endif /* HAS_QUAD */
3016 len = 1; /* assume SV is correct length */
3017 GROWING(utf8, cat, start, cur, sizeof(char *));
3024 SvGETMAGIC(fromstr);
3025 if (!SvOK(fromstr)) aptr = NULL;
3027 /* XXX better yet, could spirit away the string to
3028 * a safe spot and hang on to it until the result
3029 * of pack() (and all copies of the result) are
3032 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3033 !SvREADONLY(fromstr)))) {
3034 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3035 "Attempt to pack pointer to temporary value");
3037 if (SvPOK(fromstr) || SvNIOK(fromstr))
3038 aptr = SvPV_nomg_const_nolen(fromstr);
3040 aptr = SvPV_force_flags_nolen(fromstr, 0);
3042 PUSH_VAR(utf8, cur, aptr, needs_swap);
3046 const char *aptr, *aend;
3050 if (len <= 2) len = 45;
3051 else len = len / 3 * 3;
3053 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3054 "Field too wide in 'u' format in pack");
3057 aptr = SvPV_const(fromstr, fromlen);
3058 from_utf8 = DO_UTF8(fromstr);
3060 aend = aptr + fromlen;
3061 fromlen = sv_len_utf8_nomg(fromstr);
3062 } else aend = NULL; /* Unused, but keep compilers happy */
3063 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3064 while (fromlen > 0) {
3067 U8 hunk[1+63/3*4+1];
3069 if ((I32)fromlen > len)
3075 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3076 'u' | TYPE_IS_PACK)) {
3078 SvCUR_set(cat, cur - start);
3079 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3080 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3081 aptr, aend, buffer, (long) todo);
3083 end = doencodes(hunk, buffer, todo);
3085 end = doencodes(hunk, aptr, todo);
3088 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
3095 SvCUR_set(cat, cur - start);
3097 *symptr = lookahead;
3106 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3109 SV *pat_sv = *++MARK;
3110 const char *pat = SvPV_const(pat_sv, fromlen);
3111 const char *patend = pat + fromlen;
3117 packlist(cat, pat, patend, MARK, SP + 1);
3127 * c-indentation-style: bsd
3129 * indent-tabs-mode: nil
3132 * ex: set ts=8 sts=4 sw=4 et: