3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
32 #define PERL_IN_PP_PACK_C
35 /* Types used by pack/unpack */
37 e_no_len, /* no length */
38 e_number, /* number, [] */
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
57 #define TEMPSYM_INIT(symptr, p, e, f) \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
81 U8 bytes[sizeof(long double)];
88 /* Maximum number of bytes to which a byte can grow due to upgrade */
92 * Offset for integer pack/unpack.
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
112 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
115 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
116 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
117 # define OFF16(p) ((char*)(p))
118 # define OFF32(p) ((char*)(p))
120 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
121 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
124 ++++ bad cray byte order
128 # define OFF16(p) ((char *) (p))
129 # define OFF32(p) ((char *) (p))
132 #define PUSH16(utf8, cur, p, needs_swap) \
133 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
134 #define PUSH32(utf8, cur, p, needs_swap) \
135 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
137 #if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
138 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
139 #elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
140 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
142 # error "Unsupported byteorder"
143 /* Need to add code here to re-instate mixed endian support.
144 NEEDS_SWAP would need to hold a flag indicating which action to
145 take, and S_reverse_copy and the code in uni_to_bytes would need
146 logic adding to deal with any mixed-endian transformations needed.
150 /* Only to be used inside a loop (see the break) */
151 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
153 if (UNLIKELY(utf8)) { \
154 if (!uni_to_bytes(aTHX_ &s, strend, \
155 (char *) (buf), len, datumtype)) break; \
157 if (UNLIKELY(needs_swap)) \
158 S_reverse_copy(s, (char *) (buf), len); \
160 Copy(s, (char *) (buf), len, char); \
165 #define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
166 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
168 #define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
169 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
171 #define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
172 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
174 #define PUSH_VAR(utf8, aptr, var, needs_swap) \
175 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
177 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
178 #define MAX_SUB_TEMPLATE_LEVEL 100
180 /* flags (note that type modifiers can also be used as flags!) */
181 #define FLAG_WAS_UTF8 0x40
182 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
183 #define FLAG_UNPACK_ONLY_ONE 0x10
184 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
185 #define FLAG_SLASH 0x04
186 #define FLAG_COMMA 0x02
187 #define FLAG_PACK 0x01
190 S_mul128(pTHX_ SV *sv, U8 m)
193 char *s = SvPV(sv, len);
196 PERL_ARGS_ASSERT_MUL128;
198 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
199 SV * const tmpNew = newSVpvs("0000000000");
201 sv_catsv(tmpNew, sv);
202 SvREFCNT_dec(sv); /* free old sv */
207 while (!*t) /* trailing '\0'? */
210 const U32 i = ((*t - '0') << 7) + m;
211 *(t--) = '0' + (char)(i % 10);
217 /* Explosives and implosives. */
219 #if 'I' == 73 && 'J' == 74
220 /* On an ASCII/ISO kind of system */
221 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
224 Some other sort of character set - use memchr() so we don't match
227 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
231 #define TYPE_IS_SHRIEKING 0x100
232 #define TYPE_IS_BIG_ENDIAN 0x200
233 #define TYPE_IS_LITTLE_ENDIAN 0x400
234 #define TYPE_IS_PACK 0x800
235 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
236 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
237 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
239 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
240 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
242 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
244 #define PACK_SIZE_CANNOT_CSUM 0x80
245 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
246 #define PACK_SIZE_MASK 0x3F
248 #include "packsizetables.c"
251 S_reverse_copy(const char *src, char *dest, STRLEN len)
259 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
262 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
263 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
264 /* We try to process malformed UTF-8 as much as possible (preferably with
265 warnings), but these two mean we make no progress in the string and
266 might enter an infinite loop */
267 if (retlen == (STRLEN) -1 || retlen == 0)
268 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
269 (int) TYPE_NO_MODIFIERS(datumtype));
271 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
272 "Character in '%c' format wrapped in unpack",
273 (int) TYPE_NO_MODIFIERS(datumtype));
280 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
281 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
285 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
289 const char *from = *s;
291 const U32 flags = ckWARN(WARN_UTF8) ?
292 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
293 const bool needs_swap = NEEDS_SWAP(datumtype);
295 if (UNLIKELY(needs_swap))
298 for (;buf_len > 0; buf_len--) {
299 if (from >= end) return FALSE;
300 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
301 if (retlen == (STRLEN) -1 || retlen == 0) {
302 from += UTF8SKIP(from);
304 } else from += retlen;
309 if (UNLIKELY(needs_swap))
310 *(U8 *)--buf = (U8)val;
312 *(U8 *)buf++ = (U8)val;
314 /* We have enough characters for the buffer. Did we have problems ? */
317 /* Rewalk the string fragment while warning */
319 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
320 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
321 if (ptr >= end) break;
322 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
324 if (from > end) from = end;
327 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
328 WARN_PACK : WARN_UNPACK),
329 "Character(s) in '%c' format wrapped in %s",
330 (int) TYPE_NO_MODIFIERS(datumtype),
331 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
338 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
342 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
343 if (val >= 0x100 || !ISUUCHAR(val) ||
344 retlen == (STRLEN) -1 || retlen == 0) {
348 *out = PL_uudmap[val] & 077;
354 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
355 PERL_ARGS_ASSERT_BYTES_TO_UNI;
357 if (UNLIKELY(needs_swap)) {
358 const U8 *p = start + len;
359 while (p-- > start) {
360 append_utf8_from_native_byte(*p, (U8 **) & dest);
363 const U8 * const end = start + len;
364 while (start < end) {
365 append_utf8_from_native_byte(*start, (U8 **) & dest);
372 #define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
374 if (UNLIKELY(utf8)) \
375 (cur) = S_bytes_to_uni((U8 *) buf, len, (cur), needs_swap); \
377 if (UNLIKELY(needs_swap)) \
378 S_reverse_copy((char *)(buf), cur, len); \
380 Copy(buf, cur, len, char); \
385 #define GROWING(utf8, cat, start, cur, in_len) \
387 STRLEN glen = (in_len); \
388 if (utf8) glen *= UTF8_EXPAND; \
389 if ((cur) + glen >= (start) + SvLEN(cat)) { \
390 (start) = sv_exp_grow(cat, glen); \
391 (cur) = (start) + SvCUR(cat); \
395 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
397 const STRLEN glen = (in_len); \
399 if (utf8) gl *= UTF8_EXPAND; \
400 if ((cur) + gl >= (start) + SvLEN(cat)) { \
402 SvCUR_set((cat), (cur) - (start)); \
403 (start) = sv_exp_grow(cat, gl); \
404 (cur) = (start) + SvCUR(cat); \
406 PUSH_BYTES(utf8, cur, buf, glen, 0); \
409 #define PUSH_BYTE(utf8, s, byte) \
412 const U8 au8 = (byte); \
413 (s) = S_bytes_to_uni(&au8, 1, (s), 0); \
414 } else *(U8 *)(s)++ = (byte); \
417 /* Only to be used inside a loop (see the break) */
418 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
421 if (str >= end) break; \
422 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
423 if (retlen == (STRLEN) -1 || retlen == 0) { \
425 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
430 static const char *_action( const tempsym_t* symptr )
432 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
435 /* Returns the sizeof() struct described by pat */
437 S_measure_struct(pTHX_ tempsym_t* symptr)
441 PERL_ARGS_ASSERT_MEASURE_STRUCT;
443 while (next_symbol(symptr)) {
447 switch (symptr->howlen) {
449 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
453 /* e_no_len and e_number */
454 len = symptr->length;
458 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
461 /* endianness doesn't influence the size of a type */
462 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
464 Perl_croak(aTHX_ "Invalid type '%c' in %s",
465 (int)TYPE_NO_MODIFIERS(symptr->code),
467 case '.' | TYPE_IS_SHRIEKING:
468 case '@' | TYPE_IS_SHRIEKING:
472 case 'U': /* XXXX Is it correct? */
475 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
476 (int) TYPE_NO_MODIFIERS(symptr->code),
483 tempsym_t savsym = *symptr;
484 symptr->patptr = savsym.grpbeg;
485 symptr->patend = savsym.grpend;
486 /* XXXX Theoretically, we need to measure many times at
487 different positions, since the subexpression may contain
488 alignment commands, but be not of aligned length.
489 Need to detect this and croak(). */
490 size = measure_struct(symptr);
494 case 'X' | TYPE_IS_SHRIEKING:
495 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
497 if (!len) /* Avoid division by 0 */
499 len = total % len; /* Assumed: the start is aligned. */
504 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
506 case 'x' | TYPE_IS_SHRIEKING:
507 if (!len) /* Avoid division by 0 */
509 star = total % len; /* Assumed: the start is aligned. */
510 if (star) /* Other portable ways? */
534 size = sizeof(char*);
544 /* locate matching closing parenthesis or bracket
545 * returns char pointer to char after match, or NULL
548 S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
550 PERL_ARGS_ASSERT_GROUP_END;
552 while (patptr < patend) {
553 const char c = *patptr++;
560 while (patptr < patend && *patptr != '\n')
564 patptr = group_end(patptr, patend, ')') + 1;
566 patptr = group_end(patptr, patend, ']') + 1;
568 Perl_croak(aTHX_ "No group ending character '%c' found in template",
574 /* Convert unsigned decimal number to binary.
575 * Expects a pointer to the first digit and address of length variable
576 * Advances char pointer to 1st non-digit char and returns number
579 S_get_num(pTHX_ const char *patptr, I32 *lenptr )
581 I32 len = *patptr++ - '0';
583 PERL_ARGS_ASSERT_GET_NUM;
585 while (isDIGIT(*patptr)) {
586 if (len >= 0x7FFFFFFF/10)
587 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
588 len = (len * 10) + (*patptr++ - '0');
594 /* The marvellous template parsing routine: Using state stored in *symptr,
595 * locates next template code and count
598 S_next_symbol(pTHX_ tempsym_t* symptr )
600 const char* patptr = symptr->patptr;
601 const char* const patend = symptr->patend;
603 PERL_ARGS_ASSERT_NEXT_SYMBOL;
605 symptr->flags &= ~FLAG_SLASH;
607 while (patptr < patend) {
608 if (isSPACE(*patptr))
610 else if (*patptr == '#') {
612 while (patptr < patend && *patptr != '\n')
617 /* We should have found a template code */
618 I32 code = *patptr++ & 0xFF;
619 U32 inherited_modifiers = 0;
621 if (code == ','){ /* grandfather in commas but with a warning */
622 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
623 symptr->flags |= FLAG_COMMA;
624 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
625 "Invalid type ',' in %s", _action( symptr ) );
630 /* for '(', skip to ')' */
632 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
633 Perl_croak(aTHX_ "()-group starts with a count in %s",
635 symptr->grpbeg = patptr;
636 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
637 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
638 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
642 /* look for group modifiers to inherit */
643 if (TYPE_ENDIANNESS(symptr->flags)) {
644 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
645 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
648 /* look for modifiers */
649 while (patptr < patend) {
654 modifier = TYPE_IS_SHRIEKING;
655 allowed = "sSiIlLxXnNvV@.";
658 modifier = TYPE_IS_BIG_ENDIAN;
659 allowed = ENDIANNESS_ALLOWED_TYPES;
662 modifier = TYPE_IS_LITTLE_ENDIAN;
663 allowed = ENDIANNESS_ALLOWED_TYPES;
674 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
675 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
676 allowed, _action( symptr ) );
678 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
679 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
680 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
681 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
682 TYPE_ENDIANNESS_MASK)
683 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
684 *patptr, _action( symptr ) );
686 if ((code & modifier)) {
687 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
688 "Duplicate modifier '%c' after '%c' in %s",
689 *patptr, (int) TYPE_NO_MODIFIERS(code),
697 /* inherit modifiers */
698 code |= inherited_modifiers;
700 /* look for count and/or / */
701 if (patptr < patend) {
702 if (isDIGIT(*patptr)) {
703 patptr = get_num( patptr, &symptr->length );
704 symptr->howlen = e_number;
706 } else if (*patptr == '*') {
708 symptr->howlen = e_star;
710 } else if (*patptr == '[') {
711 const char* lenptr = ++patptr;
712 symptr->howlen = e_number;
713 patptr = group_end( patptr, patend, ']' ) + 1;
714 /* what kind of [] is it? */
715 if (isDIGIT(*lenptr)) {
716 lenptr = get_num( lenptr, &symptr->length );
718 Perl_croak(aTHX_ "Malformed integer in [] in %s",
721 tempsym_t savsym = *symptr;
722 symptr->patend = patptr-1;
723 symptr->patptr = lenptr;
724 savsym.length = measure_struct(symptr);
728 symptr->howlen = e_no_len;
733 while (patptr < patend) {
734 if (isSPACE(*patptr))
736 else if (*patptr == '#') {
738 while (patptr < patend && *patptr != '\n')
743 if (*patptr == '/') {
744 symptr->flags |= FLAG_SLASH;
746 if (patptr < patend &&
747 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
748 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
755 /* at end - no count, no / */
756 symptr->howlen = e_no_len;
761 symptr->patptr = patptr;
765 symptr->patptr = patptr;
770 There is no way to cleanly handle the case where we should process the
771 string per byte in its upgraded form while it's really in downgraded form
772 (e.g. estimates like strend-s as an upper bound for the number of
773 characters left wouldn't work). So if we foresee the need of this
774 (pattern starts with U or contains U0), we want to work on the encoded
775 version of the string. Users are advised to upgrade their pack string
776 themselves if they need to do a lot of unpacks like this on it
779 need_utf8(const char *pat, const char *patend)
783 PERL_ARGS_ASSERT_NEED_UTF8;
785 while (pat < patend) {
788 pat = (const char *) memchr(pat, '\n', patend-pat);
789 if (!pat) return FALSE;
790 } else if (pat[0] == 'U') {
791 if (first || pat[1] == '0') return TRUE;
792 } else first = FALSE;
799 first_symbol(const char *pat, const char *patend) {
800 PERL_ARGS_ASSERT_FIRST_SYMBOL;
802 while (pat < patend) {
803 if (pat[0] != '#') return pat[0];
805 pat = (const char *) memchr(pat, '\n', patend-pat);
813 =for apidoc unpackstring
815 The engine implementing the unpack() Perl function.
817 Using the template pat..patend, this function unpacks the string
818 s..strend into a number of mortal SVs, which it pushes onto the perl
819 argument (@_) stack (so you will need to issue a C<PUTBACK> before and
820 C<SPAGAIN> after the call to this function). It returns the number of
823 The strend and patend pointers should point to the byte following the last
824 character of each string.
826 Although this function returns its values on the perl argument stack, it
827 doesn't take any parameters from that stack (and thus in particular
828 there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
834 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
838 PERL_ARGS_ASSERT_UNPACKSTRING;
840 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
841 else if (need_utf8(pat, patend)) {
842 /* We probably should try to avoid this in case a scalar context call
843 wouldn't get to the "U0" */
844 STRLEN len = strend - s;
845 s = (char *) bytes_to_utf8((U8 *) s, &len);
848 flags |= FLAG_DO_UTF8;
851 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
852 flags |= FLAG_PARSE_UTF8;
854 TEMPSYM_INIT(&sym, pat, patend, flags);
856 return unpack_rec(&sym, s, s, strend, NULL );
860 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
864 const I32 start_sp_offset = SP - PL_stack_base;
869 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
871 bool explicit_length;
872 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
873 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
875 PERL_ARGS_ASSERT_UNPACK_REC;
877 symptr->strbeg = s - strbeg;
879 while (next_symbol(symptr)) {
882 I32 datumtype = symptr->code;
884 /* do first one only unless in list context
885 / is implemented by unpacking the count, then popping it from the
886 stack, so must check that we're not in the middle of a / */
888 && (SP - PL_stack_base == start_sp_offset + 1)
889 && (datumtype != '/') ) /* XXX can this be omitted */
892 switch (howlen = symptr->howlen) {
894 len = strend - strbeg; /* long enough */
897 /* e_no_len and e_number */
898 len = symptr->length;
902 explicit_length = TRUE;
904 beyond = s >= strend;
906 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
908 /* props nonzero means we can process this letter. */
909 const long size = props & PACK_SIZE_MASK;
910 const long howmany = (strend - s) / size;
914 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
915 if (len && unpack_only_one) len = 1;
921 needs_swap = NEEDS_SWAP(datumtype);
923 switch(TYPE_NO_ENDIANNESS(datumtype)) {
925 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
928 if (howlen == e_no_len)
929 len = 16; /* len is not specified */
937 tempsym_t savsym = *symptr;
938 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
939 symptr->flags |= group_modifiers;
940 symptr->patend = savsym.grpend;
941 symptr->previous = &savsym;
944 if (len && unpack_only_one) len = 1;
946 symptr->patptr = savsym.grpbeg;
947 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
948 else symptr->flags &= ~FLAG_PARSE_UTF8;
949 unpack_rec(symptr, s, strbeg, strend, &s);
950 if (s == strend && savsym.howlen == e_star)
951 break; /* No way to continue */
954 savsym.flags = symptr->flags & ~group_modifiers;
958 case '.' | TYPE_IS_SHRIEKING:
962 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
963 if (howlen == e_star) from = strbeg;
964 else if (len <= 0) from = s;
966 tempsym_t *group = symptr;
968 while (--len && group) group = group->previous;
969 from = group ? strbeg + group->strbeg : strbeg;
972 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
973 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
977 case '@' | TYPE_IS_SHRIEKING:
979 s = strbeg + symptr->strbeg;
980 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
984 Perl_croak(aTHX_ "'@' outside of string in unpack");
989 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
992 Perl_croak(aTHX_ "'@' outside of string in unpack");
996 case 'X' | TYPE_IS_SHRIEKING:
997 if (!len) /* Avoid division by 0 */
1000 const char *hop, *last;
1002 hop = last = strbeg;
1004 hop += UTF8SKIP(hop);
1011 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1015 len = (s - strbeg) % len;
1021 Perl_croak(aTHX_ "'X' outside of string in unpack");
1022 while (--s, UTF8_IS_CONTINUATION(*s)) {
1024 Perl_croak(aTHX_ "'X' outside of string in unpack");
1029 if (len > s - strbeg)
1030 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1034 case 'x' | TYPE_IS_SHRIEKING: {
1036 if (!len) /* Avoid division by 0 */
1038 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1039 else ai32 = (s - strbeg) % len;
1040 if (ai32 == 0) break;
1048 Perl_croak(aTHX_ "'x' outside of string in unpack");
1053 if (len > strend - s)
1054 Perl_croak(aTHX_ "'x' outside of string in unpack");
1059 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1065 /* Preliminary length estimate is assumed done in 'W' */
1066 if (len > strend - s) len = strend - s;
1072 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1073 if (hop >= strend) {
1075 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1080 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1082 } else if (len > strend - s)
1085 if (datumtype == 'Z') {
1086 /* 'Z' strips stuff after first null */
1087 const char *ptr, *end;
1089 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1090 sv = newSVpvn(s, ptr-s);
1091 if (howlen == e_star) /* exact for 'Z*' */
1092 len = ptr-s + (ptr != strend ? 1 : 0);
1093 } else if (datumtype == 'A') {
1094 /* 'A' strips both nulls and spaces */
1096 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1097 for (ptr = s+len-1; ptr >= s; ptr--)
1098 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1099 !isSPACE_utf8(ptr)) break;
1100 if (ptr >= s) ptr += UTF8SKIP(ptr);
1103 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1105 for (ptr = s+len-1; ptr >= s; ptr--)
1106 if (*ptr != 0 && !isSPACE(*ptr)) break;
1109 sv = newSVpvn(s, ptr-s);
1110 } else sv = newSVpvn(s, len);
1114 /* Undo any upgrade done due to need_utf8() */
1115 if (!(symptr->flags & FLAG_WAS_UTF8))
1116 sv_utf8_downgrade(sv, 0);
1124 if (howlen == e_star || len > (strend - s) * 8)
1125 len = (strend - s) * 8;
1128 while (len >= 8 && s < strend) {
1129 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1134 cuv += PL_bitcount[*(U8 *)s++];
1137 if (len && s < strend) {
1139 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1140 if (datumtype == 'b')
1142 if (bits & 1) cuv++;
1147 if (bits & 0x80) cuv++;
1154 sv = sv_2mortal(newSV(len ? len : 1));
1157 if (datumtype == 'b') {
1159 const I32 ai32 = len;
1160 for (len = 0; len < ai32; len++) {
1161 if (len & 7) bits >>= 1;
1163 if (s >= strend) break;
1164 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1165 } else bits = *(U8 *) s++;
1166 *str++ = bits & 1 ? '1' : '0';
1170 const I32 ai32 = len;
1171 for (len = 0; len < ai32; len++) {
1172 if (len & 7) bits <<= 1;
1174 if (s >= strend) break;
1175 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1176 } else bits = *(U8 *) s++;
1177 *str++ = bits & 0x80 ? '1' : '0';
1181 SvCUR_set(sv, str - SvPVX_const(sv));
1188 /* Preliminary length estimate, acceptable for utf8 too */
1189 if (howlen == e_star || len > (strend - s) * 2)
1190 len = (strend - s) * 2;
1192 sv = sv_2mortal(newSV(len ? len : 1));
1196 if (datumtype == 'h') {
1199 for (len = 0; len < ai32; len++) {
1200 if (len & 1) bits >>= 4;
1202 if (s >= strend) break;
1203 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1204 } else bits = * (U8 *) s++;
1206 *str++ = PL_hexdigit[bits & 15];
1210 const I32 ai32 = len;
1211 for (len = 0; len < ai32; len++) {
1212 if (len & 1) bits <<= 4;
1214 if (s >= strend) break;
1215 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1216 } else bits = *(U8 *) s++;
1218 *str++ = PL_hexdigit[(bits >> 4) & 15];
1223 SvCUR_set(sv, str - SvPVX_const(sv));
1230 if (explicit_length)
1231 /* Switch to "character" mode */
1232 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1237 while (len-- > 0 && s < strend) {
1242 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1243 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1244 if (retlen == (STRLEN) -1 || retlen == 0)
1245 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1249 aint = *(U8 *)(s)++;
1250 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1254 else if (checksum > bits_in_uv)
1255 cdouble += (NV)aint;
1263 while (len-- > 0 && s < strend) {
1265 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1266 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1267 if (retlen == (STRLEN) -1 || retlen == 0)
1268 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1272 else if (checksum > bits_in_uv)
1273 cdouble += (NV) val;
1277 } else if (!checksum)
1279 const U8 ch = *(U8 *) s++;
1282 else if (checksum > bits_in_uv)
1283 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1285 while (len-- > 0) cuv += *(U8 *) s++;
1289 if (explicit_length && howlen != e_star) {
1290 /* Switch to "bytes in UTF-8" mode */
1291 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1293 /* Should be impossible due to the need_utf8() test */
1294 Perl_croak(aTHX_ "U0 mode on a byte string");
1298 if (len > strend - s) len = strend - s;
1300 if (len && unpack_only_one) len = 1;
1304 while (len-- > 0 && s < strend) {
1308 U8 result[UTF8_MAXLEN];
1309 const char *ptr = s;
1311 /* Bug: warns about bad utf8 even if we are short on bytes
1312 and will break out of the loop */
1313 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1316 len = UTF8SKIP(result);
1317 if (!uni_to_bytes(aTHX_ &ptr, strend,
1318 (char *) &result[1], len-1, 'U')) break;
1319 auv = utf8n_to_uvchr(result, len, &retlen, UTF8_ALLOW_DEFAULT);
1322 auv = utf8n_to_uvchr((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
1323 if (retlen == (STRLEN) -1 || retlen == 0)
1324 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1329 else if (checksum > bits_in_uv)
1330 cdouble += (NV) auv;
1335 case 's' | TYPE_IS_SHRIEKING:
1336 #if SHORTSIZE != SIZE16
1339 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
1342 else if (checksum > bits_in_uv)
1343 cdouble += (NV)ashort;
1355 #if U16SIZE > SIZE16
1358 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1359 #if U16SIZE > SIZE16
1365 else if (checksum > bits_in_uv)
1366 cdouble += (NV)ai16;
1371 case 'S' | TYPE_IS_SHRIEKING:
1372 #if SHORTSIZE != SIZE16
1374 unsigned short aushort;
1375 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap,
1379 else if (checksum > bits_in_uv)
1380 cdouble += (NV)aushort;
1393 #if U16SIZE > SIZE16
1396 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
1397 if (datumtype == 'n')
1398 au16 = PerlSock_ntohs(au16);
1399 if (datumtype == 'v')
1403 else if (checksum > bits_in_uv)
1404 cdouble += (NV) au16;
1409 case 'v' | TYPE_IS_SHRIEKING:
1410 case 'n' | TYPE_IS_SHRIEKING:
1413 # if U16SIZE > SIZE16
1416 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1417 /* There should never be any byte-swapping here. */
1418 assert(!TYPE_ENDIANNESS(datumtype));
1419 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1420 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1421 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1422 ai16 = (I16) vtohs((U16) ai16);
1425 else if (checksum > bits_in_uv)
1426 cdouble += (NV) ai16;
1432 case 'i' | TYPE_IS_SHRIEKING:
1435 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
1438 else if (checksum > bits_in_uv)
1439 cdouble += (NV)aint;
1445 case 'I' | TYPE_IS_SHRIEKING:
1448 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
1451 else if (checksum > bits_in_uv)
1452 cdouble += (NV)auint;
1460 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
1463 else if (checksum > bits_in_uv)
1472 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
1475 else if (checksum > bits_in_uv)
1481 case 'l' | TYPE_IS_SHRIEKING:
1482 #if LONGSIZE != SIZE32
1485 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
1488 else if (checksum > bits_in_uv)
1489 cdouble += (NV)along;
1500 #if U32SIZE > SIZE32
1503 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1504 #if U32SIZE > SIZE32
1505 if (ai32 > 2147483647) ai32 -= 4294967296;
1509 else if (checksum > bits_in_uv)
1510 cdouble += (NV)ai32;
1515 case 'L' | TYPE_IS_SHRIEKING:
1516 #if LONGSIZE != SIZE32
1518 unsigned long aulong;
1519 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
1522 else if (checksum > bits_in_uv)
1523 cdouble += (NV)aulong;
1536 #if U32SIZE > SIZE32
1539 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
1540 if (datumtype == 'N')
1541 au32 = PerlSock_ntohl(au32);
1542 if (datumtype == 'V')
1546 else if (checksum > bits_in_uv)
1547 cdouble += (NV)au32;
1552 case 'V' | TYPE_IS_SHRIEKING:
1553 case 'N' | TYPE_IS_SHRIEKING:
1556 #if U32SIZE > SIZE32
1559 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1560 /* There should never be any byte swapping here. */
1561 assert(!TYPE_ENDIANNESS(datumtype));
1562 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1563 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1564 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1565 ai32 = (I32)vtohl((U32)ai32);
1568 else if (checksum > bits_in_uv)
1569 cdouble += (NV)ai32;
1577 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1578 /* newSVpv generates undef if aptr is NULL */
1579 mPUSHs(newSVpv(aptr, 0));
1587 while (len > 0 && s < strend) {
1589 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1590 auv = (auv << 7) | (ch & 0x7f);
1591 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1599 if (++bytes >= sizeof(UV)) { /* promote to string */
1602 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
1603 while (s < strend) {
1604 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1605 sv = mul128(sv, (U8)(ch & 0x7f));
1611 t = SvPV_nolen_const(sv);
1620 if ((s >= strend) && bytes)
1621 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1625 if (symptr->howlen == e_star)
1626 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1628 if (s + sizeof(char*) <= strend) {
1630 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1631 /* newSVpvn generates undef if aptr is NULL */
1632 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1639 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
1641 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
1642 newSViv((IV)aquad) : newSVnv((NV)aquad));
1643 else if (checksum > bits_in_uv)
1644 cdouble += (NV)aquad;
1652 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
1654 mPUSHs(auquad <= UV_MAX ?
1655 newSVuv((UV)auquad) : newSVnv((NV)auquad));
1656 else if (checksum > bits_in_uv)
1657 cdouble += (NV)auquad;
1663 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1667 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
1677 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
1687 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1688 datumtype, needs_swap);
1695 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1699 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1700 sizeof(aldouble.bytes), datumtype, needs_swap);
1702 mPUSHn(aldouble.ld);
1704 cdouble += aldouble.ld;
1710 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1711 sv = sv_2mortal(newSV(l));
1712 if (l) SvPOK_on(sv);
1715 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1720 next_uni_uu(aTHX_ &s, strend, &a);
1721 next_uni_uu(aTHX_ &s, strend, &b);
1722 next_uni_uu(aTHX_ &s, strend, &c);
1723 next_uni_uu(aTHX_ &s, strend, &d);
1724 hunk[0] = (char)((a << 2) | (b >> 4));
1725 hunk[1] = (char)((b << 4) | (c >> 2));
1726 hunk[2] = (char)((c << 6) | d);
1728 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1736 /* possible checksum byte */
1737 const char *skip = s+UTF8SKIP(s);
1738 if (skip < strend && *skip == '\n')
1744 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1748 len = PL_uudmap[*(U8*)s++] & 077;
1750 if (s < strend && ISUUCHAR(*s))
1751 a = PL_uudmap[*(U8*)s++] & 077;
1754 if (s < strend && ISUUCHAR(*s))
1755 b = PL_uudmap[*(U8*)s++] & 077;
1758 if (s < strend && ISUUCHAR(*s))
1759 c = PL_uudmap[*(U8*)s++] & 077;
1762 if (s < strend && ISUUCHAR(*s))
1763 d = PL_uudmap[*(U8*)s++] & 077;
1766 hunk[0] = (char)((a << 2) | (b >> 4));
1767 hunk[1] = (char)((b << 4) | (c >> 2));
1768 hunk[2] = (char)((c << 6) | d);
1770 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1775 else /* possible checksum byte */
1776 if (s + 1 < strend && s[1] == '\n')
1786 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1787 (checksum > bits_in_uv &&
1788 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1791 anv = (NV) (1 << (checksum & 15));
1792 while (checksum >= 16) {
1796 while (cdouble < 0.0)
1798 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
1799 sv = newSVnv(cdouble);
1802 if (checksum < bits_in_uv) {
1803 UV mask = ((UV)1 << checksum) - 1;
1812 if (symptr->flags & FLAG_SLASH){
1813 if (SP - PL_stack_base - start_sp_offset <= 0)
1815 if( next_symbol(symptr) ){
1816 if( symptr->howlen == e_number )
1817 Perl_croak(aTHX_ "Count after length/code in unpack" );
1819 /* ...end of char buffer then no decent length available */
1820 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1822 /* take top of stack (hope it's numeric) */
1825 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1828 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1830 datumtype = symptr->code;
1831 explicit_length = FALSE;
1839 return SP - PL_stack_base - start_sp_offset;
1847 I32 gimme = GIMME_V;
1850 const char *pat = SvPV_const(left, llen);
1851 const char *s = SvPV_const(right, rlen);
1852 const char *strend = s + rlen;
1853 const char *patend = pat + llen;
1857 cnt = unpackstring(pat, patend, s, strend,
1858 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
1859 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
1862 if ( !cnt && gimme == G_SCALAR )
1863 PUSHs(&PL_sv_undef);
1868 doencodes(U8 *h, const char *s, I32 len)
1870 *h++ = PL_uuemap[len];
1872 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1873 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1874 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1875 *h++ = PL_uuemap[(077 & (s[2] & 077))];
1880 const char r = (len > 1 ? s[1] : '\0');
1881 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1882 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1883 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1884 *h++ = PL_uuemap[0];
1891 S_is_an_int(pTHX_ const char *s, STRLEN l)
1893 SV *result = newSVpvn(s, l);
1894 char *const result_c = SvPV_nolen(result); /* convenience */
1895 char *out = result_c;
1899 PERL_ARGS_ASSERT_IS_AN_INT;
1907 SvREFCNT_dec(result);
1930 SvREFCNT_dec(result);
1936 SvCUR_set(result, out - result_c);
1940 /* pnum must be '\0' terminated */
1942 S_div128(pTHX_ SV *pnum, bool *done)
1945 char * const s = SvPV(pnum, len);
1949 PERL_ARGS_ASSERT_DIV128;
1953 const int i = m * 10 + (*t - '0');
1954 const int r = (i >> 7); /* r < 10 */
1962 SvCUR_set(pnum, (STRLEN) (t - s));
1967 =for apidoc packlist
1969 The engine implementing pack() Perl function.
1975 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
1980 PERL_ARGS_ASSERT_PACKLIST;
1982 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
1984 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1985 Also make sure any UTF8 flag is loaded */
1986 SvPV_force_nolen(cat);
1988 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
1990 (void)pack_rec( cat, &sym, beglist, endlist );
1993 /* like sv_utf8_upgrade, but also repoint the group start markers */
1995 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
1998 const char *from_ptr, *from_start, *from_end, **marks, **m;
1999 char *to_start, *to_ptr;
2001 if (SvUTF8(sv)) return;
2003 from_start = SvPVX_const(sv);
2004 from_end = from_start + SvCUR(sv);
2005 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2006 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
2007 if (from_ptr == from_end) {
2008 /* Simple case: no character needs to be changed */
2013 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2014 Newx(to_start, len, char);
2015 Copy(from_start, to_start, from_ptr-from_start, char);
2016 to_ptr = to_start + (from_ptr-from_start);
2018 Newx(marks, sym_ptr->level+2, const char *);
2019 for (group=sym_ptr; group; group = group->previous)
2020 marks[group->level] = from_start + group->strbeg;
2021 marks[sym_ptr->level+1] = from_end+1;
2022 for (m = marks; *m < from_ptr; m++)
2023 *m = to_start + (*m-from_start);
2025 for (;from_ptr < from_end; from_ptr++) {
2026 while (*m == from_ptr) *m++ = to_ptr;
2027 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2031 while (*m == from_ptr) *m++ = to_ptr;
2032 if (m != marks + sym_ptr->level+1) {
2035 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2036 "level=%d", m, marks, sym_ptr->level);
2038 for (group=sym_ptr; group; group = group->previous)
2039 group->strbeg = marks[group->level] - to_start;
2044 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2045 from_start -= SvIVX(sv);
2048 SvFLAGS(sv) &= ~SVf_OOK;
2051 Safefree(from_start);
2052 SvPV_set(sv, to_start);
2053 SvCUR_set(sv, to_ptr - to_start);
2058 /* Exponential string grower. Makes string extension effectively O(n)
2059 needed says how many extra bytes we need (not counting the final '\0')
2060 Only grows the string if there is an actual lack of space
2063 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2064 const STRLEN cur = SvCUR(sv);
2065 const STRLEN len = SvLEN(sv);
2068 PERL_ARGS_ASSERT_SV_EXP_GROW;
2070 if (len - cur > needed) return SvPVX(sv);
2071 extend = needed > len ? needed : len;
2072 return SvGROW(sv, len+extend+1);
2077 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2080 tempsym_t lookahead;
2081 I32 items = endlist - beglist;
2082 bool found = next_symbol(symptr);
2083 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2084 bool warn_utf8 = ckWARN(WARN_UTF8);
2086 PERL_ARGS_ASSERT_PACK_REC;
2088 if (symptr->level == 0 && found && symptr->code == 'U') {
2089 marked_upgrade(aTHX_ cat, symptr);
2090 symptr->flags |= FLAG_DO_UTF8;
2093 symptr->strbeg = SvCUR(cat);
2099 SV *lengthcode = NULL;
2100 I32 datumtype = symptr->code;
2101 howlen_t howlen = symptr->howlen;
2102 char *start = SvPVX(cat);
2103 char *cur = start + SvCUR(cat);
2106 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2110 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2114 /* e_no_len and e_number */
2115 len = symptr->length;
2120 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2122 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2123 /* We can process this letter. */
2124 STRLEN size = props & PACK_SIZE_MASK;
2125 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2129 /* Look ahead for next symbol. Do we have code/code? */
2130 lookahead = *symptr;
2131 found = next_symbol(&lookahead);
2132 if (symptr->flags & FLAG_SLASH) {
2134 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2135 if (strchr("aAZ", lookahead.code)) {
2136 if (lookahead.howlen == e_number) count = lookahead.length;
2139 count = sv_len_utf8(*beglist);
2142 if (lookahead.code == 'Z') count++;
2145 if (lookahead.howlen == e_number && lookahead.length < items)
2146 count = lookahead.length;
2149 lookahead.howlen = e_number;
2150 lookahead.length = count;
2151 lengthcode = sv_2mortal(newSViv(count));
2154 needs_swap = NEEDS_SWAP(datumtype);
2156 /* Code inside the switch must take care to properly update
2157 cat (CUR length and '\0' termination) if it updated *cur and
2158 doesn't simply leave using break */
2159 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2161 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2162 (int) TYPE_NO_MODIFIERS(datumtype));
2164 Perl_croak(aTHX_ "'%%' may not be used in pack");
2167 case '.' | TYPE_IS_SHRIEKING:
2169 if (howlen == e_star) from = start;
2170 else if (len == 0) from = cur;
2172 tempsym_t *group = symptr;
2174 while (--len && group) group = group->previous;
2175 from = group ? start + group->strbeg : start;
2178 len = SvIV(fromstr);
2180 case '@' | TYPE_IS_SHRIEKING:
2182 from = start + symptr->strbeg;
2184 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2186 while (len && from < cur) {
2187 from += UTF8SKIP(from);
2191 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2193 /* Here we know from == cur */
2195 GROWING(0, cat, start, cur, len);
2196 Zero(cur, len, char);
2198 } else if (from < cur) {
2201 } else goto no_change;
2209 if (len > 0) goto grow;
2210 if (len == 0) goto no_change;
2217 tempsym_t savsym = *symptr;
2218 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2219 symptr->flags |= group_modifiers;
2220 symptr->patend = savsym.grpend;
2222 symptr->previous = &lookahead;
2225 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2226 else symptr->flags &= ~FLAG_PARSE_UTF8;
2227 was_utf8 = SvUTF8(cat);
2228 symptr->patptr = savsym.grpbeg;
2229 beglist = pack_rec(cat, symptr, beglist, endlist);
2230 if (SvUTF8(cat) != was_utf8)
2231 /* This had better be an upgrade while in utf8==0 mode */
2234 if (savsym.howlen == e_star && beglist == endlist)
2235 break; /* No way to continue */
2237 items = endlist - beglist;
2238 lookahead.flags = symptr->flags & ~group_modifiers;
2241 case 'X' | TYPE_IS_SHRIEKING:
2242 if (!len) /* Avoid division by 0 */
2249 hop += UTF8SKIP(hop);
2256 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2260 len = (cur-start) % len;
2264 if (len < 1) goto no_change;
2268 Perl_croak(aTHX_ "'%c' outside of string in pack",
2269 (int) TYPE_NO_MODIFIERS(datumtype));
2270 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2272 Perl_croak(aTHX_ "'%c' outside of string in pack",
2273 (int) TYPE_NO_MODIFIERS(datumtype));
2279 if (cur - start < len)
2280 Perl_croak(aTHX_ "'%c' outside of string in pack",
2281 (int) TYPE_NO_MODIFIERS(datumtype));
2284 if (cur < start+symptr->strbeg) {
2285 /* Make sure group starts don't point into the void */
2287 const STRLEN length = cur-start;
2288 for (group = symptr;
2289 group && length < group->strbeg;
2290 group = group->previous) group->strbeg = length;
2291 lookahead.strbeg = length;
2294 case 'x' | TYPE_IS_SHRIEKING: {
2296 if (!len) /* Avoid division by 0 */
2298 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2299 else ai32 = (cur - start) % len;
2300 if (ai32 == 0) goto no_change;
2312 aptr = SvPV_const(fromstr, fromlen);
2313 if (DO_UTF8(fromstr)) {
2314 const char *end, *s;
2316 if (!utf8 && !SvUTF8(cat)) {
2317 marked_upgrade(aTHX_ cat, symptr);
2318 lookahead.flags |= FLAG_DO_UTF8;
2319 lookahead.strbeg = symptr->strbeg;
2322 cur = start + SvCUR(cat);
2324 if (howlen == e_star) {
2325 if (utf8) goto string_copy;
2329 end = aptr + fromlen;
2330 fromlen = datumtype == 'Z' ? len-1 : len;
2331 while ((I32) fromlen > 0 && s < end) {
2336 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2339 if (datumtype == 'Z') len++;
2345 fromlen = len - fromlen;
2346 if (datumtype == 'Z') fromlen--;
2347 if (howlen == e_star) {
2349 if (datumtype == 'Z') len++;
2351 GROWING(0, cat, start, cur, len);
2352 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2353 datumtype | TYPE_IS_PACK))
2354 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2355 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2356 (int)datumtype, aptr, end, cur, (UV)fromlen);
2360 if (howlen == e_star) {
2362 if (datumtype == 'Z') len++;
2364 if (len <= (I32) fromlen) {
2366 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2368 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2370 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2371 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2373 while (fromlen > 0) {
2374 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2380 if (howlen == e_star) {
2382 if (datumtype == 'Z') len++;
2384 if (len <= (I32) fromlen) {
2386 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2388 GROWING(0, cat, start, cur, len);
2389 Copy(aptr, cur, fromlen, char);
2393 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2400 const char *str, *end;
2407 str = SvPV_const(fromstr, fromlen);
2408 end = str + fromlen;
2409 if (DO_UTF8(fromstr)) {
2411 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2413 utf8_source = FALSE;
2414 utf8_flags = 0; /* Unused, but keep compilers happy */
2416 if (howlen == e_star) len = fromlen;
2417 field_len = (len+7)/8;
2418 GROWING(utf8, cat, start, cur, field_len);
2419 if (len > (I32)fromlen) len = fromlen;
2422 if (datumtype == 'B')
2426 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2428 } else bits |= *str++ & 1;
2429 if (l & 7) bits <<= 1;
2431 PUSH_BYTE(utf8, cur, bits);
2436 /* datumtype == 'b' */
2440 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2441 if (val & 1) bits |= 0x80;
2442 } else if (*str++ & 1)
2444 if (l & 7) bits >>= 1;
2446 PUSH_BYTE(utf8, cur, bits);
2452 if (datumtype == 'B')
2453 bits <<= 7 - (l & 7);
2455 bits >>= 7 - (l & 7);
2456 PUSH_BYTE(utf8, cur, bits);
2459 /* Determine how many chars are left in the requested field */
2461 if (howlen == e_star) field_len = 0;
2462 else field_len -= l;
2463 Zero(cur, field_len, char);
2469 const char *str, *end;
2476 str = SvPV_const(fromstr, fromlen);
2477 end = str + fromlen;
2478 if (DO_UTF8(fromstr)) {
2480 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2482 utf8_source = FALSE;
2483 utf8_flags = 0; /* Unused, but keep compilers happy */
2485 if (howlen == e_star) len = fromlen;
2486 field_len = (len+1)/2;
2487 GROWING(utf8, cat, start, cur, field_len);
2488 if (!utf8 && len > (I32)fromlen) len = fromlen;
2491 if (datumtype == 'H')
2495 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2496 if (val < 256 && isALPHA(val))
2497 bits |= (val + 9) & 0xf;
2500 } else if (isALPHA(*str))
2501 bits |= (*str++ + 9) & 0xf;
2503 bits |= *str++ & 0xf;
2504 if (l & 1) bits <<= 4;
2506 PUSH_BYTE(utf8, cur, bits);
2514 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2515 if (val < 256 && isALPHA(val))
2516 bits |= ((val + 9) & 0xf) << 4;
2518 bits |= (val & 0xf) << 4;
2519 } else if (isALPHA(*str))
2520 bits |= ((*str++ + 9) & 0xf) << 4;
2522 bits |= (*str++ & 0xf) << 4;
2523 if (l & 1) bits >>= 4;
2525 PUSH_BYTE(utf8, cur, bits);
2531 PUSH_BYTE(utf8, cur, bits);
2534 /* Determine how many chars are left in the requested field */
2536 if (howlen == e_star) field_len = 0;
2537 else field_len -= l;
2538 Zero(cur, field_len, char);
2546 aiv = SvIV(fromstr);
2547 if ((-128 > aiv || aiv > 127))
2548 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2549 "Character in 'c' format wrapped in pack");
2550 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2555 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2561 aiv = SvIV(fromstr);
2562 if ((0 > aiv || aiv > 0xff))
2563 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2564 "Character in 'C' format wrapped in pack");
2565 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2570 U8 in_bytes = (U8)IN_BYTES;
2572 end = start+SvLEN(cat)-1;
2573 if (utf8) end -= UTF8_MAXLEN-1;
2577 auv = SvUV(fromstr);
2578 if (in_bytes) auv = auv % 0x100;
2583 SvCUR_set(cat, cur - start);
2585 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2586 end = start+SvLEN(cat)-UTF8_MAXLEN;
2588 cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
2591 0 : UNICODE_ALLOW_ANY);
2596 SvCUR_set(cat, cur - start);
2597 marked_upgrade(aTHX_ cat, symptr);
2598 lookahead.flags |= FLAG_DO_UTF8;
2599 lookahead.strbeg = symptr->strbeg;
2602 cur = start + SvCUR(cat);
2603 end = start+SvLEN(cat)-UTF8_MAXLEN;
2606 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2607 "Character in 'W' format wrapped in pack");
2612 SvCUR_set(cat, cur - start);
2613 GROWING(0, cat, start, cur, len+1);
2614 end = start+SvLEN(cat)-1;
2616 *(U8 *) cur++ = (U8)auv;
2625 if (!(symptr->flags & FLAG_DO_UTF8)) {
2626 marked_upgrade(aTHX_ cat, symptr);
2627 lookahead.flags |= FLAG_DO_UTF8;
2628 lookahead.strbeg = symptr->strbeg;
2634 end = start+SvLEN(cat);
2635 if (!utf8) end -= UTF8_MAXLEN;
2639 auv = SvUV(fromstr);
2641 U8 buffer[UTF8_MAXLEN], *endb;
2642 endb = uvchr_to_utf8_flags(buffer, auv,
2644 0 : UNICODE_ALLOW_ANY);
2645 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2647 SvCUR_set(cat, cur - start);
2648 GROWING(0, cat, start, cur,
2649 len+(endb-buffer)*UTF8_EXPAND);
2650 end = start+SvLEN(cat);
2652 cur = S_bytes_to_uni(buffer, endb-buffer, cur, 0);
2656 SvCUR_set(cat, cur - start);
2657 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2658 end = start+SvLEN(cat)-UTF8_MAXLEN;
2660 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv,
2662 0 : UNICODE_ALLOW_ANY);
2667 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2673 anv = SvNV(fromstr);
2674 # if defined(VMS) && !defined(_IEEE_FP)
2675 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2676 * on Alpha; fake it if we don't have them.
2680 else if (anv < -FLT_MAX)
2682 else afloat = (float)anv;
2684 afloat = (float)anv;
2686 PUSH_VAR(utf8, cur, afloat, needs_swap);
2694 anv = SvNV(fromstr);
2695 # if defined(VMS) && !defined(_IEEE_FP)
2696 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2697 * on Alpha; fake it if we don't have them.
2701 else if (anv < -DBL_MAX)
2703 else adouble = (double)anv;
2705 adouble = (double)anv;
2707 PUSH_VAR(utf8, cur, adouble, needs_swap);
2712 Zero(&anv, 1, NV); /* can be long double with unused bits */
2716 /* to work round a gcc/x86 bug; don't use SvNV */
2717 anv.nv = sv_2nv(fromstr);
2719 anv.nv = SvNV(fromstr);
2721 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
2725 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2728 /* long doubles can have unused bits, which may be nonzero */
2729 Zero(&aldouble, 1, long double);
2733 /* to work round a gcc/x86 bug; don't use SvNV */
2734 aldouble.ld = (long double)sv_2nv(fromstr);
2736 aldouble.ld = (long double)SvNV(fromstr);
2738 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2744 case 'n' | TYPE_IS_SHRIEKING:
2749 ai16 = (I16)SvIV(fromstr);
2750 ai16 = PerlSock_htons(ai16);
2751 PUSH16(utf8, cur, &ai16, FALSE);
2754 case 'v' | TYPE_IS_SHRIEKING:
2759 ai16 = (I16)SvIV(fromstr);
2761 PUSH16(utf8, cur, &ai16, FALSE);
2764 case 'S' | TYPE_IS_SHRIEKING:
2765 #if SHORTSIZE != SIZE16
2767 unsigned short aushort;
2769 aushort = SvUV(fromstr);
2770 PUSH_VAR(utf8, cur, aushort, needs_swap);
2780 au16 = (U16)SvUV(fromstr);
2781 PUSH16(utf8, cur, &au16, needs_swap);
2784 case 's' | TYPE_IS_SHRIEKING:
2785 #if SHORTSIZE != SIZE16
2789 ashort = SvIV(fromstr);
2790 PUSH_VAR(utf8, cur, ashort, needs_swap);
2800 ai16 = (I16)SvIV(fromstr);
2801 PUSH16(utf8, cur, &ai16, needs_swap);
2805 case 'I' | TYPE_IS_SHRIEKING:
2809 auint = SvUV(fromstr);
2810 PUSH_VAR(utf8, cur, auint, needs_swap);
2817 aiv = SvIV(fromstr);
2818 PUSH_VAR(utf8, cur, aiv, needs_swap);
2825 auv = SvUV(fromstr);
2826 PUSH_VAR(utf8, cur, auv, needs_swap);
2833 anv = SvNV(fromstr);
2837 SvCUR_set(cat, cur - start);
2838 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
2841 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2842 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2843 any negative IVs will have already been got by the croak()
2844 above. IOK is untrue for fractions, so we test them
2845 against UV_MAX_P1. */
2846 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2847 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
2848 char *in = buf + sizeof(buf);
2849 UV auv = SvUV(fromstr);
2852 *--in = (char)((auv & 0x7f) | 0x80);
2855 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2856 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2857 in, (buf + sizeof(buf)) - in);
2858 } else if (SvPOKp(fromstr))
2860 else if (SvNOKp(fromstr)) {
2861 /* 10**NV_MAX_10_EXP is the largest power of 10
2862 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
2863 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2864 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2865 And with that many bytes only Inf can overflow.
2866 Some C compilers are strict about integral constant
2867 expressions so we conservatively divide by a slightly
2868 smaller integer instead of multiplying by the exact
2869 floating-point value.
2871 #ifdef NV_MAX_10_EXP
2872 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
2873 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
2875 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
2876 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
2878 char *in = buf + sizeof(buf);
2880 anv = Perl_floor(anv);
2882 const NV next = Perl_floor(anv / 128);
2883 if (in <= buf) /* this cannot happen ;-) */
2884 Perl_croak(aTHX_ "Cannot compress integer in pack");
2885 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
2888 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2889 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2890 in, (buf + sizeof(buf)) - in);
2899 /* Copy string and check for compliance */
2900 from = SvPV_const(fromstr, len);
2901 if ((norm = is_an_int(from, len)) == NULL)
2902 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
2904 Newx(result, len, char);
2907 while (!done) *--in = div128(norm, &done) | 0x80;
2908 result[len - 1] &= 0x7F; /* clear continue bit */
2909 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2910 in, (result + len) - in);
2912 SvREFCNT_dec(norm); /* free norm */
2917 case 'i' | TYPE_IS_SHRIEKING:
2921 aint = SvIV(fromstr);
2922 PUSH_VAR(utf8, cur, aint, needs_swap);
2925 case 'N' | TYPE_IS_SHRIEKING:
2930 au32 = SvUV(fromstr);
2931 au32 = PerlSock_htonl(au32);
2932 PUSH32(utf8, cur, &au32, FALSE);
2935 case 'V' | TYPE_IS_SHRIEKING:
2940 au32 = SvUV(fromstr);
2942 PUSH32(utf8, cur, &au32, FALSE);
2945 case 'L' | TYPE_IS_SHRIEKING:
2946 #if LONGSIZE != SIZE32
2948 unsigned long aulong;
2950 aulong = SvUV(fromstr);
2951 PUSH_VAR(utf8, cur, aulong, needs_swap);
2961 au32 = SvUV(fromstr);
2962 PUSH32(utf8, cur, &au32, needs_swap);
2965 case 'l' | TYPE_IS_SHRIEKING:
2966 #if LONGSIZE != SIZE32
2970 along = SvIV(fromstr);
2971 PUSH_VAR(utf8, cur, along, needs_swap);
2981 ai32 = SvIV(fromstr);
2982 PUSH32(utf8, cur, &ai32, needs_swap);
2990 auquad = (Uquad_t) SvUV(fromstr);
2991 PUSH_VAR(utf8, cur, auquad, needs_swap);
2998 aquad = (Quad_t)SvIV(fromstr);
2999 PUSH_VAR(utf8, cur, aquad, needs_swap);
3004 len = 1; /* assume SV is correct length */
3005 GROWING(utf8, cat, start, cur, sizeof(char *));
3012 SvGETMAGIC(fromstr);
3013 if (!SvOK(fromstr)) aptr = NULL;
3015 /* XXX better yet, could spirit away the string to
3016 * a safe spot and hang on to it until the result
3017 * of pack() (and all copies of the result) are
3020 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3021 !SvREADONLY(fromstr)))) {
3022 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3023 "Attempt to pack pointer to temporary value");
3025 if (SvPOK(fromstr) || SvNIOK(fromstr))
3026 aptr = SvPV_nomg_const_nolen(fromstr);
3028 aptr = SvPV_force_flags_nolen(fromstr, 0);
3030 PUSH_VAR(utf8, cur, aptr, needs_swap);
3034 const char *aptr, *aend;
3038 if (len <= 2) len = 45;
3039 else len = len / 3 * 3;
3041 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3042 "Field too wide in 'u' format in pack");
3045 aptr = SvPV_const(fromstr, fromlen);
3046 from_utf8 = DO_UTF8(fromstr);
3048 aend = aptr + fromlen;
3049 fromlen = sv_len_utf8_nomg(fromstr);
3050 } else aend = NULL; /* Unused, but keep compilers happy */
3051 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3052 while (fromlen > 0) {
3055 U8 hunk[1+63/3*4+1];
3057 if ((I32)fromlen > len)
3063 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3064 'u' | TYPE_IS_PACK)) {
3066 SvCUR_set(cat, cur - start);
3067 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3068 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3069 aptr, aend, buffer, (long) todo);
3071 end = doencodes(hunk, buffer, todo);
3073 end = doencodes(hunk, aptr, todo);
3076 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
3083 SvCUR_set(cat, cur - start);
3085 *symptr = lookahead;
3094 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3097 SV *pat_sv = *++MARK;
3098 const char *pat = SvPV_const(pat_sv, fromlen);
3099 const char *patend = pat + fromlen;
3105 packlist(cat, pat, patend, MARK, SP + 1);
3115 * c-indentation-style: bsd
3117 * indent-tabs-mode: nil
3120 * ex: set ts=8 sts=4 sw=4 et: