3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
32 #define PERL_IN_PP_PACK_C
35 /* Types used by pack/unpack */
37 e_no_len, /* no length */
38 e_number, /* number, [] */
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
57 #define TEMPSYM_INIT(symptr, p, e, f) \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
81 U8 bytes[sizeof(long double)];
88 /* Maximum number of bytes to which a byte can grow due to upgrade */
92 * Offset for integer pack/unpack.
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.) --???
106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107 defines are now in config.h. --Andy Dougherty April 1998
112 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
115 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
116 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
117 # define OFF16(p) ((char*)(p))
118 # define OFF32(p) ((char*)(p))
120 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
121 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
122 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
124 ++++ bad cray byte order
128 # define OFF16(p) ((char *) (p))
129 # define OFF32(p) ((char *) (p))
132 #define PUSH16(utf8, cur, p, needs_swap) \
133 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
134 #define PUSH32(utf8, cur, p, needs_swap) \
135 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
137 #if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
138 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
139 #elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
140 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
142 # error "Unsupported byteorder"
143 /* Need to add code here to re-instate mixed endian support.
144 NEEDS_SWAP would need to hold a flag indicating which action to
145 take, and S_reverse_copy and the code in uni_to_bytes would need
146 logic adding to deal with any mixed-endian transformations needed.
150 /* Only to be used inside a loop (see the break) */
151 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
153 if (UNLIKELY(utf8)) { \
154 if (!uni_to_bytes(aTHX_ &s, strend, \
155 (char *) (buf), len, datumtype)) break; \
157 if (UNLIKELY(needs_swap)) \
158 S_reverse_copy(s, (char *) (buf), len); \
160 Copy(s, (char *) (buf), len, char); \
165 #define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \
166 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
168 #define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \
169 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
171 #define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \
172 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
174 #define PUSH_VAR(utf8, aptr, var, needs_swap) \
175 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
177 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
178 #define MAX_SUB_TEMPLATE_LEVEL 100
180 /* flags (note that type modifiers can also be used as flags!) */
181 #define FLAG_WAS_UTF8 0x40
182 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
183 #define FLAG_UNPACK_ONLY_ONE 0x10
184 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
185 #define FLAG_SLASH 0x04
186 #define FLAG_COMMA 0x02
187 #define FLAG_PACK 0x01
190 S_mul128(pTHX_ SV *sv, U8 m)
193 char *s = SvPV(sv, len);
196 PERL_ARGS_ASSERT_MUL128;
198 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
199 SV * const tmpNew = newSVpvs("0000000000");
201 sv_catsv(tmpNew, sv);
202 SvREFCNT_dec(sv); /* free old sv */
207 while (!*t) /* trailing '\0'? */
210 const U32 i = ((*t - '0') << 7) + m;
211 *(t--) = '0' + (char)(i % 10);
217 /* Explosives and implosives. */
219 #if 'I' == 73 && 'J' == 74
220 /* On an ASCII/ISO kind of system */
221 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
224 Some other sort of character set - use memchr() so we don't match
227 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
231 #define TYPE_IS_SHRIEKING 0x100
232 #define TYPE_IS_BIG_ENDIAN 0x200
233 #define TYPE_IS_LITTLE_ENDIAN 0x400
234 #define TYPE_IS_PACK 0x800
235 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
236 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
237 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
239 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
240 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
242 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
244 #define PACK_SIZE_CANNOT_CSUM 0x80
245 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
246 #define PACK_SIZE_MASK 0x3F
248 #include "packsizetables.c"
251 S_reverse_copy(const char *src, char *dest, STRLEN len)
259 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
262 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
263 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
264 /* We try to process malformed UTF-8 as much as possible (preferably with
265 warnings), but these two mean we make no progress in the string and
266 might enter an infinite loop */
267 if (retlen == (STRLEN) -1 || retlen == 0)
268 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
269 (int) TYPE_NO_MODIFIERS(datumtype));
271 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
272 "Character in '%c' format wrapped in unpack",
273 (int) TYPE_NO_MODIFIERS(datumtype));
280 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
281 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
285 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
289 const char *from = *s;
291 const U32 flags = ckWARN(WARN_UTF8) ?
292 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
293 const bool needs_swap = NEEDS_SWAP(datumtype);
295 if (UNLIKELY(needs_swap))
298 for (;buf_len > 0; buf_len--) {
299 if (from >= end) return FALSE;
300 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
301 if (retlen == (STRLEN) -1 || retlen == 0) {
302 from += UTF8SKIP(from);
304 } else from += retlen;
309 if (UNLIKELY(needs_swap))
310 *(U8 *)--buf = (U8)val;
312 *(U8 *)buf++ = (U8)val;
314 /* We have enough characters for the buffer. Did we have problems ? */
317 /* Rewalk the string fragment while warning */
319 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
320 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
321 if (ptr >= end) break;
322 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
324 if (from > end) from = end;
327 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
328 WARN_PACK : WARN_UNPACK),
329 "Character(s) in '%c' format wrapped in %s",
330 (int) TYPE_NO_MODIFIERS(datumtype),
331 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
338 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
342 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
343 if (val >= 0x100 || !ISUUCHAR(val) ||
344 retlen == (STRLEN) -1 || retlen == 0) {
348 *out = PL_uudmap[val] & 077;
354 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
355 PERL_ARGS_ASSERT_BYTES_TO_UNI;
357 if (UNLIKELY(needs_swap)) {
358 const U8 *p = start + len;
359 while (p-- > start) {
360 append_utf8_from_native_byte(*p, (U8 **) & dest);
363 const U8 * const end = start + len;
364 while (start < end) {
365 append_utf8_from_native_byte(*start, (U8 **) & dest);
372 #define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
374 if (UNLIKELY(utf8)) \
375 (cur) = S_bytes_to_uni((U8 *) buf, len, (cur), needs_swap); \
377 if (UNLIKELY(needs_swap)) \
378 S_reverse_copy((char *)(buf), cur, len); \
380 Copy(buf, cur, len, char); \
385 #define GROWING(utf8, cat, start, cur, in_len) \
387 STRLEN glen = (in_len); \
388 if (utf8) glen *= UTF8_EXPAND; \
389 if ((cur) + glen >= (start) + SvLEN(cat)) { \
390 (start) = sv_exp_grow(cat, glen); \
391 (cur) = (start) + SvCUR(cat); \
395 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
397 const STRLEN glen = (in_len); \
399 if (utf8) gl *= UTF8_EXPAND; \
400 if ((cur) + gl >= (start) + SvLEN(cat)) { \
402 SvCUR_set((cat), (cur) - (start)); \
403 (start) = sv_exp_grow(cat, gl); \
404 (cur) = (start) + SvCUR(cat); \
406 PUSH_BYTES(utf8, cur, buf, glen, 0); \
409 #define PUSH_BYTE(utf8, s, byte) \
412 const U8 au8 = (byte); \
413 (s) = S_bytes_to_uni(&au8, 1, (s), 0); \
414 } else *(U8 *)(s)++ = (byte); \
417 /* Only to be used inside a loop (see the break) */
418 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
421 if (str >= end) break; \
422 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
423 if (retlen == (STRLEN) -1 || retlen == 0) { \
425 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
430 static const char *_action( const tempsym_t* symptr )
432 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
435 /* Returns the sizeof() struct described by pat */
437 S_measure_struct(pTHX_ tempsym_t* symptr)
441 PERL_ARGS_ASSERT_MEASURE_STRUCT;
443 while (next_symbol(symptr)) {
447 switch (symptr->howlen) {
449 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
453 /* e_no_len and e_number */
454 len = symptr->length;
458 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
461 /* endianness doesn't influence the size of a type */
462 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
464 Perl_croak(aTHX_ "Invalid type '%c' in %s",
465 (int)TYPE_NO_MODIFIERS(symptr->code),
467 case '.' | TYPE_IS_SHRIEKING:
468 case '@' | TYPE_IS_SHRIEKING:
472 case 'U': /* XXXX Is it correct? */
475 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
476 (int) TYPE_NO_MODIFIERS(symptr->code),
483 tempsym_t savsym = *symptr;
484 symptr->patptr = savsym.grpbeg;
485 symptr->patend = savsym.grpend;
486 /* XXXX Theoretically, we need to measure many times at
487 different positions, since the subexpression may contain
488 alignment commands, but be not of aligned length.
489 Need to detect this and croak(). */
490 size = measure_struct(symptr);
494 case 'X' | TYPE_IS_SHRIEKING:
495 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
497 if (!len) /* Avoid division by 0 */
499 len = total % len; /* Assumed: the start is aligned. */
504 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
506 case 'x' | TYPE_IS_SHRIEKING:
507 if (!len) /* Avoid division by 0 */
509 star = total % len; /* Assumed: the start is aligned. */
510 if (star) /* Other portable ways? */
534 size = sizeof(char*);
544 /* locate matching closing parenthesis or bracket
545 * returns char pointer to char after match, or NULL
548 S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
550 PERL_ARGS_ASSERT_GROUP_END;
552 while (patptr < patend) {
553 const char c = *patptr++;
560 while (patptr < patend && *patptr != '\n')
564 patptr = group_end(patptr, patend, ')') + 1;
566 patptr = group_end(patptr, patend, ']') + 1;
568 Perl_croak(aTHX_ "No group ending character '%c' found in template",
574 /* Convert unsigned decimal number to binary.
575 * Expects a pointer to the first digit and address of length variable
576 * Advances char pointer to 1st non-digit char and returns number
579 S_get_num(pTHX_ const char *patptr, I32 *lenptr )
581 I32 len = *patptr++ - '0';
583 PERL_ARGS_ASSERT_GET_NUM;
585 while (isDIGIT(*patptr)) {
586 if (len >= 0x7FFFFFFF/10)
587 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
588 len = (len * 10) + (*patptr++ - '0');
594 /* The marvellous template parsing routine: Using state stored in *symptr,
595 * locates next template code and count
598 S_next_symbol(pTHX_ tempsym_t* symptr )
600 const char* patptr = symptr->patptr;
601 const char* const patend = symptr->patend;
603 PERL_ARGS_ASSERT_NEXT_SYMBOL;
605 symptr->flags &= ~FLAG_SLASH;
607 while (patptr < patend) {
608 if (isSPACE(*patptr))
610 else if (*patptr == '#') {
612 while (patptr < patend && *patptr != '\n')
617 /* We should have found a template code */
618 I32 code = *patptr++ & 0xFF;
619 U32 inherited_modifiers = 0;
621 if (code == ','){ /* grandfather in commas but with a warning */
622 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
623 symptr->flags |= FLAG_COMMA;
624 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
625 "Invalid type ',' in %s", _action( symptr ) );
630 /* for '(', skip to ')' */
632 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
633 Perl_croak(aTHX_ "()-group starts with a count in %s",
635 symptr->grpbeg = patptr;
636 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
637 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
638 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
642 /* look for group modifiers to inherit */
643 if (TYPE_ENDIANNESS(symptr->flags)) {
644 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
645 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
648 /* look for modifiers */
649 while (patptr < patend) {
654 modifier = TYPE_IS_SHRIEKING;
655 allowed = "sSiIlLxXnNvV@.";
658 modifier = TYPE_IS_BIG_ENDIAN;
659 allowed = ENDIANNESS_ALLOWED_TYPES;
662 modifier = TYPE_IS_LITTLE_ENDIAN;
663 allowed = ENDIANNESS_ALLOWED_TYPES;
674 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
675 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
676 allowed, _action( symptr ) );
678 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
679 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
680 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
681 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
682 TYPE_ENDIANNESS_MASK)
683 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
684 *patptr, _action( symptr ) );
686 if ((code & modifier)) {
687 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
688 "Duplicate modifier '%c' after '%c' in %s",
689 *patptr, (int) TYPE_NO_MODIFIERS(code),
697 /* inherit modifiers */
698 code |= inherited_modifiers;
700 /* look for count and/or / */
701 if (patptr < patend) {
702 if (isDIGIT(*patptr)) {
703 patptr = get_num( patptr, &symptr->length );
704 symptr->howlen = e_number;
706 } else if (*patptr == '*') {
708 symptr->howlen = e_star;
710 } else if (*patptr == '[') {
711 const char* lenptr = ++patptr;
712 symptr->howlen = e_number;
713 patptr = group_end( patptr, patend, ']' ) + 1;
714 /* what kind of [] is it? */
715 if (isDIGIT(*lenptr)) {
716 lenptr = get_num( lenptr, &symptr->length );
718 Perl_croak(aTHX_ "Malformed integer in [] in %s",
721 tempsym_t savsym = *symptr;
722 symptr->patend = patptr-1;
723 symptr->patptr = lenptr;
724 savsym.length = measure_struct(symptr);
728 symptr->howlen = e_no_len;
733 while (patptr < patend) {
734 if (isSPACE(*patptr))
736 else if (*patptr == '#') {
738 while (patptr < patend && *patptr != '\n')
743 if (*patptr == '/') {
744 symptr->flags |= FLAG_SLASH;
746 if (patptr < patend &&
747 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
748 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
755 /* at end - no count, no / */
756 symptr->howlen = e_no_len;
761 symptr->patptr = patptr;
765 symptr->patptr = patptr;
770 There is no way to cleanly handle the case where we should process the
771 string per byte in its upgraded form while it's really in downgraded form
772 (e.g. estimates like strend-s as an upper bound for the number of
773 characters left wouldn't work). So if we foresee the need of this
774 (pattern starts with U or contains U0), we want to work on the encoded
775 version of the string. Users are advised to upgrade their pack string
776 themselves if they need to do a lot of unpacks like this on it
779 need_utf8(const char *pat, const char *patend)
783 PERL_ARGS_ASSERT_NEED_UTF8;
785 while (pat < patend) {
788 pat = (const char *) memchr(pat, '\n', patend-pat);
789 if (!pat) return FALSE;
790 } else if (pat[0] == 'U') {
791 if (first || pat[1] == '0') return TRUE;
792 } else first = FALSE;
799 first_symbol(const char *pat, const char *patend) {
800 PERL_ARGS_ASSERT_FIRST_SYMBOL;
802 while (pat < patend) {
803 if (pat[0] != '#') return pat[0];
805 pat = (const char *) memchr(pat, '\n', patend-pat);
813 =for apidoc unpackstring
815 The engine implementing the unpack() Perl function.
817 Using the template pat..patend, this function unpacks the string
818 s..strend into a number of mortal SVs, which it pushes onto the perl
819 argument (@_) stack (so you will need to issue a C<PUTBACK> before and
820 C<SPAGAIN> after the call to this function). It returns the number of
823 The strend and patend pointers should point to the byte following the last
824 character of each string.
826 Although this function returns its values on the perl argument stack, it
827 doesn't take any parameters from that stack (and thus in particular
828 there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
834 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
838 PERL_ARGS_ASSERT_UNPACKSTRING;
840 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
841 else if (need_utf8(pat, patend)) {
842 /* We probably should try to avoid this in case a scalar context call
843 wouldn't get to the "U0" */
844 STRLEN len = strend - s;
845 s = (char *) bytes_to_utf8((U8 *) s, &len);
848 flags |= FLAG_DO_UTF8;
851 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
852 flags |= FLAG_PARSE_UTF8;
854 TEMPSYM_INIT(&sym, pat, patend, flags);
856 return unpack_rec(&sym, s, s, strend, NULL );
860 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
864 const I32 start_sp_offset = SP - PL_stack_base;
869 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
871 bool explicit_length;
872 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
873 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
875 PERL_ARGS_ASSERT_UNPACK_REC;
877 symptr->strbeg = s - strbeg;
879 while (next_symbol(symptr)) {
882 I32 datumtype = symptr->code;
884 /* do first one only unless in list context
885 / is implemented by unpacking the count, then popping it from the
886 stack, so must check that we're not in the middle of a / */
888 && (SP - PL_stack_base == start_sp_offset + 1)
889 && (datumtype != '/') ) /* XXX can this be omitted */
892 switch (howlen = symptr->howlen) {
894 len = strend - strbeg; /* long enough */
897 /* e_no_len and e_number */
898 len = symptr->length;
902 explicit_length = TRUE;
904 beyond = s >= strend;
906 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
908 /* props nonzero means we can process this letter. */
909 const long size = props & PACK_SIZE_MASK;
910 const long howmany = (strend - s) / size;
914 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
915 if (len && unpack_only_one) len = 1;
921 needs_swap = NEEDS_SWAP(datumtype);
923 switch(TYPE_NO_ENDIANNESS(datumtype)) {
925 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
928 if (howlen == e_no_len)
929 len = 16; /* len is not specified */
937 tempsym_t savsym = *symptr;
938 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
939 symptr->flags |= group_modifiers;
940 symptr->patend = savsym.grpend;
941 symptr->previous = &savsym;
944 if (len && unpack_only_one) len = 1;
946 symptr->patptr = savsym.grpbeg;
947 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
948 else symptr->flags &= ~FLAG_PARSE_UTF8;
949 unpack_rec(symptr, s, strbeg, strend, &s);
950 if (s == strend && savsym.howlen == e_star)
951 break; /* No way to continue */
954 savsym.flags = symptr->flags & ~group_modifiers;
958 case '.' | TYPE_IS_SHRIEKING:
962 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
963 if (howlen == e_star) from = strbeg;
964 else if (len <= 0) from = s;
966 tempsym_t *group = symptr;
968 while (--len && group) group = group->previous;
969 from = group ? strbeg + group->strbeg : strbeg;
972 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
973 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
977 case '@' | TYPE_IS_SHRIEKING:
979 s = strbeg + symptr->strbeg;
980 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
984 Perl_croak(aTHX_ "'@' outside of string in unpack");
989 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
992 Perl_croak(aTHX_ "'@' outside of string in unpack");
996 case 'X' | TYPE_IS_SHRIEKING:
997 if (!len) /* Avoid division by 0 */
1000 const char *hop, *last;
1002 hop = last = strbeg;
1004 hop += UTF8SKIP(hop);
1011 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1015 len = (s - strbeg) % len;
1021 Perl_croak(aTHX_ "'X' outside of string in unpack");
1022 while (--s, UTF8_IS_CONTINUATION(*s)) {
1024 Perl_croak(aTHX_ "'X' outside of string in unpack");
1029 if (len > s - strbeg)
1030 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1034 case 'x' | TYPE_IS_SHRIEKING: {
1036 if (!len) /* Avoid division by 0 */
1038 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1039 else ai32 = (s - strbeg) % len;
1040 if (ai32 == 0) break;
1048 Perl_croak(aTHX_ "'x' outside of string in unpack");
1053 if (len > strend - s)
1054 Perl_croak(aTHX_ "'x' outside of string in unpack");
1059 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1065 /* Preliminary length estimate is assumed done in 'W' */
1066 if (len > strend - s) len = strend - s;
1072 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1073 if (hop >= strend) {
1075 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1080 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1082 } else if (len > strend - s)
1085 if (datumtype == 'Z') {
1086 /* 'Z' strips stuff after first null */
1087 const char *ptr, *end;
1089 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1090 sv = newSVpvn(s, ptr-s);
1091 if (howlen == e_star) /* exact for 'Z*' */
1092 len = ptr-s + (ptr != strend ? 1 : 0);
1093 } else if (datumtype == 'A') {
1094 /* 'A' strips both nulls and spaces */
1096 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1097 for (ptr = s+len-1; ptr >= s; ptr--)
1098 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1099 !isSPACE_utf8(ptr)) break;
1100 if (ptr >= s) ptr += UTF8SKIP(ptr);
1103 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1105 for (ptr = s+len-1; ptr >= s; ptr--)
1106 if (*ptr != 0 && !isSPACE(*ptr)) break;
1109 sv = newSVpvn(s, ptr-s);
1110 } else sv = newSVpvn(s, len);
1114 /* Undo any upgrade done due to need_utf8() */
1115 if (!(symptr->flags & FLAG_WAS_UTF8))
1116 sv_utf8_downgrade(sv, 0);
1124 if (howlen == e_star || len > (strend - s) * 8)
1125 len = (strend - s) * 8;
1128 while (len >= 8 && s < strend) {
1129 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1134 cuv += PL_bitcount[*(U8 *)s++];
1137 if (len && s < strend) {
1139 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1140 if (datumtype == 'b')
1142 if (bits & 1) cuv++;
1147 if (bits & 0x80) cuv++;
1154 sv = sv_2mortal(newSV(len ? len : 1));
1157 if (datumtype == 'b') {
1159 const I32 ai32 = len;
1160 for (len = 0; len < ai32; len++) {
1161 if (len & 7) bits >>= 1;
1163 if (s >= strend) break;
1164 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1165 } else bits = *(U8 *) s++;
1166 *str++ = bits & 1 ? '1' : '0';
1170 const I32 ai32 = len;
1171 for (len = 0; len < ai32; len++) {
1172 if (len & 7) bits <<= 1;
1174 if (s >= strend) break;
1175 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1176 } else bits = *(U8 *) s++;
1177 *str++ = bits & 0x80 ? '1' : '0';
1181 SvCUR_set(sv, str - SvPVX_const(sv));
1188 /* Preliminary length estimate, acceptable for utf8 too */
1189 if (howlen == e_star || len > (strend - s) * 2)
1190 len = (strend - s) * 2;
1192 sv = sv_2mortal(newSV(len ? len : 1));
1196 if (datumtype == 'h') {
1199 for (len = 0; len < ai32; len++) {
1200 if (len & 1) bits >>= 4;
1202 if (s >= strend) break;
1203 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1204 } else bits = * (U8 *) s++;
1206 *str++ = PL_hexdigit[bits & 15];
1210 const I32 ai32 = len;
1211 for (len = 0; len < ai32; len++) {
1212 if (len & 1) bits <<= 4;
1214 if (s >= strend) break;
1215 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1216 } else bits = *(U8 *) s++;
1218 *str++ = PL_hexdigit[(bits >> 4) & 15];
1223 SvCUR_set(sv, str - SvPVX_const(sv));
1230 if (explicit_length)
1231 /* Switch to "character" mode */
1232 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1237 while (len-- > 0 && s < strend) {
1242 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1243 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1244 if (retlen == (STRLEN) -1 || retlen == 0)
1245 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1249 aint = *(U8 *)(s)++;
1250 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1254 else if (checksum > bits_in_uv)
1255 cdouble += (NV)aint;
1263 while (len-- > 0 && s < strend) {
1265 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1266 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1267 if (retlen == (STRLEN) -1 || retlen == 0)
1268 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1272 else if (checksum > bits_in_uv)
1273 cdouble += (NV) val;
1277 } else if (!checksum)
1279 const U8 ch = *(U8 *) s++;
1282 else if (checksum > bits_in_uv)
1283 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1285 while (len-- > 0) cuv += *(U8 *) s++;
1289 if (explicit_length && howlen != e_star) {
1290 /* Switch to "bytes in UTF-8" mode */
1291 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1293 /* Should be impossible due to the need_utf8() test */
1294 Perl_croak(aTHX_ "U0 mode on a byte string");
1298 if (len > strend - s) len = strend - s;
1300 if (len && unpack_only_one) len = 1;
1304 while (len-- > 0 && s < strend) {
1308 U8 result[UTF8_MAXLEN];
1309 const char *ptr = s;
1311 /* Bug: warns about bad utf8 even if we are short on bytes
1312 and will break out of the loop */
1313 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1316 len = UTF8SKIP(result);
1317 if (!uni_to_bytes(aTHX_ &ptr, strend,
1318 (char *) &result[1], len-1, 'U')) break;
1319 auv = utf8n_to_uvchr(result, len, &retlen, UTF8_ALLOW_DEFAULT);
1322 auv = utf8n_to_uvchr((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
1323 if (retlen == (STRLEN) -1 || retlen == 0)
1324 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1329 else if (checksum > bits_in_uv)
1330 cdouble += (NV) auv;
1335 case 's' | TYPE_IS_SHRIEKING:
1336 #if SHORTSIZE != SIZE16
1339 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
1342 else if (checksum > bits_in_uv)
1343 cdouble += (NV)ashort;
1355 #if U16SIZE > SIZE16
1358 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1359 #if U16SIZE > SIZE16
1365 else if (checksum > bits_in_uv)
1366 cdouble += (NV)ai16;
1371 case 'S' | TYPE_IS_SHRIEKING:
1372 #if SHORTSIZE != SIZE16
1374 unsigned short aushort;
1375 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap,
1379 else if (checksum > bits_in_uv)
1380 cdouble += (NV)aushort;
1393 #if U16SIZE > SIZE16
1396 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
1397 if (datumtype == 'n')
1398 au16 = PerlSock_ntohs(au16);
1399 if (datumtype == 'v')
1403 else if (checksum > bits_in_uv)
1404 cdouble += (NV) au16;
1409 case 'v' | TYPE_IS_SHRIEKING:
1410 case 'n' | TYPE_IS_SHRIEKING:
1413 # if U16SIZE > SIZE16
1416 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1417 /* There should never be any byte-swapping here. */
1418 assert(!TYPE_ENDIANNESS(datumtype));
1419 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1420 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1421 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1422 ai16 = (I16) vtohs((U16) ai16);
1425 else if (checksum > bits_in_uv)
1426 cdouble += (NV) ai16;
1432 case 'i' | TYPE_IS_SHRIEKING:
1435 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
1438 else if (checksum > bits_in_uv)
1439 cdouble += (NV)aint;
1445 case 'I' | TYPE_IS_SHRIEKING:
1448 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
1451 else if (checksum > bits_in_uv)
1452 cdouble += (NV)auint;
1460 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
1463 else if (checksum > bits_in_uv)
1472 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
1475 else if (checksum > bits_in_uv)
1481 case 'l' | TYPE_IS_SHRIEKING:
1482 #if LONGSIZE != SIZE32
1485 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
1488 else if (checksum > bits_in_uv)
1489 cdouble += (NV)along;
1500 #if U32SIZE > SIZE32
1503 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1504 #if U32SIZE > SIZE32
1505 if (ai32 > 2147483647) ai32 -= 4294967296;
1509 else if (checksum > bits_in_uv)
1510 cdouble += (NV)ai32;
1515 case 'L' | TYPE_IS_SHRIEKING:
1516 #if LONGSIZE != SIZE32
1518 unsigned long aulong;
1519 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
1522 else if (checksum > bits_in_uv)
1523 cdouble += (NV)aulong;
1536 #if U32SIZE > SIZE32
1539 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
1540 if (datumtype == 'N')
1541 au32 = PerlSock_ntohl(au32);
1542 if (datumtype == 'V')
1546 else if (checksum > bits_in_uv)
1547 cdouble += (NV)au32;
1552 case 'V' | TYPE_IS_SHRIEKING:
1553 case 'N' | TYPE_IS_SHRIEKING:
1556 #if U32SIZE > SIZE32
1559 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1560 /* There should never be any byte swapping here. */
1561 assert(!TYPE_ENDIANNESS(datumtype));
1562 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1563 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1564 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1565 ai32 = (I32)vtohl((U32)ai32);
1568 else if (checksum > bits_in_uv)
1569 cdouble += (NV)ai32;
1577 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1578 /* newSVpv generates undef if aptr is NULL */
1579 mPUSHs(newSVpv(aptr, 0));
1587 while (len > 0 && s < strend) {
1589 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1590 auv = (auv << 7) | (ch & 0x7f);
1591 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1599 if (++bytes >= sizeof(UV)) { /* promote to string */
1602 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
1603 while (s < strend) {
1604 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1605 sv = mul128(sv, (U8)(ch & 0x7f));
1611 t = SvPV_nolen_const(sv);
1620 if ((s >= strend) && bytes)
1621 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1625 if (symptr->howlen == e_star)
1626 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1628 if (s + sizeof(char*) <= strend) {
1630 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1631 /* newSVpvn generates undef if aptr is NULL */
1632 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1635 #if defined(HAS_QUAD) && IVSIZE >= 8
1639 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
1641 mPUSHs(newSViv((IV)aquad));
1642 else if (checksum > bits_in_uv)
1643 cdouble += (NV)aquad;
1651 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
1653 mPUSHs(newSVuv((UV)auquad));
1654 else if (checksum > bits_in_uv)
1655 cdouble += (NV)auquad;
1661 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1665 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
1675 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
1685 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1686 datumtype, needs_swap);
1693 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1697 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1698 sizeof(aldouble.bytes), datumtype, needs_swap);
1700 mPUSHn(aldouble.ld);
1702 cdouble += aldouble.ld;
1708 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1709 sv = sv_2mortal(newSV(l));
1710 if (l) SvPOK_on(sv);
1713 while (next_uni_uu(aTHX_ &s, strend, &len)) {
1718 next_uni_uu(aTHX_ &s, strend, &a);
1719 next_uni_uu(aTHX_ &s, strend, &b);
1720 next_uni_uu(aTHX_ &s, strend, &c);
1721 next_uni_uu(aTHX_ &s, strend, &d);
1722 hunk[0] = (char)((a << 2) | (b >> 4));
1723 hunk[1] = (char)((b << 4) | (c >> 2));
1724 hunk[2] = (char)((c << 6) | d);
1726 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1734 /* possible checksum byte */
1735 const char *skip = s+UTF8SKIP(s);
1736 if (skip < strend && *skip == '\n')
1742 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
1746 len = PL_uudmap[*(U8*)s++] & 077;
1748 if (s < strend && ISUUCHAR(*s))
1749 a = PL_uudmap[*(U8*)s++] & 077;
1752 if (s < strend && ISUUCHAR(*s))
1753 b = PL_uudmap[*(U8*)s++] & 077;
1756 if (s < strend && ISUUCHAR(*s))
1757 c = PL_uudmap[*(U8*)s++] & 077;
1760 if (s < strend && ISUUCHAR(*s))
1761 d = PL_uudmap[*(U8*)s++] & 077;
1764 hunk[0] = (char)((a << 2) | (b >> 4));
1765 hunk[1] = (char)((b << 4) | (c >> 2));
1766 hunk[2] = (char)((c << 6) | d);
1768 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1773 else /* possible checksum byte */
1774 if (s + 1 < strend && s[1] == '\n')
1784 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1785 (checksum > bits_in_uv &&
1786 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1789 anv = (NV) (1 << (checksum & 15));
1790 while (checksum >= 16) {
1794 while (cdouble < 0.0)
1796 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
1797 sv = newSVnv(cdouble);
1800 if (checksum < bits_in_uv) {
1801 UV mask = ((UV)1 << checksum) - 1;
1810 if (symptr->flags & FLAG_SLASH){
1811 if (SP - PL_stack_base - start_sp_offset <= 0)
1813 if( next_symbol(symptr) ){
1814 if( symptr->howlen == e_number )
1815 Perl_croak(aTHX_ "Count after length/code in unpack" );
1817 /* ...end of char buffer then no decent length available */
1818 Perl_croak(aTHX_ "length/code after end of string in unpack" );
1820 /* take top of stack (hope it's numeric) */
1823 Perl_croak(aTHX_ "Negative '/' count in unpack" );
1826 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1828 datumtype = symptr->code;
1829 explicit_length = FALSE;
1837 return SP - PL_stack_base - start_sp_offset;
1845 I32 gimme = GIMME_V;
1848 const char *pat = SvPV_const(left, llen);
1849 const char *s = SvPV_const(right, rlen);
1850 const char *strend = s + rlen;
1851 const char *patend = pat + llen;
1855 cnt = unpackstring(pat, patend, s, strend,
1856 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
1857 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
1860 if ( !cnt && gimme == G_SCALAR )
1861 PUSHs(&PL_sv_undef);
1866 doencodes(U8 *h, const char *s, I32 len)
1868 *h++ = PL_uuemap[len];
1870 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1871 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1872 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1873 *h++ = PL_uuemap[(077 & (s[2] & 077))];
1878 const char r = (len > 1 ? s[1] : '\0');
1879 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1880 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1881 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1882 *h++ = PL_uuemap[0];
1889 S_is_an_int(pTHX_ const char *s, STRLEN l)
1891 SV *result = newSVpvn(s, l);
1892 char *const result_c = SvPV_nolen(result); /* convenience */
1893 char *out = result_c;
1897 PERL_ARGS_ASSERT_IS_AN_INT;
1905 SvREFCNT_dec(result);
1928 SvREFCNT_dec(result);
1934 SvCUR_set(result, out - result_c);
1938 /* pnum must be '\0' terminated */
1940 S_div128(pTHX_ SV *pnum, bool *done)
1943 char * const s = SvPV(pnum, len);
1947 PERL_ARGS_ASSERT_DIV128;
1951 const int i = m * 10 + (*t - '0');
1952 const int r = (i >> 7); /* r < 10 */
1960 SvCUR_set(pnum, (STRLEN) (t - s));
1965 =for apidoc packlist
1967 The engine implementing pack() Perl function.
1973 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
1978 PERL_ARGS_ASSERT_PACKLIST;
1980 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
1982 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1983 Also make sure any UTF8 flag is loaded */
1984 SvPV_force_nolen(cat);
1986 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
1988 (void)pack_rec( cat, &sym, beglist, endlist );
1991 /* like sv_utf8_upgrade, but also repoint the group start markers */
1993 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
1996 const char *from_ptr, *from_start, *from_end, **marks, **m;
1997 char *to_start, *to_ptr;
1999 if (SvUTF8(sv)) return;
2001 from_start = SvPVX_const(sv);
2002 from_end = from_start + SvCUR(sv);
2003 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2004 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
2005 if (from_ptr == from_end) {
2006 /* Simple case: no character needs to be changed */
2011 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2012 Newx(to_start, len, char);
2013 Copy(from_start, to_start, from_ptr-from_start, char);
2014 to_ptr = to_start + (from_ptr-from_start);
2016 Newx(marks, sym_ptr->level+2, const char *);
2017 for (group=sym_ptr; group; group = group->previous)
2018 marks[group->level] = from_start + group->strbeg;
2019 marks[sym_ptr->level+1] = from_end+1;
2020 for (m = marks; *m < from_ptr; m++)
2021 *m = to_start + (*m-from_start);
2023 for (;from_ptr < from_end; from_ptr++) {
2024 while (*m == from_ptr) *m++ = to_ptr;
2025 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2029 while (*m == from_ptr) *m++ = to_ptr;
2030 if (m != marks + sym_ptr->level+1) {
2033 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2034 "level=%d", m, marks, sym_ptr->level);
2036 for (group=sym_ptr; group; group = group->previous)
2037 group->strbeg = marks[group->level] - to_start;
2042 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2043 from_start -= SvIVX(sv);
2046 SvFLAGS(sv) &= ~SVf_OOK;
2049 Safefree(from_start);
2050 SvPV_set(sv, to_start);
2051 SvCUR_set(sv, to_ptr - to_start);
2056 /* Exponential string grower. Makes string extension effectively O(n)
2057 needed says how many extra bytes we need (not counting the final '\0')
2058 Only grows the string if there is an actual lack of space
2061 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2062 const STRLEN cur = SvCUR(sv);
2063 const STRLEN len = SvLEN(sv);
2066 PERL_ARGS_ASSERT_SV_EXP_GROW;
2068 if (len - cur > needed) return SvPVX(sv);
2069 extend = needed > len ? needed : len;
2070 return SvGROW(sv, len+extend+1);
2075 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2078 tempsym_t lookahead;
2079 I32 items = endlist - beglist;
2080 bool found = next_symbol(symptr);
2081 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2082 bool warn_utf8 = ckWARN(WARN_UTF8);
2084 PERL_ARGS_ASSERT_PACK_REC;
2086 if (symptr->level == 0 && found && symptr->code == 'U') {
2087 marked_upgrade(aTHX_ cat, symptr);
2088 symptr->flags |= FLAG_DO_UTF8;
2091 symptr->strbeg = SvCUR(cat);
2097 SV *lengthcode = NULL;
2098 I32 datumtype = symptr->code;
2099 howlen_t howlen = symptr->howlen;
2100 char *start = SvPVX(cat);
2101 char *cur = start + SvCUR(cat);
2104 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2108 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2112 /* e_no_len and e_number */
2113 len = symptr->length;
2118 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2120 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2121 /* We can process this letter. */
2122 STRLEN size = props & PACK_SIZE_MASK;
2123 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2127 /* Look ahead for next symbol. Do we have code/code? */
2128 lookahead = *symptr;
2129 found = next_symbol(&lookahead);
2130 if (symptr->flags & FLAG_SLASH) {
2132 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2133 if (strchr("aAZ", lookahead.code)) {
2134 if (lookahead.howlen == e_number) count = lookahead.length;
2137 count = sv_len_utf8(*beglist);
2140 if (lookahead.code == 'Z') count++;
2143 if (lookahead.howlen == e_number && lookahead.length < items)
2144 count = lookahead.length;
2147 lookahead.howlen = e_number;
2148 lookahead.length = count;
2149 lengthcode = sv_2mortal(newSViv(count));
2152 needs_swap = NEEDS_SWAP(datumtype);
2154 /* Code inside the switch must take care to properly update
2155 cat (CUR length and '\0' termination) if it updated *cur and
2156 doesn't simply leave using break */
2157 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2159 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2160 (int) TYPE_NO_MODIFIERS(datumtype));
2162 Perl_croak(aTHX_ "'%%' may not be used in pack");
2165 case '.' | TYPE_IS_SHRIEKING:
2167 if (howlen == e_star) from = start;
2168 else if (len == 0) from = cur;
2170 tempsym_t *group = symptr;
2172 while (--len && group) group = group->previous;
2173 from = group ? start + group->strbeg : start;
2176 len = SvIV(fromstr);
2178 case '@' | TYPE_IS_SHRIEKING:
2180 from = start + symptr->strbeg;
2182 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2184 while (len && from < cur) {
2185 from += UTF8SKIP(from);
2189 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2191 /* Here we know from == cur */
2193 GROWING(0, cat, start, cur, len);
2194 Zero(cur, len, char);
2196 } else if (from < cur) {
2199 } else goto no_change;
2207 if (len > 0) goto grow;
2208 if (len == 0) goto no_change;
2215 tempsym_t savsym = *symptr;
2216 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2217 symptr->flags |= group_modifiers;
2218 symptr->patend = savsym.grpend;
2220 symptr->previous = &lookahead;
2223 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2224 else symptr->flags &= ~FLAG_PARSE_UTF8;
2225 was_utf8 = SvUTF8(cat);
2226 symptr->patptr = savsym.grpbeg;
2227 beglist = pack_rec(cat, symptr, beglist, endlist);
2228 if (SvUTF8(cat) != was_utf8)
2229 /* This had better be an upgrade while in utf8==0 mode */
2232 if (savsym.howlen == e_star && beglist == endlist)
2233 break; /* No way to continue */
2235 items = endlist - beglist;
2236 lookahead.flags = symptr->flags & ~group_modifiers;
2239 case 'X' | TYPE_IS_SHRIEKING:
2240 if (!len) /* Avoid division by 0 */
2247 hop += UTF8SKIP(hop);
2254 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2258 len = (cur-start) % len;
2262 if (len < 1) goto no_change;
2266 Perl_croak(aTHX_ "'%c' outside of string in pack",
2267 (int) TYPE_NO_MODIFIERS(datumtype));
2268 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2270 Perl_croak(aTHX_ "'%c' outside of string in pack",
2271 (int) TYPE_NO_MODIFIERS(datumtype));
2277 if (cur - start < len)
2278 Perl_croak(aTHX_ "'%c' outside of string in pack",
2279 (int) TYPE_NO_MODIFIERS(datumtype));
2282 if (cur < start+symptr->strbeg) {
2283 /* Make sure group starts don't point into the void */
2285 const STRLEN length = cur-start;
2286 for (group = symptr;
2287 group && length < group->strbeg;
2288 group = group->previous) group->strbeg = length;
2289 lookahead.strbeg = length;
2292 case 'x' | TYPE_IS_SHRIEKING: {
2294 if (!len) /* Avoid division by 0 */
2296 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2297 else ai32 = (cur - start) % len;
2298 if (ai32 == 0) goto no_change;
2310 aptr = SvPV_const(fromstr, fromlen);
2311 if (DO_UTF8(fromstr)) {
2312 const char *end, *s;
2314 if (!utf8 && !SvUTF8(cat)) {
2315 marked_upgrade(aTHX_ cat, symptr);
2316 lookahead.flags |= FLAG_DO_UTF8;
2317 lookahead.strbeg = symptr->strbeg;
2320 cur = start + SvCUR(cat);
2322 if (howlen == e_star) {
2323 if (utf8) goto string_copy;
2327 end = aptr + fromlen;
2328 fromlen = datumtype == 'Z' ? len-1 : len;
2329 while ((I32) fromlen > 0 && s < end) {
2334 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2337 if (datumtype == 'Z') len++;
2343 fromlen = len - fromlen;
2344 if (datumtype == 'Z') fromlen--;
2345 if (howlen == e_star) {
2347 if (datumtype == 'Z') len++;
2349 GROWING(0, cat, start, cur, len);
2350 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2351 datumtype | TYPE_IS_PACK))
2352 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2353 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2354 (int)datumtype, aptr, end, cur, (UV)fromlen);
2358 if (howlen == e_star) {
2360 if (datumtype == 'Z') len++;
2362 if (len <= (I32) fromlen) {
2364 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2366 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2368 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2369 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2371 while (fromlen > 0) {
2372 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2378 if (howlen == e_star) {
2380 if (datumtype == 'Z') len++;
2382 if (len <= (I32) fromlen) {
2384 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2386 GROWING(0, cat, start, cur, len);
2387 Copy(aptr, cur, fromlen, char);
2391 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2398 const char *str, *end;
2405 str = SvPV_const(fromstr, fromlen);
2406 end = str + fromlen;
2407 if (DO_UTF8(fromstr)) {
2409 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2411 utf8_source = FALSE;
2412 utf8_flags = 0; /* Unused, but keep compilers happy */
2414 if (howlen == e_star) len = fromlen;
2415 field_len = (len+7)/8;
2416 GROWING(utf8, cat, start, cur, field_len);
2417 if (len > (I32)fromlen) len = fromlen;
2420 if (datumtype == 'B')
2424 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2426 } else bits |= *str++ & 1;
2427 if (l & 7) bits <<= 1;
2429 PUSH_BYTE(utf8, cur, bits);
2434 /* datumtype == 'b' */
2438 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2439 if (val & 1) bits |= 0x80;
2440 } else if (*str++ & 1)
2442 if (l & 7) bits >>= 1;
2444 PUSH_BYTE(utf8, cur, bits);
2450 if (datumtype == 'B')
2451 bits <<= 7 - (l & 7);
2453 bits >>= 7 - (l & 7);
2454 PUSH_BYTE(utf8, cur, bits);
2457 /* Determine how many chars are left in the requested field */
2459 if (howlen == e_star) field_len = 0;
2460 else field_len -= l;
2461 Zero(cur, field_len, char);
2467 const char *str, *end;
2474 str = SvPV_const(fromstr, fromlen);
2475 end = str + fromlen;
2476 if (DO_UTF8(fromstr)) {
2478 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2480 utf8_source = FALSE;
2481 utf8_flags = 0; /* Unused, but keep compilers happy */
2483 if (howlen == e_star) len = fromlen;
2484 field_len = (len+1)/2;
2485 GROWING(utf8, cat, start, cur, field_len);
2486 if (!utf8 && len > (I32)fromlen) len = fromlen;
2489 if (datumtype == 'H')
2493 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2494 if (val < 256 && isALPHA(val))
2495 bits |= (val + 9) & 0xf;
2498 } else if (isALPHA(*str))
2499 bits |= (*str++ + 9) & 0xf;
2501 bits |= *str++ & 0xf;
2502 if (l & 1) bits <<= 4;
2504 PUSH_BYTE(utf8, cur, bits);
2512 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2513 if (val < 256 && isALPHA(val))
2514 bits |= ((val + 9) & 0xf) << 4;
2516 bits |= (val & 0xf) << 4;
2517 } else if (isALPHA(*str))
2518 bits |= ((*str++ + 9) & 0xf) << 4;
2520 bits |= (*str++ & 0xf) << 4;
2521 if (l & 1) bits >>= 4;
2523 PUSH_BYTE(utf8, cur, bits);
2529 PUSH_BYTE(utf8, cur, bits);
2532 /* Determine how many chars are left in the requested field */
2534 if (howlen == e_star) field_len = 0;
2535 else field_len -= l;
2536 Zero(cur, field_len, char);
2544 aiv = SvIV(fromstr);
2545 if ((-128 > aiv || aiv > 127))
2546 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2547 "Character in 'c' format wrapped in pack");
2548 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2553 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2559 aiv = SvIV(fromstr);
2560 if ((0 > aiv || aiv > 0xff))
2561 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2562 "Character in 'C' format wrapped in pack");
2563 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2568 U8 in_bytes = (U8)IN_BYTES;
2570 end = start+SvLEN(cat)-1;
2571 if (utf8) end -= UTF8_MAXLEN-1;
2575 auv = SvUV(fromstr);
2576 if (in_bytes) auv = auv % 0x100;
2581 SvCUR_set(cat, cur - start);
2583 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2584 end = start+SvLEN(cat)-UTF8_MAXLEN;
2586 cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
2589 0 : UNICODE_ALLOW_ANY);
2594 SvCUR_set(cat, cur - start);
2595 marked_upgrade(aTHX_ cat, symptr);
2596 lookahead.flags |= FLAG_DO_UTF8;
2597 lookahead.strbeg = symptr->strbeg;
2600 cur = start + SvCUR(cat);
2601 end = start+SvLEN(cat)-UTF8_MAXLEN;
2604 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2605 "Character in 'W' format wrapped in pack");
2610 SvCUR_set(cat, cur - start);
2611 GROWING(0, cat, start, cur, len+1);
2612 end = start+SvLEN(cat)-1;
2614 *(U8 *) cur++ = (U8)auv;
2623 if (!(symptr->flags & FLAG_DO_UTF8)) {
2624 marked_upgrade(aTHX_ cat, symptr);
2625 lookahead.flags |= FLAG_DO_UTF8;
2626 lookahead.strbeg = symptr->strbeg;
2632 end = start+SvLEN(cat);
2633 if (!utf8) end -= UTF8_MAXLEN;
2637 auv = SvUV(fromstr);
2639 U8 buffer[UTF8_MAXLEN], *endb;
2640 endb = uvchr_to_utf8_flags(buffer, auv,
2642 0 : UNICODE_ALLOW_ANY);
2643 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2645 SvCUR_set(cat, cur - start);
2646 GROWING(0, cat, start, cur,
2647 len+(endb-buffer)*UTF8_EXPAND);
2648 end = start+SvLEN(cat);
2650 cur = S_bytes_to_uni(buffer, endb-buffer, cur, 0);
2654 SvCUR_set(cat, cur - start);
2655 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2656 end = start+SvLEN(cat)-UTF8_MAXLEN;
2658 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv,
2660 0 : UNICODE_ALLOW_ANY);
2665 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
2671 anv = SvNV(fromstr);
2672 # if defined(VMS) && !defined(_IEEE_FP)
2673 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2674 * on Alpha; fake it if we don't have them.
2678 else if (anv < -FLT_MAX)
2680 else afloat = (float)anv;
2682 afloat = (float)anv;
2684 PUSH_VAR(utf8, cur, afloat, needs_swap);
2692 anv = SvNV(fromstr);
2693 # if defined(VMS) && !defined(_IEEE_FP)
2694 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2695 * on Alpha; fake it if we don't have them.
2699 else if (anv < -DBL_MAX)
2701 else adouble = (double)anv;
2703 adouble = (double)anv;
2705 PUSH_VAR(utf8, cur, adouble, needs_swap);
2710 Zero(&anv, 1, NV); /* can be long double with unused bits */
2714 /* to work round a gcc/x86 bug; don't use SvNV */
2715 anv.nv = sv_2nv(fromstr);
2717 anv.nv = SvNV(fromstr);
2719 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
2723 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2726 /* long doubles can have unused bits, which may be nonzero */
2727 Zero(&aldouble, 1, long double);
2731 /* to work round a gcc/x86 bug; don't use SvNV */
2732 aldouble.ld = (long double)sv_2nv(fromstr);
2734 aldouble.ld = (long double)SvNV(fromstr);
2736 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2742 case 'n' | TYPE_IS_SHRIEKING:
2747 ai16 = (I16)SvIV(fromstr);
2748 ai16 = PerlSock_htons(ai16);
2749 PUSH16(utf8, cur, &ai16, FALSE);
2752 case 'v' | TYPE_IS_SHRIEKING:
2757 ai16 = (I16)SvIV(fromstr);
2759 PUSH16(utf8, cur, &ai16, FALSE);
2762 case 'S' | TYPE_IS_SHRIEKING:
2763 #if SHORTSIZE != SIZE16
2765 unsigned short aushort;
2767 aushort = SvUV(fromstr);
2768 PUSH_VAR(utf8, cur, aushort, needs_swap);
2778 au16 = (U16)SvUV(fromstr);
2779 PUSH16(utf8, cur, &au16, needs_swap);
2782 case 's' | TYPE_IS_SHRIEKING:
2783 #if SHORTSIZE != SIZE16
2787 ashort = SvIV(fromstr);
2788 PUSH_VAR(utf8, cur, ashort, needs_swap);
2798 ai16 = (I16)SvIV(fromstr);
2799 PUSH16(utf8, cur, &ai16, needs_swap);
2803 case 'I' | TYPE_IS_SHRIEKING:
2807 auint = SvUV(fromstr);
2808 PUSH_VAR(utf8, cur, auint, needs_swap);
2815 aiv = SvIV(fromstr);
2816 PUSH_VAR(utf8, cur, aiv, needs_swap);
2823 auv = SvUV(fromstr);
2824 PUSH_VAR(utf8, cur, auv, needs_swap);
2831 anv = SvNV(fromstr);
2835 SvCUR_set(cat, cur - start);
2836 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
2839 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2840 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2841 any negative IVs will have already been got by the croak()
2842 above. IOK is untrue for fractions, so we test them
2843 against UV_MAX_P1. */
2844 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2845 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
2846 char *in = buf + sizeof(buf);
2847 UV auv = SvUV(fromstr);
2850 *--in = (char)((auv & 0x7f) | 0x80);
2853 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2854 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2855 in, (buf + sizeof(buf)) - in);
2856 } else if (SvPOKp(fromstr))
2858 else if (SvNOKp(fromstr)) {
2859 /* 10**NV_MAX_10_EXP is the largest power of 10
2860 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
2861 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2862 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2863 And with that many bytes only Inf can overflow.
2864 Some C compilers are strict about integral constant
2865 expressions so we conservatively divide by a slightly
2866 smaller integer instead of multiplying by the exact
2867 floating-point value.
2869 #ifdef NV_MAX_10_EXP
2870 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
2871 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
2873 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
2874 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
2876 char *in = buf + sizeof(buf);
2878 anv = Perl_floor(anv);
2880 const NV next = Perl_floor(anv / 128);
2881 if (in <= buf) /* this cannot happen ;-) */
2882 Perl_croak(aTHX_ "Cannot compress integer in pack");
2883 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
2886 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2887 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2888 in, (buf + sizeof(buf)) - in);
2897 /* Copy string and check for compliance */
2898 from = SvPV_const(fromstr, len);
2899 if ((norm = is_an_int(from, len)) == NULL)
2900 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
2902 Newx(result, len, char);
2905 while (!done) *--in = div128(norm, &done) | 0x80;
2906 result[len - 1] &= 0x7F; /* clear continue bit */
2907 PUSH_GROWING_BYTES(utf8, cat, start, cur,
2908 in, (result + len) - in);
2910 SvREFCNT_dec(norm); /* free norm */
2915 case 'i' | TYPE_IS_SHRIEKING:
2919 aint = SvIV(fromstr);
2920 PUSH_VAR(utf8, cur, aint, needs_swap);
2923 case 'N' | TYPE_IS_SHRIEKING:
2928 au32 = SvUV(fromstr);
2929 au32 = PerlSock_htonl(au32);
2930 PUSH32(utf8, cur, &au32, FALSE);
2933 case 'V' | TYPE_IS_SHRIEKING:
2938 au32 = SvUV(fromstr);
2940 PUSH32(utf8, cur, &au32, FALSE);
2943 case 'L' | TYPE_IS_SHRIEKING:
2944 #if LONGSIZE != SIZE32
2946 unsigned long aulong;
2948 aulong = SvUV(fromstr);
2949 PUSH_VAR(utf8, cur, aulong, needs_swap);
2959 au32 = SvUV(fromstr);
2960 PUSH32(utf8, cur, &au32, needs_swap);
2963 case 'l' | TYPE_IS_SHRIEKING:
2964 #if LONGSIZE != SIZE32
2968 along = SvIV(fromstr);
2969 PUSH_VAR(utf8, cur, along, needs_swap);
2979 ai32 = SvIV(fromstr);
2980 PUSH32(utf8, cur, &ai32, needs_swap);
2983 #if defined(HAS_QUAD) && IVSIZE >= 8
2988 auquad = (Uquad_t) SvUV(fromstr);
2989 PUSH_VAR(utf8, cur, auquad, needs_swap);
2996 aquad = (Quad_t)SvIV(fromstr);
2997 PUSH_VAR(utf8, cur, aquad, needs_swap);
3002 len = 1; /* assume SV is correct length */
3003 GROWING(utf8, cat, start, cur, sizeof(char *));
3010 SvGETMAGIC(fromstr);
3011 if (!SvOK(fromstr)) aptr = NULL;
3013 /* XXX better yet, could spirit away the string to
3014 * a safe spot and hang on to it until the result
3015 * of pack() (and all copies of the result) are
3018 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3019 !SvREADONLY(fromstr)))) {
3020 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3021 "Attempt to pack pointer to temporary value");
3023 if (SvPOK(fromstr) || SvNIOK(fromstr))
3024 aptr = SvPV_nomg_const_nolen(fromstr);
3026 aptr = SvPV_force_flags_nolen(fromstr, 0);
3028 PUSH_VAR(utf8, cur, aptr, needs_swap);
3032 const char *aptr, *aend;
3036 if (len <= 2) len = 45;
3037 else len = len / 3 * 3;
3039 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3040 "Field too wide in 'u' format in pack");
3043 aptr = SvPV_const(fromstr, fromlen);
3044 from_utf8 = DO_UTF8(fromstr);
3046 aend = aptr + fromlen;
3047 fromlen = sv_len_utf8_nomg(fromstr);
3048 } else aend = NULL; /* Unused, but keep compilers happy */
3049 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3050 while (fromlen > 0) {
3053 U8 hunk[1+63/3*4+1];
3055 if ((I32)fromlen > len)
3061 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3062 'u' | TYPE_IS_PACK)) {
3064 SvCUR_set(cat, cur - start);
3065 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3066 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3067 aptr, aend, buffer, (long) todo);
3069 end = doencodes(hunk, buffer, todo);
3071 end = doencodes(hunk, aptr, todo);
3074 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
3081 SvCUR_set(cat, cur - start);
3083 *symptr = lookahead;
3092 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3095 SV *pat_sv = *++MARK;
3096 const char *pat = SvPV_const(pat_sv, fromlen);
3097 const char *patend = pat + fromlen;
3103 packlist(cat, pat, patend, MARK, SP + 1);
3113 * c-indentation-style: bsd
3115 * indent-tabs-mode: nil
3118 * ex: set ts=8 sts=4 sw=4 et: