3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
32 #define PERL_IN_PP_PACK_C
35 /* Types used by pack/unpack */
37 e_no_len, /* no length */
38 e_number, /* number, [] */
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
57 #define TEMPSYM_INIT(symptr, p, e, f) \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
81 U8 bytes[sizeof(long double)];
86 # define PERL_PACK_CAN_BYTEORDER
87 # define PERL_PACK_CAN_SHRIEKSIGN
93 /* Maximum number of bytes to which a byte can grow due to upgrade */
97 * Offset for integer pack/unpack.
99 * On architectures where I16 and I32 aren't really 16 and 32 bits,
100 * which for now are all Crays, pack and unpack have to play games.
104 * These values are required for portability of pack() output.
105 * If they're not right on your machine, then pack() and unpack()
106 * wouldn't work right anyway; you'll need to apply the Cray hack.
107 * (I'd like to check them with #if, but you can't use sizeof() in
108 * the preprocessor.) --???
111 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
112 defines are now in config.h. --Andy Dougherty April 1998
117 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
120 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
121 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
122 # define OFF16(p) ((char*)(p))
123 # define OFF32(p) ((char*)(p))
125 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
126 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
127 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
129 ++++ bad cray byte order
133 # define OFF16(p) ((char *) (p))
134 # define OFF32(p) ((char *) (p))
137 /* Only to be used inside a loop (see the break) */
138 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
140 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
142 Copy(s, OFF16(p), SIZE16, char); \
147 /* Only to be used inside a loop (see the break) */
148 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
150 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
152 Copy(s, OFF32(p), SIZE32, char); \
157 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
158 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
160 /* Only to be used inside a loop (see the break) */
161 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype) \
164 if (!uni_to_bytes(aTHX_ &s, strend, \
165 (char *) (buf), len, datumtype)) break; \
167 Copy(s, (char *) (buf), len, char); \
172 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
173 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype)
175 #define PUSH_VAR(utf8, aptr, var) \
176 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
178 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
179 #define MAX_SUB_TEMPLATE_LEVEL 100
181 /* flags (note that type modifiers can also be used as flags!) */
182 #define FLAG_WAS_UTF8 0x40
183 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
184 #define FLAG_UNPACK_ONLY_ONE 0x10
185 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
186 #define FLAG_SLASH 0x04
187 #define FLAG_COMMA 0x02
188 #define FLAG_PACK 0x01
191 S_mul128(pTHX_ SV *sv, U8 m)
194 char *s = SvPV(sv, len);
197 PERL_ARGS_ASSERT_MUL128;
199 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
200 SV * const tmpNew = newSVpvs("0000000000");
202 sv_catsv(tmpNew, sv);
203 SvREFCNT_dec(sv); /* free old sv */
208 while (!*t) /* trailing '\0'? */
211 const U32 i = ((*t - '0') << 7) + m;
212 *(t--) = '0' + (char)(i % 10);
218 /* Explosives and implosives. */
220 #if 'I' == 73 && 'J' == 74
221 /* On an ASCII/ISO kind of system */
222 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
225 Some other sort of character set - use memchr() so we don't match
228 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
232 #define TYPE_IS_SHRIEKING 0x100
233 #define TYPE_IS_BIG_ENDIAN 0x200
234 #define TYPE_IS_LITTLE_ENDIAN 0x400
235 #define TYPE_IS_PACK 0x800
236 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
237 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
238 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
240 #ifdef PERL_PACK_CAN_SHRIEKSIGN
241 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
243 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
246 #ifndef PERL_PACK_CAN_BYTEORDER
247 /* Put "can't" first because it is shorter */
248 # define TYPE_ENDIANNESS(t) 0
249 # define TYPE_NO_ENDIANNESS(t) (t)
251 # define ENDIANNESS_ALLOWED_TYPES ""
253 # define DO_BO_UNPACK(var, type)
254 # define DO_BO_PACK(var, type)
255 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
256 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
257 # define DO_BO_UNPACK_N(var, type)
258 # define DO_BO_PACK_N(var, type)
259 # define DO_BO_UNPACK_P(var)
260 # define DO_BO_PACK_P(var)
261 # define DO_BO_UNPACK_PC(var)
262 # define DO_BO_PACK_PC(var)
264 #else /* PERL_PACK_CAN_BYTEORDER */
266 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
267 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
269 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
271 # define DO_BO_UNPACK(var, type) \
273 switch (TYPE_ENDIANNESS(datumtype)) { \
274 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
275 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
280 # define DO_BO_PACK(var, type) \
282 switch (TYPE_ENDIANNESS(datumtype)) { \
283 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
284 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
289 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
291 switch (TYPE_ENDIANNESS(datumtype)) { \
292 case TYPE_IS_BIG_ENDIAN: \
293 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
295 case TYPE_IS_LITTLE_ENDIAN: \
296 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
303 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
305 switch (TYPE_ENDIANNESS(datumtype)) { \
306 case TYPE_IS_BIG_ENDIAN: \
307 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
309 case TYPE_IS_LITTLE_ENDIAN: \
310 var = (post_cast *) my_htole ## type ((pre_cast) var); \
317 # define BO_CANT_DOIT(action, type) \
319 switch (TYPE_ENDIANNESS(datumtype)) { \
320 case TYPE_IS_BIG_ENDIAN: \
321 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
322 "platform", #action, #type); \
324 case TYPE_IS_LITTLE_ENDIAN: \
325 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
326 "platform", #action, #type); \
333 # if PTRSIZE == INTSIZE
334 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
335 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
336 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
337 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
338 # elif PTRSIZE == LONGSIZE
339 # if LONGSIZE < IVSIZE && IVSIZE == 8
340 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, 64, IV, void)
341 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, 64, IV, void)
342 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, 64, IV, char)
343 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, 64, IV, char)
345 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
346 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
347 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
348 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
350 # elif PTRSIZE == IVSIZE
351 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
352 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
353 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
354 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
356 # define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
357 # define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
358 # define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
359 # define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
362 # if defined(my_htolen) && defined(my_letohn) && \
363 defined(my_htoben) && defined(my_betohn)
364 # define DO_BO_UNPACK_N(var, type) \
366 switch (TYPE_ENDIANNESS(datumtype)) { \
367 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
368 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
373 # define DO_BO_PACK_N(var, type) \
375 switch (TYPE_ENDIANNESS(datumtype)) { \
376 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
377 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
382 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
383 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
386 #endif /* PERL_PACK_CAN_BYTEORDER */
388 #define PACK_SIZE_CANNOT_CSUM 0x80
389 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
390 #define PACK_SIZE_MASK 0x3F
392 /* These tables are regenerated by genpacksizetables.pl (and then hand pasted
393 in). You're unlikely ever to need to regenerate them. */
395 #if TYPE_IS_SHRIEKING != 0x100
396 ++++shriek offset should be 256
399 typedef U8 packprops_t;
402 STATIC const packprops_t packprops[512] = {
404 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
405 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
406 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
407 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
409 /* C */ sizeof(unsigned char),
410 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
411 /* D */ LONG_DOUBLESIZE,
418 /* I */ sizeof(unsigned int),
425 #if defined(HAS_QUAD)
426 /* Q */ sizeof(Uquad_t),
433 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
435 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 /* c */ sizeof(char),
438 /* d */ sizeof(double),
440 /* f */ sizeof(float),
449 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
450 #if defined(HAS_QUAD)
451 /* q */ sizeof(Quad_t),
459 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
460 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
461 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
462 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
463 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
464 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
465 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
466 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
467 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
468 0, 0, 0, 0, 0, 0, 0, 0,
470 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
471 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
472 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
473 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
474 0, 0, 0, 0, 0, 0, 0, 0, 0,
475 /* I */ sizeof(unsigned int),
477 /* L */ sizeof(unsigned long),
479 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
485 /* S */ sizeof(unsigned short),
487 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
496 /* l */ sizeof(long),
498 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
504 /* s */ sizeof(short),
506 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
511 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
512 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
513 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
514 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
515 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
516 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
518 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
519 0, 0, 0, 0, 0, 0, 0, 0, 0
522 /* EBCDIC (or bust) */
523 STATIC const packprops_t packprops[512] = {
525 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
526 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
527 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
528 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
529 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
530 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
531 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
532 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
534 /* c */ sizeof(char),
535 /* d */ sizeof(double),
537 /* f */ sizeof(float),
547 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
548 #if defined(HAS_QUAD)
549 /* q */ sizeof(Quad_t),
553 0, 0, 0, 0, 0, 0, 0, 0, 0,
557 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
558 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
559 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
560 /* C */ sizeof(unsigned char),
561 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
562 /* D */ LONG_DOUBLESIZE,
569 /* I */ sizeof(unsigned int),
577 #if defined(HAS_QUAD)
578 /* Q */ sizeof(Uquad_t),
582 0, 0, 0, 0, 0, 0, 0, 0, 0,
585 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
587 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
588 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
589 0, 0, 0, 0, 0, 0, 0, 0, 0,
591 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
592 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
593 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
594 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
595 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
596 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
597 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
598 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
599 0, 0, 0, 0, 0, 0, 0, 0, 0,
601 0, 0, 0, 0, 0, 0, 0, 0, 0,
602 /* l */ sizeof(long),
604 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
609 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
610 /* s */ sizeof(short),
612 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
617 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
618 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
620 /* I */ sizeof(unsigned int),
621 0, 0, 0, 0, 0, 0, 0, 0, 0,
622 /* L */ sizeof(unsigned long),
624 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
629 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
630 /* S */ sizeof(unsigned short),
632 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
637 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
638 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
643 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
646 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
647 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
648 /* We try to process malformed UTF-8 as much as possible (preferably with
649 warnings), but these two mean we make no progress in the string and
650 might enter an infinite loop */
651 if (retlen == (STRLEN) -1 || retlen == 0)
652 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
653 (int) TYPE_NO_MODIFIERS(datumtype));
655 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
656 "Character in '%c' format wrapped in unpack",
657 (int) TYPE_NO_MODIFIERS(datumtype));
664 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
665 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
669 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
673 const char *from = *s;
675 const U32 flags = ckWARN(WARN_UTF8) ?
676 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
677 for (;buf_len > 0; buf_len--) {
678 if (from >= end) return FALSE;
679 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
680 if (retlen == (STRLEN) -1 || retlen == 0) {
681 from += UTF8SKIP(from);
683 } else from += retlen;
688 *(U8 *)buf++ = (U8)val;
690 /* We have enough characters for the buffer. Did we have problems ? */
693 /* Rewalk the string fragment while warning */
695 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
696 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
697 if (ptr >= end) break;
698 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
700 if (from > end) from = end;
703 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
704 WARN_PACK : WARN_UNPACK),
705 "Character(s) in '%c' format wrapped in %s",
706 (int) TYPE_NO_MODIFIERS(datumtype),
707 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
714 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
718 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
719 if (val >= 0x100 || !ISUUCHAR(val) ||
720 retlen == (STRLEN) -1 || retlen == 0) {
724 *out = PL_uudmap[val] & 077;
730 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
731 const U8 * const end = start + len;
733 PERL_ARGS_ASSERT_BYTES_TO_UNI;
735 while (start < end) {
736 const UV uv = NATIVE_TO_ASCII(*start);
737 if (UNI_IS_INVARIANT(uv))
738 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
740 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
741 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
748 #define PUSH_BYTES(utf8, cur, buf, len) \
751 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
753 Copy(buf, cur, len, char); \
758 #define GROWING(utf8, cat, start, cur, in_len) \
760 STRLEN glen = (in_len); \
761 if (utf8) glen *= UTF8_EXPAND; \
762 if ((cur) + glen >= (start) + SvLEN(cat)) { \
763 (start) = sv_exp_grow(cat, glen); \
764 (cur) = (start) + SvCUR(cat); \
768 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
770 const STRLEN glen = (in_len); \
772 if (utf8) gl *= UTF8_EXPAND; \
773 if ((cur) + gl >= (start) + SvLEN(cat)) { \
775 SvCUR_set((cat), (cur) - (start)); \
776 (start) = sv_exp_grow(cat, gl); \
777 (cur) = (start) + SvCUR(cat); \
779 PUSH_BYTES(utf8, cur, buf, glen); \
782 #define PUSH_BYTE(utf8, s, byte) \
785 const U8 au8 = (byte); \
786 (s) = bytes_to_uni(&au8, 1, (s)); \
787 } else *(U8 *)(s)++ = (byte); \
790 /* Only to be used inside a loop (see the break) */
791 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
794 if (str >= end) break; \
795 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
796 if (retlen == (STRLEN) -1 || retlen == 0) { \
798 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
803 static const char *_action( const tempsym_t* symptr )
805 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
808 /* Returns the sizeof() struct described by pat */
810 S_measure_struct(pTHX_ tempsym_t* symptr)
814 PERL_ARGS_ASSERT_MEASURE_STRUCT;
816 while (next_symbol(symptr)) {
820 switch (symptr->howlen) {
822 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
826 /* e_no_len and e_number */
827 len = symptr->length;
831 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
834 /* endianness doesn't influence the size of a type */
835 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
837 Perl_croak(aTHX_ "Invalid type '%c' in %s",
838 (int)TYPE_NO_MODIFIERS(symptr->code),
840 #ifdef PERL_PACK_CAN_SHRIEKSIGN
841 case '.' | TYPE_IS_SHRIEKING:
842 case '@' | TYPE_IS_SHRIEKING:
847 case 'U': /* XXXX Is it correct? */
850 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
851 (int) TYPE_NO_MODIFIERS(symptr->code),
858 tempsym_t savsym = *symptr;
859 symptr->patptr = savsym.grpbeg;
860 symptr->patend = savsym.grpend;
861 /* XXXX Theoretically, we need to measure many times at
862 different positions, since the subexpression may contain
863 alignment commands, but be not of aligned length.
864 Need to detect this and croak(). */
865 size = measure_struct(symptr);
869 case 'X' | TYPE_IS_SHRIEKING:
870 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
872 if (!len) /* Avoid division by 0 */
874 len = total % len; /* Assumed: the start is aligned. */
879 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
881 case 'x' | TYPE_IS_SHRIEKING:
882 if (!len) /* Avoid division by 0 */
884 star = total % len; /* Assumed: the start is aligned. */
885 if (star) /* Other portable ways? */
909 size = sizeof(char*);
919 /* locate matching closing parenthesis or bracket
920 * returns char pointer to char after match, or NULL
923 S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
925 PERL_ARGS_ASSERT_GROUP_END;
927 while (patptr < patend) {
928 const char c = *patptr++;
935 while (patptr < patend && *patptr != '\n')
939 patptr = group_end(patptr, patend, ')') + 1;
941 patptr = group_end(patptr, patend, ']') + 1;
943 Perl_croak(aTHX_ "No group ending character '%c' found in template",
949 /* Convert unsigned decimal number to binary.
950 * Expects a pointer to the first digit and address of length variable
951 * Advances char pointer to 1st non-digit char and returns number
954 S_get_num(pTHX_ const char *patptr, I32 *lenptr )
956 I32 len = *patptr++ - '0';
958 PERL_ARGS_ASSERT_GET_NUM;
960 while (isDIGIT(*patptr)) {
961 if (len >= 0x7FFFFFFF/10)
962 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
963 len = (len * 10) + (*patptr++ - '0');
969 /* The marvellous template parsing routine: Using state stored in *symptr,
970 * locates next template code and count
973 S_next_symbol(pTHX_ tempsym_t* symptr )
975 const char* patptr = symptr->patptr;
976 const char* const patend = symptr->patend;
978 PERL_ARGS_ASSERT_NEXT_SYMBOL;
980 symptr->flags &= ~FLAG_SLASH;
982 while (patptr < patend) {
983 if (isSPACE(*patptr))
985 else if (*patptr == '#') {
987 while (patptr < patend && *patptr != '\n')
992 /* We should have found a template code */
993 I32 code = *patptr++ & 0xFF;
994 U32 inherited_modifiers = 0;
996 if (code == ','){ /* grandfather in commas but with a warning */
997 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
998 symptr->flags |= FLAG_COMMA;
999 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1000 "Invalid type ',' in %s", _action( symptr ) );
1005 /* for '(', skip to ')' */
1007 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
1008 Perl_croak(aTHX_ "()-group starts with a count in %s",
1009 _action( symptr ) );
1010 symptr->grpbeg = patptr;
1011 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
1012 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
1013 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
1014 _action( symptr ) );
1017 /* look for group modifiers to inherit */
1018 if (TYPE_ENDIANNESS(symptr->flags)) {
1019 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
1020 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
1023 /* look for modifiers */
1024 while (patptr < patend) {
1025 const char *allowed;
1029 modifier = TYPE_IS_SHRIEKING;
1030 allowed = SHRIEKING_ALLOWED_TYPES;
1032 #ifdef PERL_PACK_CAN_BYTEORDER
1034 modifier = TYPE_IS_BIG_ENDIAN;
1035 allowed = ENDIANNESS_ALLOWED_TYPES;
1038 modifier = TYPE_IS_LITTLE_ENDIAN;
1039 allowed = ENDIANNESS_ALLOWED_TYPES;
1041 #endif /* PERL_PACK_CAN_BYTEORDER */
1051 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1052 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
1053 allowed, _action( symptr ) );
1055 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1056 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
1057 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
1058 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1059 TYPE_ENDIANNESS_MASK)
1060 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
1061 *patptr, _action( symptr ) );
1063 if ((code & modifier)) {
1064 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
1065 "Duplicate modifier '%c' after '%c' in %s",
1066 *patptr, (int) TYPE_NO_MODIFIERS(code),
1067 _action( symptr ) );
1074 /* inherit modifiers */
1075 code |= inherited_modifiers;
1077 /* look for count and/or / */
1078 if (patptr < patend) {
1079 if (isDIGIT(*patptr)) {
1080 patptr = get_num( patptr, &symptr->length );
1081 symptr->howlen = e_number;
1083 } else if (*patptr == '*') {
1085 symptr->howlen = e_star;
1087 } else if (*patptr == '[') {
1088 const char* lenptr = ++patptr;
1089 symptr->howlen = e_number;
1090 patptr = group_end( patptr, patend, ']' ) + 1;
1091 /* what kind of [] is it? */
1092 if (isDIGIT(*lenptr)) {
1093 lenptr = get_num( lenptr, &symptr->length );
1094 if( *lenptr != ']' )
1095 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1096 _action( symptr ) );
1098 tempsym_t savsym = *symptr;
1099 symptr->patend = patptr-1;
1100 symptr->patptr = lenptr;
1101 savsym.length = measure_struct(symptr);
1105 symptr->howlen = e_no_len;
1110 while (patptr < patend) {
1111 if (isSPACE(*patptr))
1113 else if (*patptr == '#') {
1115 while (patptr < patend && *patptr != '\n')
1117 if (patptr < patend)
1120 if (*patptr == '/') {
1121 symptr->flags |= FLAG_SLASH;
1123 if (patptr < patend &&
1124 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1125 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1126 _action( symptr ) );
1132 /* at end - no count, no / */
1133 symptr->howlen = e_no_len;
1137 symptr->code = code;
1138 symptr->patptr = patptr;
1142 symptr->patptr = patptr;
1147 There is no way to cleanly handle the case where we should process the
1148 string per byte in its upgraded form while it's really in downgraded form
1149 (e.g. estimates like strend-s as an upper bound for the number of
1150 characters left wouldn't work). So if we foresee the need of this
1151 (pattern starts with U or contains U0), we want to work on the encoded
1152 version of the string. Users are advised to upgrade their pack string
1153 themselves if they need to do a lot of unpacks like this on it
1156 need_utf8(const char *pat, const char *patend)
1160 PERL_ARGS_ASSERT_NEED_UTF8;
1162 while (pat < patend) {
1163 if (pat[0] == '#') {
1165 pat = (const char *) memchr(pat, '\n', patend-pat);
1166 if (!pat) return FALSE;
1167 } else if (pat[0] == 'U') {
1168 if (first || pat[1] == '0') return TRUE;
1169 } else first = FALSE;
1176 first_symbol(const char *pat, const char *patend) {
1177 PERL_ARGS_ASSERT_FIRST_SYMBOL;
1179 while (pat < patend) {
1180 if (pat[0] != '#') return pat[0];
1182 pat = (const char *) memchr(pat, '\n', patend-pat);
1190 =for apidoc unpackstring
1192 The engine implementing the unpack() Perl function.
1194 Using the template pat..patend, this function unpacks the string
1195 s..strend into a number of mortal SVs, which it pushes onto the perl
1196 argument (@_) stack (so you will need to issue a C<PUTBACK> before and
1197 C<SPAGAIN> after the call to this function). It returns the number of
1200 The strend and patend pointers should point to the byte following the last
1201 character of each string.
1203 Although this function returns its values on the perl argument stack, it
1204 doesn't take any parameters from that stack (and thus in particular
1205 there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
1211 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
1215 PERL_ARGS_ASSERT_UNPACKSTRING;
1217 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1218 else if (need_utf8(pat, patend)) {
1219 /* We probably should try to avoid this in case a scalar context call
1220 wouldn't get to the "U0" */
1221 STRLEN len = strend - s;
1222 s = (char *) bytes_to_utf8((U8 *) s, &len);
1225 flags |= FLAG_DO_UTF8;
1228 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1229 flags |= FLAG_PARSE_UTF8;
1231 TEMPSYM_INIT(&sym, pat, patend, flags);
1233 return unpack_rec(&sym, s, s, strend, NULL );
1237 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
1241 const I32 start_sp_offset = SP - PL_stack_base;
1246 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1247 bool beyond = FALSE;
1248 bool explicit_length;
1249 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1250 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1252 PERL_ARGS_ASSERT_UNPACK_REC;
1254 symptr->strbeg = s - strbeg;
1256 while (next_symbol(symptr)) {
1259 I32 datumtype = symptr->code;
1260 /* do first one only unless in list context
1261 / is implemented by unpacking the count, then popping it from the
1262 stack, so must check that we're not in the middle of a / */
1263 if ( unpack_only_one
1264 && (SP - PL_stack_base == start_sp_offset + 1)
1265 && (datumtype != '/') ) /* XXX can this be omitted */
1268 switch (howlen = symptr->howlen) {
1270 len = strend - strbeg; /* long enough */
1273 /* e_no_len and e_number */
1274 len = symptr->length;
1278 explicit_length = TRUE;
1280 beyond = s >= strend;
1282 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1284 /* props nonzero means we can process this letter. */
1285 const long size = props & PACK_SIZE_MASK;
1286 const long howmany = (strend - s) / size;
1290 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1291 if (len && unpack_only_one) len = 1;
1297 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1299 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1302 if (howlen == e_no_len)
1303 len = 16; /* len is not specified */
1311 tempsym_t savsym = *symptr;
1312 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1313 symptr->flags |= group_modifiers;
1314 symptr->patend = savsym.grpend;
1315 symptr->previous = &savsym;
1318 if (len && unpack_only_one) len = 1;
1320 symptr->patptr = savsym.grpbeg;
1321 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1322 else symptr->flags &= ~FLAG_PARSE_UTF8;
1323 unpack_rec(symptr, s, strbeg, strend, &s);
1324 if (s == strend && savsym.howlen == e_star)
1325 break; /* No way to continue */
1328 savsym.flags = symptr->flags & ~group_modifiers;
1332 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1333 case '.' | TYPE_IS_SHRIEKING:
1338 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1339 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1340 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1341 const bool u8 = utf8;
1343 if (howlen == e_star) from = strbeg;
1344 else if (len <= 0) from = s;
1346 tempsym_t *group = symptr;
1348 while (--len && group) group = group->previous;
1349 from = group ? strbeg + group->strbeg : strbeg;
1352 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1353 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1357 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1358 case '@' | TYPE_IS_SHRIEKING:
1361 s = strbeg + symptr->strbeg;
1362 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1363 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1364 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1370 Perl_croak(aTHX_ "'@' outside of string in unpack");
1375 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1378 Perl_croak(aTHX_ "'@' outside of string in unpack");
1382 case 'X' | TYPE_IS_SHRIEKING:
1383 if (!len) /* Avoid division by 0 */
1386 const char *hop, *last;
1388 hop = last = strbeg;
1390 hop += UTF8SKIP(hop);
1397 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1401 len = (s - strbeg) % len;
1407 Perl_croak(aTHX_ "'X' outside of string in unpack");
1408 while (--s, UTF8_IS_CONTINUATION(*s)) {
1410 Perl_croak(aTHX_ "'X' outside of string in unpack");
1415 if (len > s - strbeg)
1416 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1420 case 'x' | TYPE_IS_SHRIEKING: {
1422 if (!len) /* Avoid division by 0 */
1424 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1425 else ai32 = (s - strbeg) % len;
1426 if (ai32 == 0) break;
1434 Perl_croak(aTHX_ "'x' outside of string in unpack");
1439 if (len > strend - s)
1440 Perl_croak(aTHX_ "'x' outside of string in unpack");
1445 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1451 /* Preliminary length estimate is assumed done in 'W' */
1452 if (len > strend - s) len = strend - s;
1458 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1459 if (hop >= strend) {
1461 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1466 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1468 } else if (len > strend - s)
1471 if (datumtype == 'Z') {
1472 /* 'Z' strips stuff after first null */
1473 const char *ptr, *end;
1475 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1476 sv = newSVpvn(s, ptr-s);
1477 if (howlen == e_star) /* exact for 'Z*' */
1478 len = ptr-s + (ptr != strend ? 1 : 0);
1479 } else if (datumtype == 'A') {
1480 /* 'A' strips both nulls and spaces */
1482 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1483 for (ptr = s+len-1; ptr >= s; ptr--)
1484 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1485 !isSPACE_utf8(ptr)) break;
1486 if (ptr >= s) ptr += UTF8SKIP(ptr);
1489 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1491 for (ptr = s+len-1; ptr >= s; ptr--)
1492 if (*ptr != 0 && !isSPACE(*ptr)) break;
1495 sv = newSVpvn(s, ptr-s);
1496 } else sv = newSVpvn(s, len);
1500 /* Undo any upgrade done due to need_utf8() */
1501 if (!(symptr->flags & FLAG_WAS_UTF8))
1502 sv_utf8_downgrade(sv, 0);
1510 if (howlen == e_star || len > (strend - s) * 8)
1511 len = (strend - s) * 8;
1514 while (len >= 8 && s < strend) {
1515 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1520 cuv += PL_bitcount[*(U8 *)s++];
1523 if (len && s < strend) {
1525 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1526 if (datumtype == 'b')
1528 if (bits & 1) cuv++;
1533 if (bits & 0x80) cuv++;
1540 sv = sv_2mortal(newSV(len ? len : 1));
1543 if (datumtype == 'b') {
1545 const I32 ai32 = len;
1546 for (len = 0; len < ai32; len++) {
1547 if (len & 7) bits >>= 1;
1549 if (s >= strend) break;
1550 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1551 } else bits = *(U8 *) s++;
1552 *str++ = bits & 1 ? '1' : '0';
1556 const I32 ai32 = len;
1557 for (len = 0; len < ai32; len++) {
1558 if (len & 7) bits <<= 1;
1560 if (s >= strend) break;
1561 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1562 } else bits = *(U8 *) s++;
1563 *str++ = bits & 0x80 ? '1' : '0';
1567 SvCUR_set(sv, str - SvPVX_const(sv));
1574 /* Preliminary length estimate, acceptable for utf8 too */
1575 if (howlen == e_star || len > (strend - s) * 2)
1576 len = (strend - s) * 2;
1578 sv = sv_2mortal(newSV(len ? len : 1));
1582 if (datumtype == 'h') {
1585 for (len = 0; len < ai32; len++) {
1586 if (len & 1) bits >>= 4;
1588 if (s >= strend) break;
1589 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1590 } else bits = * (U8 *) s++;
1592 *str++ = PL_hexdigit[bits & 15];
1596 const I32 ai32 = len;
1597 for (len = 0; len < ai32; len++) {
1598 if (len & 1) bits <<= 4;
1600 if (s >= strend) break;
1601 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1602 } else bits = *(U8 *) s++;
1604 *str++ = PL_hexdigit[(bits >> 4) & 15];
1609 SvCUR_set(sv, str - SvPVX_const(sv));
1616 if (explicit_length)
1617 /* Switch to "character" mode */
1618 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1623 while (len-- > 0 && s < strend) {
1628 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1629 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1630 if (retlen == (STRLEN) -1 || retlen == 0)
1631 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1635 aint = *(U8 *)(s)++;
1636 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1640 else if (checksum > bits_in_uv)
1641 cdouble += (NV)aint;
1649 while (len-- > 0 && s < strend) {
1651 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1652 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1653 if (retlen == (STRLEN) -1 || retlen == 0)
1654 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1658 else if (checksum > bits_in_uv)
1659 cdouble += (NV) val;
1663 } else if (!checksum)
1665 const U8 ch = *(U8 *) s++;
1668 else if (checksum > bits_in_uv)
1669 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1671 while (len-- > 0) cuv += *(U8 *) s++;
1675 if (explicit_length && howlen != e_star) {
1676 /* Switch to "bytes in UTF-8" mode */
1677 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1679 /* Should be impossible due to the need_utf8() test */
1680 Perl_croak(aTHX_ "U0 mode on a byte string");
1684 if (len > strend - s) len = strend - s;
1686 if (len && unpack_only_one) len = 1;
1690 while (len-- > 0 && s < strend) {
1694 U8 result[UTF8_MAXLEN];
1695 const char *ptr = s;
1697 /* Bug: warns about bad utf8 even if we are short on bytes
1698 and will break out of the loop */
1699 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1702 len = UTF8SKIP(result);
1703 if (!uni_to_bytes(aTHX_ &ptr, strend,
1704 (char *) &result[1], len-1, 'U')) break;
1705 auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT);
1708 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
1709 if (retlen == (STRLEN) -1 || retlen == 0)
1710 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1715 else if (checksum > bits_in_uv)
1716 cdouble += (NV) auv;
1721 case 's' | TYPE_IS_SHRIEKING:
1722 #if SHORTSIZE != SIZE16
1725 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1726 DO_BO_UNPACK(ashort, s);
1729 else if (checksum > bits_in_uv)
1730 cdouble += (NV)ashort;
1742 #if U16SIZE > SIZE16
1745 SHIFT16(utf8, s, strend, &ai16, datumtype);
1746 DO_BO_UNPACK(ai16, 16);
1747 #if U16SIZE > SIZE16
1753 else if (checksum > bits_in_uv)
1754 cdouble += (NV)ai16;
1759 case 'S' | TYPE_IS_SHRIEKING:
1760 #if SHORTSIZE != SIZE16
1762 unsigned short aushort;
1763 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1764 DO_BO_UNPACK(aushort, s);
1767 else if (checksum > bits_in_uv)
1768 cdouble += (NV)aushort;
1781 #if U16SIZE > SIZE16
1784 SHIFT16(utf8, s, strend, &au16, datumtype);
1785 DO_BO_UNPACK(au16, 16);
1787 if (datumtype == 'n')
1788 au16 = PerlSock_ntohs(au16);
1791 if (datumtype == 'v')
1796 else if (checksum > bits_in_uv)
1797 cdouble += (NV) au16;
1802 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1803 case 'v' | TYPE_IS_SHRIEKING:
1804 case 'n' | TYPE_IS_SHRIEKING:
1807 # if U16SIZE > SIZE16
1810 SHIFT16(utf8, s, strend, &ai16, datumtype);
1812 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1813 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1814 # endif /* HAS_NTOHS */
1816 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1817 ai16 = (I16) vtohs((U16) ai16);
1818 # endif /* HAS_VTOHS */
1821 else if (checksum > bits_in_uv)
1822 cdouble += (NV) ai16;
1827 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1829 case 'i' | TYPE_IS_SHRIEKING:
1832 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1833 DO_BO_UNPACK(aint, i);
1836 else if (checksum > bits_in_uv)
1837 cdouble += (NV)aint;
1843 case 'I' | TYPE_IS_SHRIEKING:
1846 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1847 DO_BO_UNPACK(auint, i);
1850 else if (checksum > bits_in_uv)
1851 cdouble += (NV)auint;
1859 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1860 #if IVSIZE == INTSIZE
1861 DO_BO_UNPACK(aiv, i);
1862 #elif IVSIZE == LONGSIZE
1863 DO_BO_UNPACK(aiv, l);
1864 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1865 DO_BO_UNPACK(aiv, 64);
1867 Perl_croak(aTHX_ "'j' not supported on this platform");
1871 else if (checksum > bits_in_uv)
1880 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1881 #if IVSIZE == INTSIZE
1882 DO_BO_UNPACK(auv, i);
1883 #elif IVSIZE == LONGSIZE
1884 DO_BO_UNPACK(auv, l);
1885 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1886 DO_BO_UNPACK(auv, 64);
1888 Perl_croak(aTHX_ "'J' not supported on this platform");
1892 else if (checksum > bits_in_uv)
1898 case 'l' | TYPE_IS_SHRIEKING:
1899 #if LONGSIZE != SIZE32
1902 SHIFT_VAR(utf8, s, strend, along, datumtype);
1903 DO_BO_UNPACK(along, l);
1906 else if (checksum > bits_in_uv)
1907 cdouble += (NV)along;
1918 #if U32SIZE > SIZE32
1921 SHIFT32(utf8, s, strend, &ai32, datumtype);
1922 DO_BO_UNPACK(ai32, 32);
1923 #if U32SIZE > SIZE32
1924 if (ai32 > 2147483647) ai32 -= 4294967296;
1928 else if (checksum > bits_in_uv)
1929 cdouble += (NV)ai32;
1934 case 'L' | TYPE_IS_SHRIEKING:
1935 #if LONGSIZE != SIZE32
1937 unsigned long aulong;
1938 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1939 DO_BO_UNPACK(aulong, l);
1942 else if (checksum > bits_in_uv)
1943 cdouble += (NV)aulong;
1956 #if U32SIZE > SIZE32
1959 SHIFT32(utf8, s, strend, &au32, datumtype);
1960 DO_BO_UNPACK(au32, 32);
1962 if (datumtype == 'N')
1963 au32 = PerlSock_ntohl(au32);
1966 if (datumtype == 'V')
1971 else if (checksum > bits_in_uv)
1972 cdouble += (NV)au32;
1977 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1978 case 'V' | TYPE_IS_SHRIEKING:
1979 case 'N' | TYPE_IS_SHRIEKING:
1982 # if U32SIZE > SIZE32
1985 SHIFT32(utf8, s, strend, &ai32, datumtype);
1987 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1988 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1991 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1992 ai32 = (I32)vtohl((U32)ai32);
1996 else if (checksum > bits_in_uv)
1997 cdouble += (NV)ai32;
2002 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
2006 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2007 DO_BO_UNPACK_PC(aptr);
2008 /* newSVpv generates undef if aptr is NULL */
2009 mPUSHs(newSVpv(aptr, 0));
2017 while (len > 0 && s < strend) {
2019 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
2020 auv = (auv << 7) | (ch & 0x7f);
2021 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
2029 if (++bytes >= sizeof(UV)) { /* promote to string */
2032 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
2033 while (s < strend) {
2034 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
2035 sv = mul128(sv, (U8)(ch & 0x7f));
2041 t = SvPV_nolen_const(sv);
2050 if ((s >= strend) && bytes)
2051 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
2055 if (symptr->howlen == e_star)
2056 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
2058 if (s + sizeof(char*) <= strend) {
2060 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2061 DO_BO_UNPACK_PC(aptr);
2062 /* newSVpvn generates undef if aptr is NULL */
2063 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
2070 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2071 DO_BO_UNPACK(aquad, 64);
2073 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
2074 newSViv((IV)aquad) : newSVnv((NV)aquad));
2075 else if (checksum > bits_in_uv)
2076 cdouble += (NV)aquad;
2084 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2085 DO_BO_UNPACK(auquad, 64);
2087 mPUSHs(auquad <= UV_MAX ?
2088 newSVuv((UV)auquad) : newSVnv((NV)auquad));
2089 else if (checksum > bits_in_uv)
2090 cdouble += (NV)auquad;
2095 #endif /* HAS_QUAD */
2096 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2100 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
2101 DO_BO_UNPACK_N(afloat, float);
2111 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
2112 DO_BO_UNPACK_N(adouble, double);
2122 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), datumtype);
2123 DO_BO_UNPACK_N(anv.nv, NV);
2130 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2134 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, sizeof(aldouble.bytes), datumtype);
2135 DO_BO_UNPACK_N(aldouble.ld, long double);
2137 mPUSHn(aldouble.ld);
2139 cdouble += aldouble.ld;
2145 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2146 sv = sv_2mortal(newSV(l));
2147 if (l) SvPOK_on(sv);
2150 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2155 next_uni_uu(aTHX_ &s, strend, &a);
2156 next_uni_uu(aTHX_ &s, strend, &b);
2157 next_uni_uu(aTHX_ &s, strend, &c);
2158 next_uni_uu(aTHX_ &s, strend, &d);
2159 hunk[0] = (char)((a << 2) | (b >> 4));
2160 hunk[1] = (char)((b << 4) | (c >> 2));
2161 hunk[2] = (char)((c << 6) | d);
2163 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2171 /* possible checksum byte */
2172 const char *skip = s+UTF8SKIP(s);
2173 if (skip < strend && *skip == '\n')
2179 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2183 len = PL_uudmap[*(U8*)s++] & 077;
2185 if (s < strend && ISUUCHAR(*s))
2186 a = PL_uudmap[*(U8*)s++] & 077;
2189 if (s < strend && ISUUCHAR(*s))
2190 b = PL_uudmap[*(U8*)s++] & 077;
2193 if (s < strend && ISUUCHAR(*s))
2194 c = PL_uudmap[*(U8*)s++] & 077;
2197 if (s < strend && ISUUCHAR(*s))
2198 d = PL_uudmap[*(U8*)s++] & 077;
2201 hunk[0] = (char)((a << 2) | (b >> 4));
2202 hunk[1] = (char)((b << 4) | (c >> 2));
2203 hunk[2] = (char)((c << 6) | d);
2205 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2210 else /* possible checksum byte */
2211 if (s + 1 < strend && s[1] == '\n')
2221 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2222 (checksum > bits_in_uv &&
2223 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2226 anv = (NV) (1 << (checksum & 15));
2227 while (checksum >= 16) {
2231 while (cdouble < 0.0)
2233 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2234 sv = newSVnv(cdouble);
2237 if (checksum < bits_in_uv) {
2238 UV mask = ((UV)1 << checksum) - 1;
2247 if (symptr->flags & FLAG_SLASH){
2248 if (SP - PL_stack_base - start_sp_offset <= 0)
2250 if( next_symbol(symptr) ){
2251 if( symptr->howlen == e_number )
2252 Perl_croak(aTHX_ "Count after length/code in unpack" );
2254 /* ...end of char buffer then no decent length available */
2255 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2257 /* take top of stack (hope it's numeric) */
2260 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2263 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2265 datumtype = symptr->code;
2266 explicit_length = FALSE;
2274 return SP - PL_stack_base - start_sp_offset;
2282 I32 gimme = GIMME_V;
2285 const char *pat = SvPV_const(left, llen);
2286 const char *s = SvPV_const(right, rlen);
2287 const char *strend = s + rlen;
2288 const char *patend = pat + llen;
2292 cnt = unpackstring(pat, patend, s, strend,
2293 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2294 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2297 if ( !cnt && gimme == G_SCALAR )
2298 PUSHs(&PL_sv_undef);
2303 doencodes(U8 *h, const char *s, I32 len)
2305 *h++ = PL_uuemap[len];
2307 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2308 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2309 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2310 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2315 const char r = (len > 1 ? s[1] : '\0');
2316 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2317 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2318 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2319 *h++ = PL_uuemap[0];
2326 S_is_an_int(pTHX_ const char *s, STRLEN l)
2328 SV *result = newSVpvn(s, l);
2329 char *const result_c = SvPV_nolen(result); /* convenience */
2330 char *out = result_c;
2334 PERL_ARGS_ASSERT_IS_AN_INT;
2342 SvREFCNT_dec(result);
2365 SvREFCNT_dec(result);
2371 SvCUR_set(result, out - result_c);
2375 /* pnum must be '\0' terminated */
2377 S_div128(pTHX_ SV *pnum, bool *done)
2380 char * const s = SvPV(pnum, len);
2384 PERL_ARGS_ASSERT_DIV128;
2388 const int i = m * 10 + (*t - '0');
2389 const int r = (i >> 7); /* r < 10 */
2397 SvCUR_set(pnum, (STRLEN) (t - s));
2402 =for apidoc packlist
2404 The engine implementing pack() Perl function.
2410 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
2415 PERL_ARGS_ASSERT_PACKLIST;
2417 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2419 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2420 Also make sure any UTF8 flag is loaded */
2421 SvPV_force_nolen(cat);
2423 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2425 (void)pack_rec( cat, &sym, beglist, endlist );
2428 /* like sv_utf8_upgrade, but also repoint the group start markers */
2430 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2433 const char *from_ptr, *from_start, *from_end, **marks, **m;
2434 char *to_start, *to_ptr;
2436 if (SvUTF8(sv)) return;
2438 from_start = SvPVX_const(sv);
2439 from_end = from_start + SvCUR(sv);
2440 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2441 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2442 if (from_ptr == from_end) {
2443 /* Simple case: no character needs to be changed */
2448 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2449 Newx(to_start, len, char);
2450 Copy(from_start, to_start, from_ptr-from_start, char);
2451 to_ptr = to_start + (from_ptr-from_start);
2453 Newx(marks, sym_ptr->level+2, const char *);
2454 for (group=sym_ptr; group; group = group->previous)
2455 marks[group->level] = from_start + group->strbeg;
2456 marks[sym_ptr->level+1] = from_end+1;
2457 for (m = marks; *m < from_ptr; m++)
2458 *m = to_start + (*m-from_start);
2460 for (;from_ptr < from_end; from_ptr++) {
2461 while (*m == from_ptr) *m++ = to_ptr;
2462 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2466 while (*m == from_ptr) *m++ = to_ptr;
2467 if (m != marks + sym_ptr->level+1) {
2470 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2471 "level=%d", m, marks, sym_ptr->level);
2473 for (group=sym_ptr; group; group = group->previous)
2474 group->strbeg = marks[group->level] - to_start;
2479 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2480 from_start -= SvIVX(sv);
2483 SvFLAGS(sv) &= ~SVf_OOK;
2486 Safefree(from_start);
2487 SvPV_set(sv, to_start);
2488 SvCUR_set(sv, to_ptr - to_start);
2493 /* Exponential string grower. Makes string extension effectively O(n)
2494 needed says how many extra bytes we need (not counting the final '\0')
2495 Only grows the string if there is an actual lack of space
2498 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2499 const STRLEN cur = SvCUR(sv);
2500 const STRLEN len = SvLEN(sv);
2503 PERL_ARGS_ASSERT_SV_EXP_GROW;
2505 if (len - cur > needed) return SvPVX(sv);
2506 extend = needed > len ? needed : len;
2507 return SvGROW(sv, len+extend+1);
2512 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2515 tempsym_t lookahead;
2516 I32 items = endlist - beglist;
2517 bool found = next_symbol(symptr);
2518 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2519 bool warn_utf8 = ckWARN(WARN_UTF8);
2521 PERL_ARGS_ASSERT_PACK_REC;
2523 if (symptr->level == 0 && found && symptr->code == 'U') {
2524 marked_upgrade(aTHX_ cat, symptr);
2525 symptr->flags |= FLAG_DO_UTF8;
2528 symptr->strbeg = SvCUR(cat);
2534 SV *lengthcode = NULL;
2535 I32 datumtype = symptr->code;
2536 howlen_t howlen = symptr->howlen;
2537 char *start = SvPVX(cat);
2538 char *cur = start + SvCUR(cat);
2540 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2544 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2548 /* e_no_len and e_number */
2549 len = symptr->length;
2554 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2556 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2557 /* We can process this letter. */
2558 STRLEN size = props & PACK_SIZE_MASK;
2559 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2563 /* Look ahead for next symbol. Do we have code/code? */
2564 lookahead = *symptr;
2565 found = next_symbol(&lookahead);
2566 if (symptr->flags & FLAG_SLASH) {
2568 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2569 if (strchr("aAZ", lookahead.code)) {
2570 if (lookahead.howlen == e_number) count = lookahead.length;
2573 count = sv_len_utf8(*beglist);
2576 if (lookahead.code == 'Z') count++;
2579 if (lookahead.howlen == e_number && lookahead.length < items)
2580 count = lookahead.length;
2583 lookahead.howlen = e_number;
2584 lookahead.length = count;
2585 lengthcode = sv_2mortal(newSViv(count));
2588 /* Code inside the switch must take care to properly update
2589 cat (CUR length and '\0' termination) if it updated *cur and
2590 doesn't simply leave using break */
2591 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2593 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2594 (int) TYPE_NO_MODIFIERS(datumtype));
2596 Perl_croak(aTHX_ "'%%' may not be used in pack");
2599 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2600 case '.' | TYPE_IS_SHRIEKING:
2603 if (howlen == e_star) from = start;
2604 else if (len == 0) from = cur;
2606 tempsym_t *group = symptr;
2608 while (--len && group) group = group->previous;
2609 from = group ? start + group->strbeg : start;
2612 len = SvIV(fromstr);
2614 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2615 case '@' | TYPE_IS_SHRIEKING:
2618 from = start + symptr->strbeg;
2620 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2621 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2622 #else /* PERL_PACK_CAN_SHRIEKSIGN */
2626 while (len && from < cur) {
2627 from += UTF8SKIP(from);
2631 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2633 /* Here we know from == cur */
2635 GROWING(0, cat, start, cur, len);
2636 Zero(cur, len, char);
2638 } else if (from < cur) {
2641 } else goto no_change;
2649 if (len > 0) goto grow;
2650 if (len == 0) goto no_change;
2657 tempsym_t savsym = *symptr;
2658 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2659 symptr->flags |= group_modifiers;
2660 symptr->patend = savsym.grpend;
2662 symptr->previous = &lookahead;
2665 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2666 else symptr->flags &= ~FLAG_PARSE_UTF8;
2667 was_utf8 = SvUTF8(cat);
2668 symptr->patptr = savsym.grpbeg;
2669 beglist = pack_rec(cat, symptr, beglist, endlist);
2670 if (SvUTF8(cat) != was_utf8)
2671 /* This had better be an upgrade while in utf8==0 mode */
2674 if (savsym.howlen == e_star && beglist == endlist)
2675 break; /* No way to continue */
2677 items = endlist - beglist;
2678 lookahead.flags = symptr->flags & ~group_modifiers;
2681 case 'X' | TYPE_IS_SHRIEKING:
2682 if (!len) /* Avoid division by 0 */
2689 hop += UTF8SKIP(hop);
2696 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2700 len = (cur-start) % len;
2704 if (len < 1) goto no_change;
2708 Perl_croak(aTHX_ "'%c' outside of string in pack",
2709 (int) TYPE_NO_MODIFIERS(datumtype));
2710 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2712 Perl_croak(aTHX_ "'%c' outside of string in pack",
2713 (int) TYPE_NO_MODIFIERS(datumtype));
2719 if (cur - start < len)
2720 Perl_croak(aTHX_ "'%c' outside of string in pack",
2721 (int) TYPE_NO_MODIFIERS(datumtype));
2724 if (cur < start+symptr->strbeg) {
2725 /* Make sure group starts don't point into the void */
2727 const STRLEN length = cur-start;
2728 for (group = symptr;
2729 group && length < group->strbeg;
2730 group = group->previous) group->strbeg = length;
2731 lookahead.strbeg = length;
2734 case 'x' | TYPE_IS_SHRIEKING: {
2736 if (!len) /* Avoid division by 0 */
2738 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2739 else ai32 = (cur - start) % len;
2740 if (ai32 == 0) goto no_change;
2752 aptr = SvPV_const(fromstr, fromlen);
2753 if (DO_UTF8(fromstr)) {
2754 const char *end, *s;
2756 if (!utf8 && !SvUTF8(cat)) {
2757 marked_upgrade(aTHX_ cat, symptr);
2758 lookahead.flags |= FLAG_DO_UTF8;
2759 lookahead.strbeg = symptr->strbeg;
2762 cur = start + SvCUR(cat);
2764 if (howlen == e_star) {
2765 if (utf8) goto string_copy;
2769 end = aptr + fromlen;
2770 fromlen = datumtype == 'Z' ? len-1 : len;
2771 while ((I32) fromlen > 0 && s < end) {
2776 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2779 if (datumtype == 'Z') len++;
2785 fromlen = len - fromlen;
2786 if (datumtype == 'Z') fromlen--;
2787 if (howlen == e_star) {
2789 if (datumtype == 'Z') len++;
2791 GROWING(0, cat, start, cur, len);
2792 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2793 datumtype | TYPE_IS_PACK))
2794 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2795 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2796 (int)datumtype, aptr, end, cur, (UV)fromlen);
2800 if (howlen == e_star) {
2802 if (datumtype == 'Z') len++;
2804 if (len <= (I32) fromlen) {
2806 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2808 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2810 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2811 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2813 while (fromlen > 0) {
2814 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2820 if (howlen == e_star) {
2822 if (datumtype == 'Z') len++;
2824 if (len <= (I32) fromlen) {
2826 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2828 GROWING(0, cat, start, cur, len);
2829 Copy(aptr, cur, fromlen, char);
2833 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2840 const char *str, *end;
2847 str = SvPV_const(fromstr, fromlen);
2848 end = str + fromlen;
2849 if (DO_UTF8(fromstr)) {
2851 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2853 utf8_source = FALSE;
2854 utf8_flags = 0; /* Unused, but keep compilers happy */
2856 if (howlen == e_star) len = fromlen;
2857 field_len = (len+7)/8;
2858 GROWING(utf8, cat, start, cur, field_len);
2859 if (len > (I32)fromlen) len = fromlen;
2862 if (datumtype == 'B')
2866 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2868 } else bits |= *str++ & 1;
2869 if (l & 7) bits <<= 1;
2871 PUSH_BYTE(utf8, cur, bits);
2876 /* datumtype == 'b' */
2880 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2881 if (val & 1) bits |= 0x80;
2882 } else if (*str++ & 1)
2884 if (l & 7) bits >>= 1;
2886 PUSH_BYTE(utf8, cur, bits);
2892 if (datumtype == 'B')
2893 bits <<= 7 - (l & 7);
2895 bits >>= 7 - (l & 7);
2896 PUSH_BYTE(utf8, cur, bits);
2899 /* Determine how many chars are left in the requested field */
2901 if (howlen == e_star) field_len = 0;
2902 else field_len -= l;
2903 Zero(cur, field_len, char);
2909 const char *str, *end;
2916 str = SvPV_const(fromstr, fromlen);
2917 end = str + fromlen;
2918 if (DO_UTF8(fromstr)) {
2920 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2922 utf8_source = FALSE;
2923 utf8_flags = 0; /* Unused, but keep compilers happy */
2925 if (howlen == e_star) len = fromlen;
2926 field_len = (len+1)/2;
2927 GROWING(utf8, cat, start, cur, field_len);
2928 if (!utf8 && len > (I32)fromlen) len = fromlen;
2931 if (datumtype == 'H')
2935 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2936 if (val < 256 && isALPHA(val))
2937 bits |= (val + 9) & 0xf;
2940 } else if (isALPHA(*str))
2941 bits |= (*str++ + 9) & 0xf;
2943 bits |= *str++ & 0xf;
2944 if (l & 1) bits <<= 4;
2946 PUSH_BYTE(utf8, cur, bits);
2954 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2955 if (val < 256 && isALPHA(val))
2956 bits |= ((val + 9) & 0xf) << 4;
2958 bits |= (val & 0xf) << 4;
2959 } else if (isALPHA(*str))
2960 bits |= ((*str++ + 9) & 0xf) << 4;
2962 bits |= (*str++ & 0xf) << 4;
2963 if (l & 1) bits >>= 4;
2965 PUSH_BYTE(utf8, cur, bits);
2971 PUSH_BYTE(utf8, cur, bits);
2974 /* Determine how many chars are left in the requested field */
2976 if (howlen == e_star) field_len = 0;
2977 else field_len -= l;
2978 Zero(cur, field_len, char);
2986 aiv = SvIV(fromstr);
2987 if ((-128 > aiv || aiv > 127))
2988 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2989 "Character in 'c' format wrapped in pack");
2990 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2995 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
3001 aiv = SvIV(fromstr);
3002 if ((0 > aiv || aiv > 0xff))
3003 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3004 "Character in 'C' format wrapped in pack");
3005 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
3010 U8 in_bytes = (U8)IN_BYTES;
3012 end = start+SvLEN(cat)-1;
3013 if (utf8) end -= UTF8_MAXLEN-1;
3017 auv = SvUV(fromstr);
3018 if (in_bytes) auv = auv % 0x100;
3023 SvCUR_set(cat, cur - start);
3025 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3026 end = start+SvLEN(cat)-UTF8_MAXLEN;
3028 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3031 0 : UNICODE_ALLOW_ANY);
3036 SvCUR_set(cat, cur - start);
3037 marked_upgrade(aTHX_ cat, symptr);
3038 lookahead.flags |= FLAG_DO_UTF8;
3039 lookahead.strbeg = symptr->strbeg;
3042 cur = start + SvCUR(cat);
3043 end = start+SvLEN(cat)-UTF8_MAXLEN;
3046 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3047 "Character in 'W' format wrapped in pack");
3052 SvCUR_set(cat, cur - start);
3053 GROWING(0, cat, start, cur, len+1);
3054 end = start+SvLEN(cat)-1;
3056 *(U8 *) cur++ = (U8)auv;
3065 if (!(symptr->flags & FLAG_DO_UTF8)) {
3066 marked_upgrade(aTHX_ cat, symptr);
3067 lookahead.flags |= FLAG_DO_UTF8;
3068 lookahead.strbeg = symptr->strbeg;
3074 end = start+SvLEN(cat);
3075 if (!utf8) end -= UTF8_MAXLEN;
3079 auv = SvUV(fromstr);
3081 U8 buffer[UTF8_MAXLEN], *endb;
3082 endb = uvuni_to_utf8_flags(buffer, auv,
3084 0 : UNICODE_ALLOW_ANY);
3085 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3087 SvCUR_set(cat, cur - start);
3088 GROWING(0, cat, start, cur,
3089 len+(endb-buffer)*UTF8_EXPAND);
3090 end = start+SvLEN(cat);
3092 cur = bytes_to_uni(buffer, endb-buffer, cur);
3096 SvCUR_set(cat, cur - start);
3097 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3098 end = start+SvLEN(cat)-UTF8_MAXLEN;
3100 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3102 0 : UNICODE_ALLOW_ANY);
3107 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3113 anv = SvNV(fromstr);
3114 # if defined(VMS) && !defined(_IEEE_FP)
3115 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3116 * on Alpha; fake it if we don't have them.
3120 else if (anv < -FLT_MAX)
3122 else afloat = (float)anv;
3124 afloat = (float)anv;
3126 DO_BO_PACK_N(afloat, float);
3127 PUSH_VAR(utf8, cur, afloat);
3135 anv = SvNV(fromstr);
3136 # if defined(VMS) && !defined(_IEEE_FP)
3137 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3138 * on Alpha; fake it if we don't have them.
3142 else if (anv < -DBL_MAX)
3144 else adouble = (double)anv;
3146 adouble = (double)anv;
3148 DO_BO_PACK_N(adouble, double);
3149 PUSH_VAR(utf8, cur, adouble);
3154 Zero(&anv, 1, NV); /* can be long double with unused bits */
3158 /* to work round a gcc/x86 bug; don't use SvNV */
3159 anv.nv = sv_2nv(fromstr);
3161 anv.nv = SvNV(fromstr);
3163 DO_BO_PACK_N(anv, NV);
3164 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes));
3168 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3171 /* long doubles can have unused bits, which may be nonzero */
3172 Zero(&aldouble, 1, long double);
3176 /* to work round a gcc/x86 bug; don't use SvNV */
3177 aldouble.ld = (long double)sv_2nv(fromstr);
3179 aldouble.ld = (long double)SvNV(fromstr);
3181 DO_BO_PACK_N(aldouble, long double);
3182 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes));
3187 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3188 case 'n' | TYPE_IS_SHRIEKING:
3194 ai16 = (I16)SvIV(fromstr);
3196 ai16 = PerlSock_htons(ai16);
3198 PUSH16(utf8, cur, &ai16);
3201 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3202 case 'v' | TYPE_IS_SHRIEKING:
3208 ai16 = (I16)SvIV(fromstr);
3212 PUSH16(utf8, cur, &ai16);
3215 case 'S' | TYPE_IS_SHRIEKING:
3216 #if SHORTSIZE != SIZE16
3218 unsigned short aushort;
3220 aushort = SvUV(fromstr);
3221 DO_BO_PACK(aushort, s);
3222 PUSH_VAR(utf8, cur, aushort);
3232 au16 = (U16)SvUV(fromstr);
3233 DO_BO_PACK(au16, 16);
3234 PUSH16(utf8, cur, &au16);
3237 case 's' | TYPE_IS_SHRIEKING:
3238 #if SHORTSIZE != SIZE16
3242 ashort = SvIV(fromstr);
3243 DO_BO_PACK(ashort, s);
3244 PUSH_VAR(utf8, cur, ashort);
3254 ai16 = (I16)SvIV(fromstr);
3255 DO_BO_PACK(ai16, 16);
3256 PUSH16(utf8, cur, &ai16);
3260 case 'I' | TYPE_IS_SHRIEKING:
3264 auint = SvUV(fromstr);
3265 DO_BO_PACK(auint, i);
3266 PUSH_VAR(utf8, cur, auint);
3273 aiv = SvIV(fromstr);
3274 #if IVSIZE == INTSIZE
3276 #elif IVSIZE == LONGSIZE
3278 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3279 DO_BO_PACK(aiv, 64);
3281 Perl_croak(aTHX_ "'j' not supported on this platform");
3283 PUSH_VAR(utf8, cur, aiv);
3290 auv = SvUV(fromstr);
3291 #if UVSIZE == INTSIZE
3293 #elif UVSIZE == LONGSIZE
3295 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3296 DO_BO_PACK(auv, 64);
3298 Perl_croak(aTHX_ "'J' not supported on this platform");
3300 PUSH_VAR(utf8, cur, auv);
3307 anv = SvNV(fromstr);
3311 SvCUR_set(cat, cur - start);
3312 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3315 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3316 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3317 any negative IVs will have already been got by the croak()
3318 above. IOK is untrue for fractions, so we test them
3319 against UV_MAX_P1. */
3320 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3321 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3322 char *in = buf + sizeof(buf);
3323 UV auv = SvUV(fromstr);
3326 *--in = (char)((auv & 0x7f) | 0x80);
3329 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3330 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3331 in, (buf + sizeof(buf)) - in);
3332 } else if (SvPOKp(fromstr))
3334 else if (SvNOKp(fromstr)) {
3335 /* 10**NV_MAX_10_EXP is the largest power of 10
3336 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
3337 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3338 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3339 And with that many bytes only Inf can overflow.
3340 Some C compilers are strict about integral constant
3341 expressions so we conservatively divide by a slightly
3342 smaller integer instead of multiplying by the exact
3343 floating-point value.
3345 #ifdef NV_MAX_10_EXP
3346 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3347 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3349 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3350 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3352 char *in = buf + sizeof(buf);
3354 anv = Perl_floor(anv);
3356 const NV next = Perl_floor(anv / 128);
3357 if (in <= buf) /* this cannot happen ;-) */
3358 Perl_croak(aTHX_ "Cannot compress integer in pack");
3359 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3362 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3363 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3364 in, (buf + sizeof(buf)) - in);
3373 /* Copy string and check for compliance */
3374 from = SvPV_const(fromstr, len);
3375 if ((norm = is_an_int(from, len)) == NULL)
3376 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3378 Newx(result, len, char);
3381 while (!done) *--in = div128(norm, &done) | 0x80;
3382 result[len - 1] &= 0x7F; /* clear continue bit */
3383 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3384 in, (result + len) - in);
3386 SvREFCNT_dec(norm); /* free norm */
3391 case 'i' | TYPE_IS_SHRIEKING:
3395 aint = SvIV(fromstr);
3396 DO_BO_PACK(aint, i);
3397 PUSH_VAR(utf8, cur, aint);
3400 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3401 case 'N' | TYPE_IS_SHRIEKING:
3407 au32 = SvUV(fromstr);
3409 au32 = PerlSock_htonl(au32);
3411 PUSH32(utf8, cur, &au32);
3414 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3415 case 'V' | TYPE_IS_SHRIEKING:
3421 au32 = SvUV(fromstr);
3425 PUSH32(utf8, cur, &au32);
3428 case 'L' | TYPE_IS_SHRIEKING:
3429 #if LONGSIZE != SIZE32
3431 unsigned long aulong;
3433 aulong = SvUV(fromstr);
3434 DO_BO_PACK(aulong, l);
3435 PUSH_VAR(utf8, cur, aulong);
3445 au32 = SvUV(fromstr);
3446 DO_BO_PACK(au32, 32);
3447 PUSH32(utf8, cur, &au32);
3450 case 'l' | TYPE_IS_SHRIEKING:
3451 #if LONGSIZE != SIZE32
3455 along = SvIV(fromstr);
3456 DO_BO_PACK(along, l);
3457 PUSH_VAR(utf8, cur, along);
3467 ai32 = SvIV(fromstr);
3468 DO_BO_PACK(ai32, 32);
3469 PUSH32(utf8, cur, &ai32);
3477 auquad = (Uquad_t) SvUV(fromstr);
3478 DO_BO_PACK(auquad, 64);
3479 PUSH_VAR(utf8, cur, auquad);
3486 aquad = (Quad_t)SvIV(fromstr);
3487 DO_BO_PACK(aquad, 64);
3488 PUSH_VAR(utf8, cur, aquad);
3491 #endif /* HAS_QUAD */
3493 len = 1; /* assume SV is correct length */
3494 GROWING(utf8, cat, start, cur, sizeof(char *));
3501 SvGETMAGIC(fromstr);
3502 if (!SvOK(fromstr)) aptr = NULL;
3504 /* XXX better yet, could spirit away the string to
3505 * a safe spot and hang on to it until the result
3506 * of pack() (and all copies of the result) are
3509 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3510 !SvREADONLY(fromstr)))) {
3511 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3512 "Attempt to pack pointer to temporary value");
3514 if (SvPOK(fromstr) || SvNIOK(fromstr))
3515 aptr = SvPV_nomg_const_nolen(fromstr);
3517 aptr = SvPV_force_flags_nolen(fromstr, 0);
3519 DO_BO_PACK_PC(aptr);
3520 PUSH_VAR(utf8, cur, aptr);
3524 const char *aptr, *aend;
3528 if (len <= 2) len = 45;
3529 else len = len / 3 * 3;
3531 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3532 "Field too wide in 'u' format in pack");
3535 aptr = SvPV_const(fromstr, fromlen);
3536 from_utf8 = DO_UTF8(fromstr);
3538 aend = aptr + fromlen;
3539 fromlen = sv_len_utf8_nomg(fromstr);
3540 } else aend = NULL; /* Unused, but keep compilers happy */
3541 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3542 while (fromlen > 0) {
3545 U8 hunk[1+63/3*4+1];
3547 if ((I32)fromlen > len)
3553 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3554 'u' | TYPE_IS_PACK)) {
3556 SvCUR_set(cat, cur - start);
3557 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3558 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3559 aptr, aend, buffer, (long) todo);
3561 end = doencodes(hunk, buffer, todo);
3563 end = doencodes(hunk, aptr, todo);
3566 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3573 SvCUR_set(cat, cur - start);
3575 *symptr = lookahead;
3584 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3587 SV *pat_sv = *++MARK;
3588 const char *pat = SvPV_const(pat_sv, fromlen);
3589 const char *patend = pat + fromlen;
3595 packlist(cat, pat, patend, MARK, SP + 1);
3605 * c-indentation-style: bsd
3607 * indent-tabs-mode: nil
3610 * ex: set ts=8 sts=4 sw=4 et: