3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
32 #define PERL_IN_PP_PACK_C
35 /* Types used by pack/unpack */
37 e_no_len, /* no length */
38 e_number, /* number, [] */
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
57 #define TEMPSYM_INIT(symptr, p, e, f) \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
81 U8 bytes[sizeof(long double)];
86 # define PERL_PACK_CAN_BYTEORDER
87 # define PERL_PACK_CAN_SHRIEKSIGN
93 /* Maximum number of bytes to which a byte can grow due to upgrade */
97 * Offset for integer pack/unpack.
99 * On architectures where I16 and I32 aren't really 16 and 32 bits,
100 * which for now are all Crays, pack and unpack have to play games.
104 * These values are required for portability of pack() output.
105 * If they're not right on your machine, then pack() and unpack()
106 * wouldn't work right anyway; you'll need to apply the Cray hack.
107 * (I'd like to check them with #if, but you can't use sizeof() in
108 * the preprocessor.) --???
111 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
112 defines are now in config.h. --Andy Dougherty April 1998
117 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
120 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
121 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
122 # define OFF16(p) ((char*)(p))
123 # define OFF32(p) ((char*)(p))
125 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
126 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
127 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
129 ++++ bad cray byte order
133 # define OFF16(p) ((char *) (p))
134 # define OFF32(p) ((char *) (p))
137 /* Only to be used inside a loop (see the break) */
138 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
140 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
142 Copy(s, OFF16(p), SIZE16, char); \
147 /* Only to be used inside a loop (see the break) */
148 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
150 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
152 Copy(s, OFF32(p), SIZE32, char); \
157 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
158 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
160 /* Only to be used inside a loop (see the break) */
161 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype) \
164 if (!uni_to_bytes(aTHX_ &s, strend, \
165 (char *) (buf), len, datumtype)) break; \
167 Copy(s, (char *) (buf), len, char); \
172 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
173 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype)
175 #define PUSH_VAR(utf8, aptr, var) \
176 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
178 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
179 #define MAX_SUB_TEMPLATE_LEVEL 100
181 /* flags (note that type modifiers can also be used as flags!) */
182 #define FLAG_WAS_UTF8 0x40
183 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
184 #define FLAG_UNPACK_ONLY_ONE 0x10
185 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
186 #define FLAG_SLASH 0x04
187 #define FLAG_COMMA 0x02
188 #define FLAG_PACK 0x01
191 S_mul128(pTHX_ SV *sv, U8 m)
194 char *s = SvPV(sv, len);
197 PERL_ARGS_ASSERT_MUL128;
199 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
200 SV * const tmpNew = newSVpvs("0000000000");
202 sv_catsv(tmpNew, sv);
203 SvREFCNT_dec(sv); /* free old sv */
208 while (!*t) /* trailing '\0'? */
211 const U32 i = ((*t - '0') << 7) + m;
212 *(t--) = '0' + (char)(i % 10);
218 /* Explosives and implosives. */
220 #if 'I' == 73 && 'J' == 74
221 /* On an ASCII/ISO kind of system */
222 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
225 Some other sort of character set - use memchr() so we don't match
228 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
232 #define TYPE_IS_SHRIEKING 0x100
233 #define TYPE_IS_BIG_ENDIAN 0x200
234 #define TYPE_IS_LITTLE_ENDIAN 0x400
235 #define TYPE_IS_PACK 0x800
236 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
237 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
238 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
240 #ifdef PERL_PACK_CAN_SHRIEKSIGN
241 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
243 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
246 #ifndef PERL_PACK_CAN_BYTEORDER
247 /* Put "can't" first because it is shorter */
248 # define TYPE_ENDIANNESS(t) 0
249 # define TYPE_NO_ENDIANNESS(t) (t)
251 # define ENDIANNESS_ALLOWED_TYPES ""
253 # define DO_BO_UNPACK(var, type)
254 # define DO_BO_PACK(var, type)
255 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
256 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
257 # define DO_BO_UNPACK_N(var, type)
258 # define DO_BO_PACK_N(var, type)
259 # define DO_BO_UNPACK_P(var)
260 # define DO_BO_PACK_P(var)
261 # define DO_BO_UNPACK_PC(var)
262 # define DO_BO_PACK_PC(var)
264 #else /* PERL_PACK_CAN_BYTEORDER */
266 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
267 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
269 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
271 # define DO_BO_UNPACK(var, type) \
273 switch (TYPE_ENDIANNESS(datumtype)) { \
274 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
275 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
280 # define DO_BO_PACK(var, type) \
282 switch (TYPE_ENDIANNESS(datumtype)) { \
283 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
284 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
289 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
291 switch (TYPE_ENDIANNESS(datumtype)) { \
292 case TYPE_IS_BIG_ENDIAN: \
293 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
295 case TYPE_IS_LITTLE_ENDIAN: \
296 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
303 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
305 switch (TYPE_ENDIANNESS(datumtype)) { \
306 case TYPE_IS_BIG_ENDIAN: \
307 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
309 case TYPE_IS_LITTLE_ENDIAN: \
310 var = (post_cast *) my_htole ## type ((pre_cast) var); \
317 # define BO_CANT_DOIT(action, type) \
319 switch (TYPE_ENDIANNESS(datumtype)) { \
320 case TYPE_IS_BIG_ENDIAN: \
321 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
322 "platform", #action, #type); \
324 case TYPE_IS_LITTLE_ENDIAN: \
325 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
326 "platform", #action, #type); \
333 # if PTRSIZE == INTSIZE
334 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
335 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
336 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
337 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
338 # elif PTRSIZE == LONGSIZE
339 # if LONGSIZE < IVSIZE && IVSIZE == 8
340 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, 64, IV, void)
341 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, 64, IV, void)
342 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, 64, IV, char)
343 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, 64, IV, char)
345 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
346 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
347 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
348 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
350 # elif PTRSIZE == IVSIZE
351 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
352 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
353 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
354 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
356 # define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
357 # define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
358 # define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
359 # define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
362 # if defined(my_htolen) && defined(my_letohn) && \
363 defined(my_htoben) && defined(my_betohn)
364 # define DO_BO_UNPACK_N(var, type) \
366 switch (TYPE_ENDIANNESS(datumtype)) { \
367 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
368 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
373 # define DO_BO_PACK_N(var, type) \
375 switch (TYPE_ENDIANNESS(datumtype)) { \
376 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
377 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
382 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
383 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
386 #endif /* PERL_PACK_CAN_BYTEORDER */
388 #define PACK_SIZE_CANNOT_CSUM 0x80
389 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
390 #define PACK_SIZE_MASK 0x3F
392 /* These tables are regenerated by genpacksizetables.pl (and then hand pasted
393 in). You're unlikely ever to need to regenerate them. */
395 #if TYPE_IS_SHRIEKING != 0x100
396 ++++shriek offset should be 256
399 typedef U8 packprops_t;
402 STATIC const packprops_t packprops[512] = {
404 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
405 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
406 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
407 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
409 /* C */ sizeof(unsigned char),
410 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
411 /* D */ LONG_DOUBLESIZE,
418 /* I */ sizeof(unsigned int),
425 #if defined(HAS_QUAD)
426 /* Q */ sizeof(Uquad_t),
433 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
435 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 /* c */ sizeof(char),
438 /* d */ sizeof(double),
440 /* f */ sizeof(float),
449 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
450 #if defined(HAS_QUAD)
451 /* q */ sizeof(Quad_t),
459 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
460 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
461 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
462 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
463 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
464 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
465 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
466 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
467 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
468 0, 0, 0, 0, 0, 0, 0, 0,
470 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
471 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
472 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
473 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
474 0, 0, 0, 0, 0, 0, 0, 0, 0,
475 /* I */ sizeof(unsigned int),
477 /* L */ sizeof(unsigned long),
479 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
485 /* S */ sizeof(unsigned short),
487 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
496 /* l */ sizeof(long),
498 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
504 /* s */ sizeof(short),
506 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
511 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
512 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
513 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
514 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
515 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
516 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
518 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
519 0, 0, 0, 0, 0, 0, 0, 0, 0
522 /* EBCDIC (or bust) */
523 STATIC const packprops_t packprops[512] = {
525 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
526 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
527 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
528 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
529 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
530 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
531 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
532 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
534 /* c */ sizeof(char),
535 /* d */ sizeof(double),
537 /* f */ sizeof(float),
547 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
548 #if defined(HAS_QUAD)
549 /* q */ sizeof(Quad_t),
553 0, 0, 0, 0, 0, 0, 0, 0, 0,
557 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
558 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
559 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
560 /* C */ sizeof(unsigned char),
561 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
562 /* D */ LONG_DOUBLESIZE,
569 /* I */ sizeof(unsigned int),
577 #if defined(HAS_QUAD)
578 /* Q */ sizeof(Uquad_t),
582 0, 0, 0, 0, 0, 0, 0, 0, 0,
585 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
587 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
588 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
589 0, 0, 0, 0, 0, 0, 0, 0, 0,
591 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
592 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
593 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
594 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
595 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
596 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
597 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
598 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
599 0, 0, 0, 0, 0, 0, 0, 0, 0,
601 0, 0, 0, 0, 0, 0, 0, 0, 0,
602 /* l */ sizeof(long),
604 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
609 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
610 /* s */ sizeof(short),
612 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
617 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
618 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
620 /* I */ sizeof(unsigned int),
621 0, 0, 0, 0, 0, 0, 0, 0, 0,
622 /* L */ sizeof(unsigned long),
624 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
629 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
630 /* S */ sizeof(unsigned short),
632 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
637 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
638 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
643 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
646 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
647 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
648 /* We try to process malformed UTF-8 as much as possible (preferably with
649 warnings), but these two mean we make no progress in the string and
650 might enter an infinite loop */
651 if (retlen == (STRLEN) -1 || retlen == 0)
652 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
653 (int) TYPE_NO_MODIFIERS(datumtype));
655 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
656 "Character in '%c' format wrapped in unpack",
657 (int) TYPE_NO_MODIFIERS(datumtype));
664 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
665 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
669 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
673 const char *from = *s;
675 const U32 flags = ckWARN(WARN_UTF8) ?
676 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
677 for (;buf_len > 0; buf_len--) {
678 if (from >= end) return FALSE;
679 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
680 if (retlen == (STRLEN) -1 || retlen == 0) {
681 from += UTF8SKIP(from);
683 } else from += retlen;
688 *(U8 *)buf++ = (U8)val;
690 /* We have enough characters for the buffer. Did we have problems ? */
693 /* Rewalk the string fragment while warning */
695 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
696 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
697 if (ptr >= end) break;
698 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
700 if (from > end) from = end;
703 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
704 WARN_PACK : WARN_UNPACK),
705 "Character(s) in '%c' format wrapped in %s",
706 (int) TYPE_NO_MODIFIERS(datumtype),
707 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
714 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
718 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
719 if (val >= 0x100 || !ISUUCHAR(val) ||
720 retlen == (STRLEN) -1 || retlen == 0) {
724 *out = PL_uudmap[val] & 077;
730 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
731 const U8 * const end = start + len;
733 PERL_ARGS_ASSERT_BYTES_TO_UNI;
735 while (start < end) {
736 const UV uv = NATIVE_TO_ASCII(*start);
737 if (UNI_IS_INVARIANT(uv))
738 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
740 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
741 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
748 #define PUSH_BYTES(utf8, cur, buf, len) \
751 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
753 Copy(buf, cur, len, char); \
758 #define GROWING(utf8, cat, start, cur, in_len) \
760 STRLEN glen = (in_len); \
761 if (utf8) glen *= UTF8_EXPAND; \
762 if ((cur) + glen >= (start) + SvLEN(cat)) { \
763 (start) = sv_exp_grow(cat, glen); \
764 (cur) = (start) + SvCUR(cat); \
768 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
770 const STRLEN glen = (in_len); \
772 if (utf8) gl *= UTF8_EXPAND; \
773 if ((cur) + gl >= (start) + SvLEN(cat)) { \
775 SvCUR_set((cat), (cur) - (start)); \
776 (start) = sv_exp_grow(cat, gl); \
777 (cur) = (start) + SvCUR(cat); \
779 PUSH_BYTES(utf8, cur, buf, glen); \
782 #define PUSH_BYTE(utf8, s, byte) \
785 const U8 au8 = (byte); \
786 (s) = bytes_to_uni(&au8, 1, (s)); \
787 } else *(U8 *)(s)++ = (byte); \
790 /* Only to be used inside a loop (see the break) */
791 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
794 if (str >= end) break; \
795 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
796 if (retlen == (STRLEN) -1 || retlen == 0) { \
798 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
803 static const char *_action( const tempsym_t* symptr )
805 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
808 /* Returns the sizeof() struct described by pat */
810 S_measure_struct(pTHX_ tempsym_t* symptr)
814 PERL_ARGS_ASSERT_MEASURE_STRUCT;
816 while (next_symbol(symptr)) {
820 switch (symptr->howlen) {
822 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
826 /* e_no_len and e_number */
827 len = symptr->length;
831 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
834 /* endianness doesn't influence the size of a type */
835 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
837 Perl_croak(aTHX_ "Invalid type '%c' in %s",
838 (int)TYPE_NO_MODIFIERS(symptr->code),
840 #ifdef PERL_PACK_CAN_SHRIEKSIGN
841 case '.' | TYPE_IS_SHRIEKING:
842 case '@' | TYPE_IS_SHRIEKING:
847 case 'U': /* XXXX Is it correct? */
850 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
851 (int) TYPE_NO_MODIFIERS(symptr->code),
858 tempsym_t savsym = *symptr;
859 symptr->patptr = savsym.grpbeg;
860 symptr->patend = savsym.grpend;
861 /* XXXX Theoretically, we need to measure many times at
862 different positions, since the subexpression may contain
863 alignment commands, but be not of aligned length.
864 Need to detect this and croak(). */
865 size = measure_struct(symptr);
869 case 'X' | TYPE_IS_SHRIEKING:
870 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
872 if (!len) /* Avoid division by 0 */
874 len = total % len; /* Assumed: the start is aligned. */
879 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
881 case 'x' | TYPE_IS_SHRIEKING:
882 if (!len) /* Avoid division by 0 */
884 star = total % len; /* Assumed: the start is aligned. */
885 if (star) /* Other portable ways? */
909 size = sizeof(char*);
919 /* locate matching closing parenthesis or bracket
920 * returns char pointer to char after match, or NULL
923 S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
925 PERL_ARGS_ASSERT_GROUP_END;
927 while (patptr < patend) {
928 const char c = *patptr++;
935 while (patptr < patend && *patptr != '\n')
939 patptr = group_end(patptr, patend, ')') + 1;
941 patptr = group_end(patptr, patend, ']') + 1;
943 Perl_croak(aTHX_ "No group ending character '%c' found in template",
949 /* Convert unsigned decimal number to binary.
950 * Expects a pointer to the first digit and address of length variable
951 * Advances char pointer to 1st non-digit char and returns number
954 S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
956 I32 len = *patptr++ - '0';
958 PERL_ARGS_ASSERT_GET_NUM;
960 while (isDIGIT(*patptr)) {
961 if (len >= 0x7FFFFFFF/10)
962 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
963 len = (len * 10) + (*patptr++ - '0');
969 /* The marvellous template parsing routine: Using state stored in *symptr,
970 * locates next template code and count
973 S_next_symbol(pTHX_ tempsym_t* symptr )
975 const char* patptr = symptr->patptr;
976 const char* const patend = symptr->patend;
978 PERL_ARGS_ASSERT_NEXT_SYMBOL;
980 symptr->flags &= ~FLAG_SLASH;
982 while (patptr < patend) {
983 if (isSPACE(*patptr))
985 else if (*patptr == '#') {
987 while (patptr < patend && *patptr != '\n')
992 /* We should have found a template code */
993 I32 code = *patptr++ & 0xFF;
994 U32 inherited_modifiers = 0;
996 if (code == ','){ /* grandfather in commas but with a warning */
997 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
998 symptr->flags |= FLAG_COMMA;
999 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1000 "Invalid type ',' in %s", _action( symptr ) );
1005 /* for '(', skip to ')' */
1007 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
1008 Perl_croak(aTHX_ "()-group starts with a count in %s",
1009 _action( symptr ) );
1010 symptr->grpbeg = patptr;
1011 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
1012 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
1013 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
1014 _action( symptr ) );
1017 /* look for group modifiers to inherit */
1018 if (TYPE_ENDIANNESS(symptr->flags)) {
1019 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
1020 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
1023 /* look for modifiers */
1024 while (patptr < patend) {
1025 const char *allowed;
1029 modifier = TYPE_IS_SHRIEKING;
1030 allowed = SHRIEKING_ALLOWED_TYPES;
1032 #ifdef PERL_PACK_CAN_BYTEORDER
1034 modifier = TYPE_IS_BIG_ENDIAN;
1035 allowed = ENDIANNESS_ALLOWED_TYPES;
1038 modifier = TYPE_IS_LITTLE_ENDIAN;
1039 allowed = ENDIANNESS_ALLOWED_TYPES;
1041 #endif /* PERL_PACK_CAN_BYTEORDER */
1051 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1052 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
1053 allowed, _action( symptr ) );
1055 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1056 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
1057 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
1058 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1059 TYPE_ENDIANNESS_MASK)
1060 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
1061 *patptr, _action( symptr ) );
1063 if ((code & modifier)) {
1064 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
1065 "Duplicate modifier '%c' after '%c' in %s",
1066 *patptr, (int) TYPE_NO_MODIFIERS(code),
1067 _action( symptr ) );
1074 /* inherit modifiers */
1075 code |= inherited_modifiers;
1077 /* look for count and/or / */
1078 if (patptr < patend) {
1079 if (isDIGIT(*patptr)) {
1080 patptr = get_num( patptr, &symptr->length );
1081 symptr->howlen = e_number;
1083 } else if (*patptr == '*') {
1085 symptr->howlen = e_star;
1087 } else if (*patptr == '[') {
1088 const char* lenptr = ++patptr;
1089 symptr->howlen = e_number;
1090 patptr = group_end( patptr, patend, ']' ) + 1;
1091 /* what kind of [] is it? */
1092 if (isDIGIT(*lenptr)) {
1093 lenptr = get_num( lenptr, &symptr->length );
1094 if( *lenptr != ']' )
1095 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1096 _action( symptr ) );
1098 tempsym_t savsym = *symptr;
1099 symptr->patend = patptr-1;
1100 symptr->patptr = lenptr;
1101 savsym.length = measure_struct(symptr);
1105 symptr->howlen = e_no_len;
1110 while (patptr < patend) {
1111 if (isSPACE(*patptr))
1113 else if (*patptr == '#') {
1115 while (patptr < patend && *patptr != '\n')
1117 if (patptr < patend)
1120 if (*patptr == '/') {
1121 symptr->flags |= FLAG_SLASH;
1123 if (patptr < patend &&
1124 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1125 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1126 _action( symptr ) );
1132 /* at end - no count, no / */
1133 symptr->howlen = e_no_len;
1137 symptr->code = code;
1138 symptr->patptr = patptr;
1142 symptr->patptr = patptr;
1147 There is no way to cleanly handle the case where we should process the
1148 string per byte in its upgraded form while it's really in downgraded form
1149 (e.g. estimates like strend-s as an upper bound for the number of
1150 characters left wouldn't work). So if we foresee the need of this
1151 (pattern starts with U or contains U0), we want to work on the encoded
1152 version of the string. Users are advised to upgrade their pack string
1153 themselves if they need to do a lot of unpacks like this on it
1156 need_utf8(const char *pat, const char *patend)
1160 PERL_ARGS_ASSERT_NEED_UTF8;
1162 while (pat < patend) {
1163 if (pat[0] == '#') {
1165 pat = (const char *) memchr(pat, '\n', patend-pat);
1166 if (!pat) return FALSE;
1167 } else if (pat[0] == 'U') {
1168 if (first || pat[1] == '0') return TRUE;
1169 } else first = FALSE;
1176 first_symbol(const char *pat, const char *patend) {
1177 PERL_ARGS_ASSERT_FIRST_SYMBOL;
1179 while (pat < patend) {
1180 if (pat[0] != '#') return pat[0];
1182 pat = (const char *) memchr(pat, '\n', patend-pat);
1190 =for apidoc unpackstring
1192 The engine implementing unpack() Perl function. C<unpackstring> puts the
1193 extracted list items on the stack and returns the number of elements.
1194 Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1199 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
1203 PERL_ARGS_ASSERT_UNPACKSTRING;
1205 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1206 else if (need_utf8(pat, patend)) {
1207 /* We probably should try to avoid this in case a scalar context call
1208 wouldn't get to the "U0" */
1209 STRLEN len = strend - s;
1210 s = (char *) bytes_to_utf8((U8 *) s, &len);
1213 flags |= FLAG_DO_UTF8;
1216 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1217 flags |= FLAG_PARSE_UTF8;
1219 TEMPSYM_INIT(&sym, pat, patend, flags);
1221 return unpack_rec(&sym, s, s, strend, NULL );
1225 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
1229 const I32 start_sp_offset = SP - PL_stack_base;
1234 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1235 bool beyond = FALSE;
1236 bool explicit_length;
1237 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1238 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1240 PERL_ARGS_ASSERT_UNPACK_REC;
1242 symptr->strbeg = s - strbeg;
1244 while (next_symbol(symptr)) {
1247 I32 datumtype = symptr->code;
1248 /* do first one only unless in list context
1249 / is implemented by unpacking the count, then popping it from the
1250 stack, so must check that we're not in the middle of a / */
1251 if ( unpack_only_one
1252 && (SP - PL_stack_base == start_sp_offset + 1)
1253 && (datumtype != '/') ) /* XXX can this be omitted */
1256 switch (howlen = symptr->howlen) {
1258 len = strend - strbeg; /* long enough */
1261 /* e_no_len and e_number */
1262 len = symptr->length;
1266 explicit_length = TRUE;
1268 beyond = s >= strend;
1270 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1272 /* props nonzero means we can process this letter. */
1273 const long size = props & PACK_SIZE_MASK;
1274 const long howmany = (strend - s) / size;
1278 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1279 if (len && unpack_only_one) len = 1;
1285 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1287 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1290 if (howlen == e_no_len)
1291 len = 16; /* len is not specified */
1299 tempsym_t savsym = *symptr;
1300 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1301 symptr->flags |= group_modifiers;
1302 symptr->patend = savsym.grpend;
1303 symptr->previous = &savsym;
1306 if (len && unpack_only_one) len = 1;
1308 symptr->patptr = savsym.grpbeg;
1309 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1310 else symptr->flags &= ~FLAG_PARSE_UTF8;
1311 unpack_rec(symptr, s, strbeg, strend, &s);
1312 if (s == strend && savsym.howlen == e_star)
1313 break; /* No way to continue */
1316 savsym.flags = symptr->flags & ~group_modifiers;
1320 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1321 case '.' | TYPE_IS_SHRIEKING:
1326 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1327 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1328 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1329 const bool u8 = utf8;
1331 if (howlen == e_star) from = strbeg;
1332 else if (len <= 0) from = s;
1334 tempsym_t *group = symptr;
1336 while (--len && group) group = group->previous;
1337 from = group ? strbeg + group->strbeg : strbeg;
1340 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1341 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1345 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1346 case '@' | TYPE_IS_SHRIEKING:
1349 s = strbeg + symptr->strbeg;
1350 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1351 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1352 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1358 Perl_croak(aTHX_ "'@' outside of string in unpack");
1363 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1366 Perl_croak(aTHX_ "'@' outside of string in unpack");
1370 case 'X' | TYPE_IS_SHRIEKING:
1371 if (!len) /* Avoid division by 0 */
1374 const char *hop, *last;
1376 hop = last = strbeg;
1378 hop += UTF8SKIP(hop);
1385 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1389 len = (s - strbeg) % len;
1395 Perl_croak(aTHX_ "'X' outside of string in unpack");
1396 while (--s, UTF8_IS_CONTINUATION(*s)) {
1398 Perl_croak(aTHX_ "'X' outside of string in unpack");
1403 if (len > s - strbeg)
1404 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1408 case 'x' | TYPE_IS_SHRIEKING: {
1410 if (!len) /* Avoid division by 0 */
1412 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1413 else ai32 = (s - strbeg) % len;
1414 if (ai32 == 0) break;
1422 Perl_croak(aTHX_ "'x' outside of string in unpack");
1427 if (len > strend - s)
1428 Perl_croak(aTHX_ "'x' outside of string in unpack");
1433 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1439 /* Preliminary length estimate is assumed done in 'W' */
1440 if (len > strend - s) len = strend - s;
1446 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1447 if (hop >= strend) {
1449 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1454 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1456 } else if (len > strend - s)
1459 if (datumtype == 'Z') {
1460 /* 'Z' strips stuff after first null */
1461 const char *ptr, *end;
1463 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1464 sv = newSVpvn(s, ptr-s);
1465 if (howlen == e_star) /* exact for 'Z*' */
1466 len = ptr-s + (ptr != strend ? 1 : 0);
1467 } else if (datumtype == 'A') {
1468 /* 'A' strips both nulls and spaces */
1470 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1471 for (ptr = s+len-1; ptr >= s; ptr--)
1472 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1473 !is_utf8_space((U8 *) ptr)) break;
1474 if (ptr >= s) ptr += UTF8SKIP(ptr);
1477 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1479 for (ptr = s+len-1; ptr >= s; ptr--)
1480 if (*ptr != 0 && !isSPACE(*ptr)) break;
1483 sv = newSVpvn(s, ptr-s);
1484 } else sv = newSVpvn(s, len);
1488 /* Undo any upgrade done due to need_utf8() */
1489 if (!(symptr->flags & FLAG_WAS_UTF8))
1490 sv_utf8_downgrade(sv, 0);
1498 if (howlen == e_star || len > (strend - s) * 8)
1499 len = (strend - s) * 8;
1502 while (len >= 8 && s < strend) {
1503 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1508 cuv += PL_bitcount[*(U8 *)s++];
1511 if (len && s < strend) {
1513 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1514 if (datumtype == 'b')
1516 if (bits & 1) cuv++;
1521 if (bits & 0x80) cuv++;
1528 sv = sv_2mortal(newSV(len ? len : 1));
1531 if (datumtype == 'b') {
1533 const I32 ai32 = len;
1534 for (len = 0; len < ai32; len++) {
1535 if (len & 7) bits >>= 1;
1537 if (s >= strend) break;
1538 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1539 } else bits = *(U8 *) s++;
1540 *str++ = bits & 1 ? '1' : '0';
1544 const I32 ai32 = len;
1545 for (len = 0; len < ai32; len++) {
1546 if (len & 7) bits <<= 1;
1548 if (s >= strend) break;
1549 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1550 } else bits = *(U8 *) s++;
1551 *str++ = bits & 0x80 ? '1' : '0';
1555 SvCUR_set(sv, str - SvPVX_const(sv));
1562 /* Preliminary length estimate, acceptable for utf8 too */
1563 if (howlen == e_star || len > (strend - s) * 2)
1564 len = (strend - s) * 2;
1566 sv = sv_2mortal(newSV(len ? len : 1));
1570 if (datumtype == 'h') {
1573 for (len = 0; len < ai32; len++) {
1574 if (len & 1) bits >>= 4;
1576 if (s >= strend) break;
1577 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1578 } else bits = * (U8 *) s++;
1580 *str++ = PL_hexdigit[bits & 15];
1584 const I32 ai32 = len;
1585 for (len = 0; len < ai32; len++) {
1586 if (len & 1) bits <<= 4;
1588 if (s >= strend) break;
1589 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1590 } else bits = *(U8 *) s++;
1592 *str++ = PL_hexdigit[(bits >> 4) & 15];
1597 SvCUR_set(sv, str - SvPVX_const(sv));
1604 if (explicit_length)
1605 /* Switch to "character" mode */
1606 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1611 while (len-- > 0 && s < strend) {
1616 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1617 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1618 if (retlen == (STRLEN) -1 || retlen == 0)
1619 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1623 aint = *(U8 *)(s)++;
1624 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1628 else if (checksum > bits_in_uv)
1629 cdouble += (NV)aint;
1637 while (len-- > 0 && s < strend) {
1639 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1640 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1641 if (retlen == (STRLEN) -1 || retlen == 0)
1642 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1646 else if (checksum > bits_in_uv)
1647 cdouble += (NV) val;
1651 } else if (!checksum)
1653 const U8 ch = *(U8 *) s++;
1656 else if (checksum > bits_in_uv)
1657 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1659 while (len-- > 0) cuv += *(U8 *) s++;
1663 if (explicit_length && howlen != e_star) {
1664 /* Switch to "bytes in UTF-8" mode */
1665 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1667 /* Should be impossible due to the need_utf8() test */
1668 Perl_croak(aTHX_ "U0 mode on a byte string");
1672 if (len > strend - s) len = strend - s;
1674 if (len && unpack_only_one) len = 1;
1678 while (len-- > 0 && s < strend) {
1682 U8 result[UTF8_MAXLEN];
1683 const char *ptr = s;
1685 /* Bug: warns about bad utf8 even if we are short on bytes
1686 and will break out of the loop */
1687 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1690 len = UTF8SKIP(result);
1691 if (!uni_to_bytes(aTHX_ &ptr, strend,
1692 (char *) &result[1], len-1, 'U')) break;
1693 auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT);
1696 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
1697 if (retlen == (STRLEN) -1 || retlen == 0)
1698 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1703 else if (checksum > bits_in_uv)
1704 cdouble += (NV) auv;
1709 case 's' | TYPE_IS_SHRIEKING:
1710 #if SHORTSIZE != SIZE16
1713 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1714 DO_BO_UNPACK(ashort, s);
1717 else if (checksum > bits_in_uv)
1718 cdouble += (NV)ashort;
1730 #if U16SIZE > SIZE16
1733 SHIFT16(utf8, s, strend, &ai16, datumtype);
1734 DO_BO_UNPACK(ai16, 16);
1735 #if U16SIZE > SIZE16
1741 else if (checksum > bits_in_uv)
1742 cdouble += (NV)ai16;
1747 case 'S' | TYPE_IS_SHRIEKING:
1748 #if SHORTSIZE != SIZE16
1750 unsigned short aushort;
1751 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1752 DO_BO_UNPACK(aushort, s);
1755 else if (checksum > bits_in_uv)
1756 cdouble += (NV)aushort;
1769 #if U16SIZE > SIZE16
1772 SHIFT16(utf8, s, strend, &au16, datumtype);
1773 DO_BO_UNPACK(au16, 16);
1775 if (datumtype == 'n')
1776 au16 = PerlSock_ntohs(au16);
1779 if (datumtype == 'v')
1784 else if (checksum > bits_in_uv)
1785 cdouble += (NV) au16;
1790 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1791 case 'v' | TYPE_IS_SHRIEKING:
1792 case 'n' | TYPE_IS_SHRIEKING:
1795 # if U16SIZE > SIZE16
1798 SHIFT16(utf8, s, strend, &ai16, datumtype);
1800 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1801 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1802 # endif /* HAS_NTOHS */
1804 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1805 ai16 = (I16) vtohs((U16) ai16);
1806 # endif /* HAS_VTOHS */
1809 else if (checksum > bits_in_uv)
1810 cdouble += (NV) ai16;
1815 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1817 case 'i' | TYPE_IS_SHRIEKING:
1820 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1821 DO_BO_UNPACK(aint, i);
1824 else if (checksum > bits_in_uv)
1825 cdouble += (NV)aint;
1831 case 'I' | TYPE_IS_SHRIEKING:
1834 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1835 DO_BO_UNPACK(auint, i);
1838 else if (checksum > bits_in_uv)
1839 cdouble += (NV)auint;
1847 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1848 #if IVSIZE == INTSIZE
1849 DO_BO_UNPACK(aiv, i);
1850 #elif IVSIZE == LONGSIZE
1851 DO_BO_UNPACK(aiv, l);
1852 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1853 DO_BO_UNPACK(aiv, 64);
1855 Perl_croak(aTHX_ "'j' not supported on this platform");
1859 else if (checksum > bits_in_uv)
1868 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1869 #if IVSIZE == INTSIZE
1870 DO_BO_UNPACK(auv, i);
1871 #elif IVSIZE == LONGSIZE
1872 DO_BO_UNPACK(auv, l);
1873 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1874 DO_BO_UNPACK(auv, 64);
1876 Perl_croak(aTHX_ "'J' not supported on this platform");
1880 else if (checksum > bits_in_uv)
1886 case 'l' | TYPE_IS_SHRIEKING:
1887 #if LONGSIZE != SIZE32
1890 SHIFT_VAR(utf8, s, strend, along, datumtype);
1891 DO_BO_UNPACK(along, l);
1894 else if (checksum > bits_in_uv)
1895 cdouble += (NV)along;
1906 #if U32SIZE > SIZE32
1909 SHIFT32(utf8, s, strend, &ai32, datumtype);
1910 DO_BO_UNPACK(ai32, 32);
1911 #if U32SIZE > SIZE32
1912 if (ai32 > 2147483647) ai32 -= 4294967296;
1916 else if (checksum > bits_in_uv)
1917 cdouble += (NV)ai32;
1922 case 'L' | TYPE_IS_SHRIEKING:
1923 #if LONGSIZE != SIZE32
1925 unsigned long aulong;
1926 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1927 DO_BO_UNPACK(aulong, l);
1930 else if (checksum > bits_in_uv)
1931 cdouble += (NV)aulong;
1944 #if U32SIZE > SIZE32
1947 SHIFT32(utf8, s, strend, &au32, datumtype);
1948 DO_BO_UNPACK(au32, 32);
1950 if (datumtype == 'N')
1951 au32 = PerlSock_ntohl(au32);
1954 if (datumtype == 'V')
1959 else if (checksum > bits_in_uv)
1960 cdouble += (NV)au32;
1965 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1966 case 'V' | TYPE_IS_SHRIEKING:
1967 case 'N' | TYPE_IS_SHRIEKING:
1970 # if U32SIZE > SIZE32
1973 SHIFT32(utf8, s, strend, &ai32, datumtype);
1975 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1976 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1979 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1980 ai32 = (I32)vtohl((U32)ai32);
1984 else if (checksum > bits_in_uv)
1985 cdouble += (NV)ai32;
1990 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1994 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1995 DO_BO_UNPACK_PC(aptr);
1996 /* newSVpv generates undef if aptr is NULL */
1997 mPUSHs(newSVpv(aptr, 0));
2005 while (len > 0 && s < strend) {
2007 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
2008 auv = (auv << 7) | (ch & 0x7f);
2009 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
2017 if (++bytes >= sizeof(UV)) { /* promote to string */
2020 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
2021 while (s < strend) {
2022 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
2023 sv = mul128(sv, (U8)(ch & 0x7f));
2029 t = SvPV_nolen_const(sv);
2038 if ((s >= strend) && bytes)
2039 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
2043 if (symptr->howlen == e_star)
2044 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
2046 if (s + sizeof(char*) <= strend) {
2048 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2049 DO_BO_UNPACK_PC(aptr);
2050 /* newSVpvn generates undef if aptr is NULL */
2051 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
2058 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2059 DO_BO_UNPACK(aquad, 64);
2061 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
2062 newSViv((IV)aquad) : newSVnv((NV)aquad));
2063 else if (checksum > bits_in_uv)
2064 cdouble += (NV)aquad;
2072 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2073 DO_BO_UNPACK(auquad, 64);
2075 mPUSHs(auquad <= UV_MAX ?
2076 newSVuv((UV)auquad) : newSVnv((NV)auquad));
2077 else if (checksum > bits_in_uv)
2078 cdouble += (NV)auquad;
2083 #endif /* HAS_QUAD */
2084 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2088 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
2089 DO_BO_UNPACK_N(afloat, float);
2099 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
2100 DO_BO_UNPACK_N(adouble, double);
2110 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), datumtype);
2111 DO_BO_UNPACK_N(anv.nv, NV);
2118 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2122 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, sizeof(aldouble.bytes), datumtype);
2123 DO_BO_UNPACK_N(aldouble.ld, long double);
2125 mPUSHn(aldouble.ld);
2127 cdouble += aldouble.ld;
2133 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2134 sv = sv_2mortal(newSV(l));
2135 if (l) SvPOK_on(sv);
2138 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2143 next_uni_uu(aTHX_ &s, strend, &a);
2144 next_uni_uu(aTHX_ &s, strend, &b);
2145 next_uni_uu(aTHX_ &s, strend, &c);
2146 next_uni_uu(aTHX_ &s, strend, &d);
2147 hunk[0] = (char)((a << 2) | (b >> 4));
2148 hunk[1] = (char)((b << 4) | (c >> 2));
2149 hunk[2] = (char)((c << 6) | d);
2151 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2159 /* possible checksum byte */
2160 const char *skip = s+UTF8SKIP(s);
2161 if (skip < strend && *skip == '\n')
2167 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2171 len = PL_uudmap[*(U8*)s++] & 077;
2173 if (s < strend && ISUUCHAR(*s))
2174 a = PL_uudmap[*(U8*)s++] & 077;
2177 if (s < strend && ISUUCHAR(*s))
2178 b = PL_uudmap[*(U8*)s++] & 077;
2181 if (s < strend && ISUUCHAR(*s))
2182 c = PL_uudmap[*(U8*)s++] & 077;
2185 if (s < strend && ISUUCHAR(*s))
2186 d = PL_uudmap[*(U8*)s++] & 077;
2189 hunk[0] = (char)((a << 2) | (b >> 4));
2190 hunk[1] = (char)((b << 4) | (c >> 2));
2191 hunk[2] = (char)((c << 6) | d);
2193 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2198 else /* possible checksum byte */
2199 if (s + 1 < strend && s[1] == '\n')
2209 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2210 (checksum > bits_in_uv &&
2211 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2214 anv = (NV) (1 << (checksum & 15));
2215 while (checksum >= 16) {
2219 while (cdouble < 0.0)
2221 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2222 sv = newSVnv(cdouble);
2225 if (checksum < bits_in_uv) {
2226 UV mask = ((UV)1 << checksum) - 1;
2235 if (symptr->flags & FLAG_SLASH){
2236 if (SP - PL_stack_base - start_sp_offset <= 0)
2238 if( next_symbol(symptr) ){
2239 if( symptr->howlen == e_number )
2240 Perl_croak(aTHX_ "Count after length/code in unpack" );
2242 /* ...end of char buffer then no decent length available */
2243 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2245 /* take top of stack (hope it's numeric) */
2248 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2251 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2253 datumtype = symptr->code;
2254 explicit_length = FALSE;
2262 return SP - PL_stack_base - start_sp_offset;
2270 I32 gimme = GIMME_V;
2273 const char *pat = SvPV_const(left, llen);
2274 const char *s = SvPV_const(right, rlen);
2275 const char *strend = s + rlen;
2276 const char *patend = pat + llen;
2280 cnt = unpackstring(pat, patend, s, strend,
2281 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2282 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2285 if ( !cnt && gimme == G_SCALAR )
2286 PUSHs(&PL_sv_undef);
2291 doencodes(U8 *h, const char *s, I32 len)
2293 *h++ = PL_uuemap[len];
2295 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2296 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2297 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2298 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2303 const char r = (len > 1 ? s[1] : '\0');
2304 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2305 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2306 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2307 *h++ = PL_uuemap[0];
2314 S_is_an_int(pTHX_ const char *s, STRLEN l)
2316 SV *result = newSVpvn(s, l);
2317 char *const result_c = SvPV_nolen(result); /* convenience */
2318 char *out = result_c;
2322 PERL_ARGS_ASSERT_IS_AN_INT;
2330 SvREFCNT_dec(result);
2353 SvREFCNT_dec(result);
2359 SvCUR_set(result, out - result_c);
2363 /* pnum must be '\0' terminated */
2365 S_div128(pTHX_ SV *pnum, bool *done)
2368 char * const s = SvPV(pnum, len);
2372 PERL_ARGS_ASSERT_DIV128;
2376 const int i = m * 10 + (*t - '0');
2377 const int r = (i >> 7); /* r < 10 */
2385 SvCUR_set(pnum, (STRLEN) (t - s));
2390 =for apidoc packlist
2392 The engine implementing pack() Perl function.
2398 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
2403 PERL_ARGS_ASSERT_PACKLIST;
2405 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2407 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2408 Also make sure any UTF8 flag is loaded */
2409 SvPV_force_nolen(cat);
2411 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2413 (void)pack_rec( cat, &sym, beglist, endlist );
2416 /* like sv_utf8_upgrade, but also repoint the group start markers */
2418 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2421 const char *from_ptr, *from_start, *from_end, **marks, **m;
2422 char *to_start, *to_ptr;
2424 if (SvUTF8(sv)) return;
2426 from_start = SvPVX_const(sv);
2427 from_end = from_start + SvCUR(sv);
2428 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2429 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2430 if (from_ptr == from_end) {
2431 /* Simple case: no character needs to be changed */
2436 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2437 Newx(to_start, len, char);
2438 Copy(from_start, to_start, from_ptr-from_start, char);
2439 to_ptr = to_start + (from_ptr-from_start);
2441 Newx(marks, sym_ptr->level+2, const char *);
2442 for (group=sym_ptr; group; group = group->previous)
2443 marks[group->level] = from_start + group->strbeg;
2444 marks[sym_ptr->level+1] = from_end+1;
2445 for (m = marks; *m < from_ptr; m++)
2446 *m = to_start + (*m-from_start);
2448 for (;from_ptr < from_end; from_ptr++) {
2449 while (*m == from_ptr) *m++ = to_ptr;
2450 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2454 while (*m == from_ptr) *m++ = to_ptr;
2455 if (m != marks + sym_ptr->level+1) {
2458 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2459 "level=%d", m, marks, sym_ptr->level);
2461 for (group=sym_ptr; group; group = group->previous)
2462 group->strbeg = marks[group->level] - to_start;
2467 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2468 from_start -= SvIVX(sv);
2471 SvFLAGS(sv) &= ~SVf_OOK;
2474 Safefree(from_start);
2475 SvPV_set(sv, to_start);
2476 SvCUR_set(sv, to_ptr - to_start);
2481 /* Exponential string grower. Makes string extension effectively O(n)
2482 needed says how many extra bytes we need (not counting the final '\0')
2483 Only grows the string if there is an actual lack of space
2486 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2487 const STRLEN cur = SvCUR(sv);
2488 const STRLEN len = SvLEN(sv);
2491 PERL_ARGS_ASSERT_SV_EXP_GROW;
2493 if (len - cur > needed) return SvPVX(sv);
2494 extend = needed > len ? needed : len;
2495 return SvGROW(sv, len+extend+1);
2500 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2503 tempsym_t lookahead;
2504 I32 items = endlist - beglist;
2505 bool found = next_symbol(symptr);
2506 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2507 bool warn_utf8 = ckWARN(WARN_UTF8);
2509 PERL_ARGS_ASSERT_PACK_REC;
2511 if (symptr->level == 0 && found && symptr->code == 'U') {
2512 marked_upgrade(aTHX_ cat, symptr);
2513 symptr->flags |= FLAG_DO_UTF8;
2516 symptr->strbeg = SvCUR(cat);
2522 SV *lengthcode = NULL;
2523 I32 datumtype = symptr->code;
2524 howlen_t howlen = symptr->howlen;
2525 char *start = SvPVX(cat);
2526 char *cur = start + SvCUR(cat);
2528 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2532 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2536 /* e_no_len and e_number */
2537 len = symptr->length;
2542 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2544 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2545 /* We can process this letter. */
2546 STRLEN size = props & PACK_SIZE_MASK;
2547 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2551 /* Look ahead for next symbol. Do we have code/code? */
2552 lookahead = *symptr;
2553 found = next_symbol(&lookahead);
2554 if (symptr->flags & FLAG_SLASH) {
2556 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2557 if (strchr("aAZ", lookahead.code)) {
2558 if (lookahead.howlen == e_number) count = lookahead.length;
2561 count = sv_len_utf8(*beglist);
2564 if (lookahead.code == 'Z') count++;
2567 if (lookahead.howlen == e_number && lookahead.length < items)
2568 count = lookahead.length;
2571 lookahead.howlen = e_number;
2572 lookahead.length = count;
2573 lengthcode = sv_2mortal(newSViv(count));
2576 /* Code inside the switch must take care to properly update
2577 cat (CUR length and '\0' termination) if it updated *cur and
2578 doesn't simply leave using break */
2579 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2581 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2582 (int) TYPE_NO_MODIFIERS(datumtype));
2584 Perl_croak(aTHX_ "'%%' may not be used in pack");
2587 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2588 case '.' | TYPE_IS_SHRIEKING:
2591 if (howlen == e_star) from = start;
2592 else if (len == 0) from = cur;
2594 tempsym_t *group = symptr;
2596 while (--len && group) group = group->previous;
2597 from = group ? start + group->strbeg : start;
2600 len = SvIV(fromstr);
2602 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2603 case '@' | TYPE_IS_SHRIEKING:
2606 from = start + symptr->strbeg;
2608 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2609 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2610 #else /* PERL_PACK_CAN_SHRIEKSIGN */
2614 while (len && from < cur) {
2615 from += UTF8SKIP(from);
2619 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2621 /* Here we know from == cur */
2623 GROWING(0, cat, start, cur, len);
2624 Zero(cur, len, char);
2626 } else if (from < cur) {
2629 } else goto no_change;
2637 if (len > 0) goto grow;
2638 if (len == 0) goto no_change;
2645 tempsym_t savsym = *symptr;
2646 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2647 symptr->flags |= group_modifiers;
2648 symptr->patend = savsym.grpend;
2650 symptr->previous = &lookahead;
2653 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2654 else symptr->flags &= ~FLAG_PARSE_UTF8;
2655 was_utf8 = SvUTF8(cat);
2656 symptr->patptr = savsym.grpbeg;
2657 beglist = pack_rec(cat, symptr, beglist, endlist);
2658 if (SvUTF8(cat) != was_utf8)
2659 /* This had better be an upgrade while in utf8==0 mode */
2662 if (savsym.howlen == e_star && beglist == endlist)
2663 break; /* No way to continue */
2665 items = endlist - beglist;
2666 lookahead.flags = symptr->flags & ~group_modifiers;
2669 case 'X' | TYPE_IS_SHRIEKING:
2670 if (!len) /* Avoid division by 0 */
2677 hop += UTF8SKIP(hop);
2684 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2688 len = (cur-start) % len;
2692 if (len < 1) goto no_change;
2696 Perl_croak(aTHX_ "'%c' outside of string in pack",
2697 (int) TYPE_NO_MODIFIERS(datumtype));
2698 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2700 Perl_croak(aTHX_ "'%c' outside of string in pack",
2701 (int) TYPE_NO_MODIFIERS(datumtype));
2707 if (cur - start < len)
2708 Perl_croak(aTHX_ "'%c' outside of string in pack",
2709 (int) TYPE_NO_MODIFIERS(datumtype));
2712 if (cur < start+symptr->strbeg) {
2713 /* Make sure group starts don't point into the void */
2715 const STRLEN length = cur-start;
2716 for (group = symptr;
2717 group && length < group->strbeg;
2718 group = group->previous) group->strbeg = length;
2719 lookahead.strbeg = length;
2722 case 'x' | TYPE_IS_SHRIEKING: {
2724 if (!len) /* Avoid division by 0 */
2726 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2727 else ai32 = (cur - start) % len;
2728 if (ai32 == 0) goto no_change;
2740 aptr = SvPV_const(fromstr, fromlen);
2741 if (DO_UTF8(fromstr)) {
2742 const char *end, *s;
2744 if (!utf8 && !SvUTF8(cat)) {
2745 marked_upgrade(aTHX_ cat, symptr);
2746 lookahead.flags |= FLAG_DO_UTF8;
2747 lookahead.strbeg = symptr->strbeg;
2750 cur = start + SvCUR(cat);
2752 if (howlen == e_star) {
2753 if (utf8) goto string_copy;
2757 end = aptr + fromlen;
2758 fromlen = datumtype == 'Z' ? len-1 : len;
2759 while ((I32) fromlen > 0 && s < end) {
2764 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2767 if (datumtype == 'Z') len++;
2773 fromlen = len - fromlen;
2774 if (datumtype == 'Z') fromlen--;
2775 if (howlen == e_star) {
2777 if (datumtype == 'Z') len++;
2779 GROWING(0, cat, start, cur, len);
2780 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2781 datumtype | TYPE_IS_PACK))
2782 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2783 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2784 (int)datumtype, aptr, end, cur, (UV)fromlen);
2788 if (howlen == e_star) {
2790 if (datumtype == 'Z') len++;
2792 if (len <= (I32) fromlen) {
2794 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2796 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2798 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2799 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2801 while (fromlen > 0) {
2802 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2808 if (howlen == e_star) {
2810 if (datumtype == 'Z') len++;
2812 if (len <= (I32) fromlen) {
2814 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2816 GROWING(0, cat, start, cur, len);
2817 Copy(aptr, cur, fromlen, char);
2821 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2828 const char *str, *end;
2835 str = SvPV_const(fromstr, fromlen);
2836 end = str + fromlen;
2837 if (DO_UTF8(fromstr)) {
2839 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2841 utf8_source = FALSE;
2842 utf8_flags = 0; /* Unused, but keep compilers happy */
2844 if (howlen == e_star) len = fromlen;
2845 field_len = (len+7)/8;
2846 GROWING(utf8, cat, start, cur, field_len);
2847 if (len > (I32)fromlen) len = fromlen;
2850 if (datumtype == 'B')
2854 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2856 } else bits |= *str++ & 1;
2857 if (l & 7) bits <<= 1;
2859 PUSH_BYTE(utf8, cur, bits);
2864 /* datumtype == 'b' */
2868 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2869 if (val & 1) bits |= 0x80;
2870 } else if (*str++ & 1)
2872 if (l & 7) bits >>= 1;
2874 PUSH_BYTE(utf8, cur, bits);
2880 if (datumtype == 'B')
2881 bits <<= 7 - (l & 7);
2883 bits >>= 7 - (l & 7);
2884 PUSH_BYTE(utf8, cur, bits);
2887 /* Determine how many chars are left in the requested field */
2889 if (howlen == e_star) field_len = 0;
2890 else field_len -= l;
2891 Zero(cur, field_len, char);
2897 const char *str, *end;
2904 str = SvPV_const(fromstr, fromlen);
2905 end = str + fromlen;
2906 if (DO_UTF8(fromstr)) {
2908 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2910 utf8_source = FALSE;
2911 utf8_flags = 0; /* Unused, but keep compilers happy */
2913 if (howlen == e_star) len = fromlen;
2914 field_len = (len+1)/2;
2915 GROWING(utf8, cat, start, cur, field_len);
2916 if (!utf8 && len > (I32)fromlen) len = fromlen;
2919 if (datumtype == 'H')
2923 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2924 if (val < 256 && isALPHA(val))
2925 bits |= (val + 9) & 0xf;
2928 } else if (isALPHA(*str))
2929 bits |= (*str++ + 9) & 0xf;
2931 bits |= *str++ & 0xf;
2932 if (l & 1) bits <<= 4;
2934 PUSH_BYTE(utf8, cur, bits);
2942 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2943 if (val < 256 && isALPHA(val))
2944 bits |= ((val + 9) & 0xf) << 4;
2946 bits |= (val & 0xf) << 4;
2947 } else if (isALPHA(*str))
2948 bits |= ((*str++ + 9) & 0xf) << 4;
2950 bits |= (*str++ & 0xf) << 4;
2951 if (l & 1) bits >>= 4;
2953 PUSH_BYTE(utf8, cur, bits);
2959 PUSH_BYTE(utf8, cur, bits);
2962 /* Determine how many chars are left in the requested field */
2964 if (howlen == e_star) field_len = 0;
2965 else field_len -= l;
2966 Zero(cur, field_len, char);
2974 aiv = SvIV(fromstr);
2975 if ((-128 > aiv || aiv > 127))
2976 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2977 "Character in 'c' format wrapped in pack");
2978 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2983 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2989 aiv = SvIV(fromstr);
2990 if ((0 > aiv || aiv > 0xff))
2991 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2992 "Character in 'C' format wrapped in pack");
2993 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2998 U8 in_bytes = (U8)IN_BYTES;
3000 end = start+SvLEN(cat)-1;
3001 if (utf8) end -= UTF8_MAXLEN-1;
3005 auv = SvUV(fromstr);
3006 if (in_bytes) auv = auv % 0x100;
3011 SvCUR_set(cat, cur - start);
3013 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3014 end = start+SvLEN(cat)-UTF8_MAXLEN;
3016 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3019 0 : UNICODE_ALLOW_ANY);
3024 SvCUR_set(cat, cur - start);
3025 marked_upgrade(aTHX_ cat, symptr);
3026 lookahead.flags |= FLAG_DO_UTF8;
3027 lookahead.strbeg = symptr->strbeg;
3030 cur = start + SvCUR(cat);
3031 end = start+SvLEN(cat)-UTF8_MAXLEN;
3034 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3035 "Character in 'W' format wrapped in pack");
3040 SvCUR_set(cat, cur - start);
3041 GROWING(0, cat, start, cur, len+1);
3042 end = start+SvLEN(cat)-1;
3044 *(U8 *) cur++ = (U8)auv;
3053 if (!(symptr->flags & FLAG_DO_UTF8)) {
3054 marked_upgrade(aTHX_ cat, symptr);
3055 lookahead.flags |= FLAG_DO_UTF8;
3056 lookahead.strbeg = symptr->strbeg;
3062 end = start+SvLEN(cat);
3063 if (!utf8) end -= UTF8_MAXLEN;
3067 auv = SvUV(fromstr);
3069 U8 buffer[UTF8_MAXLEN], *endb;
3070 endb = uvuni_to_utf8_flags(buffer, auv,
3072 0 : UNICODE_ALLOW_ANY);
3073 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3075 SvCUR_set(cat, cur - start);
3076 GROWING(0, cat, start, cur,
3077 len+(endb-buffer)*UTF8_EXPAND);
3078 end = start+SvLEN(cat);
3080 cur = bytes_to_uni(buffer, endb-buffer, cur);
3084 SvCUR_set(cat, cur - start);
3085 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3086 end = start+SvLEN(cat)-UTF8_MAXLEN;
3088 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3090 0 : UNICODE_ALLOW_ANY);
3095 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3101 anv = SvNV(fromstr);
3103 /* VOS does not automatically map a floating-point overflow
3104 during conversion from double to float into infinity, so we
3105 do it by hand. This code should either be generalized for
3106 any OS that needs it, or removed if and when VOS implements
3107 posix-976 (suggestion to support mapping to infinity).
3108 Paul.Green@stratus.com 02-04-02. */
3110 extern const float _float_constants[];
3112 afloat = _float_constants[0]; /* single prec. inf. */
3113 else if (anv < -FLT_MAX)
3114 afloat = _float_constants[0]; /* single prec. inf. */
3115 else afloat = (float) anv;
3118 # if defined(VMS) && !defined(__IEEE_FP)
3119 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3120 * on Alpha; fake it if we don't have them.
3124 else if (anv < -FLT_MAX)
3126 else afloat = (float)anv;
3128 afloat = (float)anv;
3130 #endif /* __VOS__ */
3131 DO_BO_PACK_N(afloat, float);
3132 PUSH_VAR(utf8, cur, afloat);
3140 anv = SvNV(fromstr);
3142 /* VOS does not automatically map a floating-point overflow
3143 during conversion from long double to double into infinity,
3144 so we do it by hand. This code should either be generalized
3145 for any OS that needs it, or removed if and when VOS
3146 implements posix-976 (suggestion to support mapping to
3147 infinity). Paul.Green@stratus.com 02-04-02. */
3149 extern const double _double_constants[];
3151 adouble = _double_constants[0]; /* double prec. inf. */
3152 else if (anv < -DBL_MAX)
3153 adouble = _double_constants[0]; /* double prec. inf. */
3154 else adouble = (double) anv;
3157 # if defined(VMS) && !defined(__IEEE_FP)
3158 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3159 * on Alpha; fake it if we don't have them.
3163 else if (anv < -DBL_MAX)
3165 else adouble = (double)anv;
3167 adouble = (double)anv;
3169 #endif /* __VOS__ */
3170 DO_BO_PACK_N(adouble, double);
3171 PUSH_VAR(utf8, cur, adouble);
3176 Zero(&anv, 1, NV); /* can be long double with unused bits */
3180 /* to work round a gcc/x86 bug; don't use SvNV */
3181 anv.nv = sv_2nv(fromstr);
3183 anv.nv = SvNV(fromstr);
3185 DO_BO_PACK_N(anv, NV);
3186 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes));
3190 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3193 /* long doubles can have unused bits, which may be nonzero */
3194 Zero(&aldouble, 1, long double);
3198 /* to work round a gcc/x86 bug; don't use SvNV */
3199 aldouble.ld = (long double)sv_2nv(fromstr);
3201 aldouble.ld = (long double)SvNV(fromstr);
3203 DO_BO_PACK_N(aldouble, long double);
3204 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes));
3209 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3210 case 'n' | TYPE_IS_SHRIEKING:
3216 ai16 = (I16)SvIV(fromstr);
3218 ai16 = PerlSock_htons(ai16);
3220 PUSH16(utf8, cur, &ai16);
3223 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3224 case 'v' | TYPE_IS_SHRIEKING:
3230 ai16 = (I16)SvIV(fromstr);
3234 PUSH16(utf8, cur, &ai16);
3237 case 'S' | TYPE_IS_SHRIEKING:
3238 #if SHORTSIZE != SIZE16
3240 unsigned short aushort;
3242 aushort = SvUV(fromstr);
3243 DO_BO_PACK(aushort, s);
3244 PUSH_VAR(utf8, cur, aushort);
3254 au16 = (U16)SvUV(fromstr);
3255 DO_BO_PACK(au16, 16);
3256 PUSH16(utf8, cur, &au16);
3259 case 's' | TYPE_IS_SHRIEKING:
3260 #if SHORTSIZE != SIZE16
3264 ashort = SvIV(fromstr);
3265 DO_BO_PACK(ashort, s);
3266 PUSH_VAR(utf8, cur, ashort);
3276 ai16 = (I16)SvIV(fromstr);
3277 DO_BO_PACK(ai16, 16);
3278 PUSH16(utf8, cur, &ai16);
3282 case 'I' | TYPE_IS_SHRIEKING:
3286 auint = SvUV(fromstr);
3287 DO_BO_PACK(auint, i);
3288 PUSH_VAR(utf8, cur, auint);
3295 aiv = SvIV(fromstr);
3296 #if IVSIZE == INTSIZE
3298 #elif IVSIZE == LONGSIZE
3300 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3301 DO_BO_PACK(aiv, 64);
3303 Perl_croak(aTHX_ "'j' not supported on this platform");
3305 PUSH_VAR(utf8, cur, aiv);
3312 auv = SvUV(fromstr);
3313 #if UVSIZE == INTSIZE
3315 #elif UVSIZE == LONGSIZE
3317 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3318 DO_BO_PACK(auv, 64);
3320 Perl_croak(aTHX_ "'J' not supported on this platform");
3322 PUSH_VAR(utf8, cur, auv);
3329 anv = SvNV(fromstr);
3333 SvCUR_set(cat, cur - start);
3334 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3337 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3338 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3339 any negative IVs will have already been got by the croak()
3340 above. IOK is untrue for fractions, so we test them
3341 against UV_MAX_P1. */
3342 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3343 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3344 char *in = buf + sizeof(buf);
3345 UV auv = SvUV(fromstr);
3348 *--in = (char)((auv & 0x7f) | 0x80);
3351 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3352 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3353 in, (buf + sizeof(buf)) - in);
3354 } else if (SvPOKp(fromstr))
3356 else if (SvNOKp(fromstr)) {
3357 /* 10**NV_MAX_10_EXP is the largest power of 10
3358 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
3359 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3360 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3361 And with that many bytes only Inf can overflow.
3362 Some C compilers are strict about integral constant
3363 expressions so we conservatively divide by a slightly
3364 smaller integer instead of multiplying by the exact
3365 floating-point value.
3367 #ifdef NV_MAX_10_EXP
3368 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3369 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3371 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3372 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3374 char *in = buf + sizeof(buf);
3376 anv = Perl_floor(anv);
3378 const NV next = Perl_floor(anv / 128);
3379 if (in <= buf) /* this cannot happen ;-) */
3380 Perl_croak(aTHX_ "Cannot compress integer in pack");
3381 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3384 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3385 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3386 in, (buf + sizeof(buf)) - in);
3395 /* Copy string and check for compliance */
3396 from = SvPV_const(fromstr, len);
3397 if ((norm = is_an_int(from, len)) == NULL)
3398 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3400 Newx(result, len, char);
3403 while (!done) *--in = div128(norm, &done) | 0x80;
3404 result[len - 1] &= 0x7F; /* clear continue bit */
3405 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3406 in, (result + len) - in);
3408 SvREFCNT_dec(norm); /* free norm */
3413 case 'i' | TYPE_IS_SHRIEKING:
3417 aint = SvIV(fromstr);
3418 DO_BO_PACK(aint, i);
3419 PUSH_VAR(utf8, cur, aint);
3422 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3423 case 'N' | TYPE_IS_SHRIEKING:
3429 au32 = SvUV(fromstr);
3431 au32 = PerlSock_htonl(au32);
3433 PUSH32(utf8, cur, &au32);
3436 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3437 case 'V' | TYPE_IS_SHRIEKING:
3443 au32 = SvUV(fromstr);
3447 PUSH32(utf8, cur, &au32);
3450 case 'L' | TYPE_IS_SHRIEKING:
3451 #if LONGSIZE != SIZE32
3453 unsigned long aulong;
3455 aulong = SvUV(fromstr);
3456 DO_BO_PACK(aulong, l);
3457 PUSH_VAR(utf8, cur, aulong);
3467 au32 = SvUV(fromstr);
3468 DO_BO_PACK(au32, 32);
3469 PUSH32(utf8, cur, &au32);
3472 case 'l' | TYPE_IS_SHRIEKING:
3473 #if LONGSIZE != SIZE32
3477 along = SvIV(fromstr);
3478 DO_BO_PACK(along, l);
3479 PUSH_VAR(utf8, cur, along);
3489 ai32 = SvIV(fromstr);
3490 DO_BO_PACK(ai32, 32);
3491 PUSH32(utf8, cur, &ai32);
3499 auquad = (Uquad_t) SvUV(fromstr);
3500 DO_BO_PACK(auquad, 64);
3501 PUSH_VAR(utf8, cur, auquad);
3508 aquad = (Quad_t)SvIV(fromstr);
3509 DO_BO_PACK(aquad, 64);
3510 PUSH_VAR(utf8, cur, aquad);
3513 #endif /* HAS_QUAD */
3515 len = 1; /* assume SV is correct length */
3516 GROWING(utf8, cat, start, cur, sizeof(char *));
3523 SvGETMAGIC(fromstr);
3524 if (!SvOK(fromstr)) aptr = NULL;
3526 /* XXX better yet, could spirit away the string to
3527 * a safe spot and hang on to it until the result
3528 * of pack() (and all copies of the result) are
3531 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3532 !SvREADONLY(fromstr)))) {
3533 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3534 "Attempt to pack pointer to temporary value");
3536 if (SvPOK(fromstr) || SvNIOK(fromstr))
3537 aptr = SvPV_nomg_const_nolen(fromstr);
3539 aptr = SvPV_force_flags_nolen(fromstr, 0);
3541 DO_BO_PACK_PC(aptr);
3542 PUSH_VAR(utf8, cur, aptr);
3546 const char *aptr, *aend;
3550 if (len <= 2) len = 45;
3551 else len = len / 3 * 3;
3553 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3554 "Field too wide in 'u' format in pack");
3557 aptr = SvPV_const(fromstr, fromlen);
3558 from_utf8 = DO_UTF8(fromstr);
3560 aend = aptr + fromlen;
3561 fromlen = sv_len_utf8_nomg(fromstr);
3562 } else aend = NULL; /* Unused, but keep compilers happy */
3563 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3564 while (fromlen > 0) {
3567 U8 hunk[1+63/3*4+1];
3569 if ((I32)fromlen > len)
3575 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3576 'u' | TYPE_IS_PACK)) {
3578 SvCUR_set(cat, cur - start);
3579 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3580 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3581 aptr, aend, buffer, (long) todo);
3583 end = doencodes(hunk, buffer, todo);
3585 end = doencodes(hunk, aptr, todo);
3588 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3595 SvCUR_set(cat, cur - start);
3597 *symptr = lookahead;
3606 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3609 SV *pat_sv = *++MARK;
3610 const char *pat = SvPV_const(pat_sv, fromlen);
3611 const char *patend = pat + fromlen;
3617 packlist(cat, pat, patend, MARK, SP + 1);
3627 * c-indentation-style: bsd
3629 * indent-tabs-mode: nil
3632 * ex: set ts=8 sts=4 sw=4 et: