3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
21 /* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
32 #define PERL_IN_PP_PACK_C
35 /* Types used by pack/unpack */
37 e_no_len, /* no length */
38 e_number, /* number, [] */
42 typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
57 #define TEMPSYM_INIT(symptr, p, e, f) \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
81 U8 bytes[sizeof(long double)];
86 # define PERL_PACK_CAN_BYTEORDER
87 # define PERL_PACK_CAN_SHRIEKSIGN
93 /* Maximum number of bytes to which a byte can grow due to upgrade */
97 * Offset for integer pack/unpack.
99 * On architectures where I16 and I32 aren't really 16 and 32 bits,
100 * which for now are all Crays, pack and unpack have to play games.
104 * These values are required for portability of pack() output.
105 * If they're not right on your machine, then pack() and unpack()
106 * wouldn't work right anyway; you'll need to apply the Cray hack.
107 * (I'd like to check them with #if, but you can't use sizeof() in
108 * the preprocessor.) --???
111 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
112 defines are now in config.h. --Andy Dougherty April 1998
117 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
120 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
121 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
122 # define OFF16(p) ((char*)(p))
123 # define OFF32(p) ((char*)(p))
125 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
126 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
127 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
129 ++++ bad cray byte order
133 # define OFF16(p) ((char *) (p))
134 # define OFF32(p) ((char *) (p))
137 /* Only to be used inside a loop (see the break) */
138 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
140 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
142 Copy(s, OFF16(p), SIZE16, char); \
147 /* Only to be used inside a loop (see the break) */
148 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
150 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
152 Copy(s, OFF32(p), SIZE32, char); \
157 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
158 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
160 /* Only to be used inside a loop (see the break) */
161 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype) \
164 if (!uni_to_bytes(aTHX_ &s, strend, \
165 (char *) (buf), len, datumtype)) break; \
167 Copy(s, (char *) (buf), len, char); \
172 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
173 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype)
175 #define PUSH_VAR(utf8, aptr, var) \
176 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
178 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
179 #define MAX_SUB_TEMPLATE_LEVEL 100
181 /* flags (note that type modifiers can also be used as flags!) */
182 #define FLAG_WAS_UTF8 0x40
183 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
184 #define FLAG_UNPACK_ONLY_ONE 0x10
185 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
186 #define FLAG_SLASH 0x04
187 #define FLAG_COMMA 0x02
188 #define FLAG_PACK 0x01
191 S_mul128(pTHX_ SV *sv, U8 m)
194 char *s = SvPV(sv, len);
197 PERL_ARGS_ASSERT_MUL128;
199 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
200 SV * const tmpNew = newSVpvs("0000000000");
202 sv_catsv(tmpNew, sv);
203 SvREFCNT_dec(sv); /* free old sv */
208 while (!*t) /* trailing '\0'? */
211 const U32 i = ((*t - '0') << 7) + m;
212 *(t--) = '0' + (char)(i % 10);
218 /* Explosives and implosives. */
220 #if 'I' == 73 && 'J' == 74
221 /* On an ASCII/ISO kind of system */
222 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
225 Some other sort of character set - use memchr() so we don't match
228 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
232 #define TYPE_IS_SHRIEKING 0x100
233 #define TYPE_IS_BIG_ENDIAN 0x200
234 #define TYPE_IS_LITTLE_ENDIAN 0x400
235 #define TYPE_IS_PACK 0x800
236 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
237 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
238 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
240 #ifdef PERL_PACK_CAN_SHRIEKSIGN
241 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
243 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
246 #ifndef PERL_PACK_CAN_BYTEORDER
247 /* Put "can't" first because it is shorter */
248 # define TYPE_ENDIANNESS(t) 0
249 # define TYPE_NO_ENDIANNESS(t) (t)
251 # define ENDIANNESS_ALLOWED_TYPES ""
253 # define DO_BO_UNPACK(var, type)
254 # define DO_BO_PACK(var, type)
255 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
256 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
257 # define DO_BO_UNPACK_N(var, type)
258 # define DO_BO_PACK_N(var, type)
259 # define DO_BO_UNPACK_P(var)
260 # define DO_BO_PACK_P(var)
261 # define DO_BO_UNPACK_PC(var)
262 # define DO_BO_PACK_PC(var)
264 #else /* PERL_PACK_CAN_BYTEORDER */
266 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
267 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
269 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
271 # define DO_BO_UNPACK(var, type) \
273 switch (TYPE_ENDIANNESS(datumtype)) { \
274 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
275 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
280 # define DO_BO_PACK(var, type) \
282 switch (TYPE_ENDIANNESS(datumtype)) { \
283 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
284 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
289 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
291 switch (TYPE_ENDIANNESS(datumtype)) { \
292 case TYPE_IS_BIG_ENDIAN: \
293 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
295 case TYPE_IS_LITTLE_ENDIAN: \
296 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
303 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
305 switch (TYPE_ENDIANNESS(datumtype)) { \
306 case TYPE_IS_BIG_ENDIAN: \
307 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
309 case TYPE_IS_LITTLE_ENDIAN: \
310 var = (post_cast *) my_htole ## type ((pre_cast) var); \
317 # define BO_CANT_DOIT(action, type) \
319 switch (TYPE_ENDIANNESS(datumtype)) { \
320 case TYPE_IS_BIG_ENDIAN: \
321 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
322 "platform", #action, #type); \
324 case TYPE_IS_LITTLE_ENDIAN: \
325 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
326 "platform", #action, #type); \
333 # if PTRSIZE == INTSIZE
334 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
335 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
336 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
337 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
338 # elif PTRSIZE == LONGSIZE
339 # if LONGSIZE < IVSIZE && IVSIZE == 8
340 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, 64, IV, void)
341 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, 64, IV, void)
342 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, 64, IV, char)
343 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, 64, IV, char)
345 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
346 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
347 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
348 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
350 # elif PTRSIZE == IVSIZE
351 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
352 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
353 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
354 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
356 # define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
357 # define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
358 # define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
359 # define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
362 # if defined(my_htolen) && defined(my_letohn) && \
363 defined(my_htoben) && defined(my_betohn)
364 # define DO_BO_UNPACK_N(var, type) \
366 switch (TYPE_ENDIANNESS(datumtype)) { \
367 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
368 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
373 # define DO_BO_PACK_N(var, type) \
375 switch (TYPE_ENDIANNESS(datumtype)) { \
376 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
377 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
382 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
383 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
386 #endif /* PERL_PACK_CAN_BYTEORDER */
388 #define PACK_SIZE_CANNOT_CSUM 0x80
389 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
390 #define PACK_SIZE_MASK 0x3F
392 /* These tables are regenerated by genpacksizetables.pl (and then hand pasted
393 in). You're unlikely ever to need to regenerate them. */
395 #if TYPE_IS_SHRIEKING != 0x100
396 ++++shriek offset should be 256
399 typedef U8 packprops_t;
402 STATIC const packprops_t packprops[512] = {
404 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
405 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
406 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
407 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
409 /* C */ sizeof(unsigned char),
410 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
411 /* D */ LONG_DOUBLESIZE,
418 /* I */ sizeof(unsigned int),
425 #if defined(HAS_QUAD)
426 /* Q */ sizeof(Uquad_t),
433 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
435 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 /* c */ sizeof(char),
438 /* d */ sizeof(double),
440 /* f */ sizeof(float),
449 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
450 #if defined(HAS_QUAD)
451 /* q */ sizeof(Quad_t),
459 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
460 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
461 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
462 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
463 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
464 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
465 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
466 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
467 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
468 0, 0, 0, 0, 0, 0, 0, 0,
470 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
471 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
472 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
473 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
474 0, 0, 0, 0, 0, 0, 0, 0, 0,
475 /* I */ sizeof(unsigned int),
477 /* L */ sizeof(unsigned long),
479 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
485 /* S */ sizeof(unsigned short),
487 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
496 /* l */ sizeof(long),
498 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
504 /* s */ sizeof(short),
506 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
511 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
512 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
513 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
514 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
515 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
516 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
518 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
519 0, 0, 0, 0, 0, 0, 0, 0, 0
522 /* EBCDIC (or bust) */
523 STATIC const packprops_t packprops[512] = {
525 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
526 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
527 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
528 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
529 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
530 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
531 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
532 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
534 /* c */ sizeof(char),
535 /* d */ sizeof(double),
537 /* f */ sizeof(float),
547 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
548 #if defined(HAS_QUAD)
549 /* q */ sizeof(Quad_t),
553 0, 0, 0, 0, 0, 0, 0, 0, 0,
557 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
558 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
559 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
560 /* C */ sizeof(unsigned char),
561 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
562 /* D */ LONG_DOUBLESIZE,
569 /* I */ sizeof(unsigned int),
577 #if defined(HAS_QUAD)
578 /* Q */ sizeof(Uquad_t),
582 0, 0, 0, 0, 0, 0, 0, 0, 0,
585 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
587 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
588 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
589 0, 0, 0, 0, 0, 0, 0, 0, 0,
591 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
592 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
593 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
594 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
595 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
596 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
597 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
598 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
599 0, 0, 0, 0, 0, 0, 0, 0, 0,
601 0, 0, 0, 0, 0, 0, 0, 0, 0,
602 /* l */ sizeof(long),
604 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
609 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
610 /* s */ sizeof(short),
612 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
617 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
618 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
620 /* I */ sizeof(unsigned int),
621 0, 0, 0, 0, 0, 0, 0, 0, 0,
622 /* L */ sizeof(unsigned long),
624 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
629 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
630 /* S */ sizeof(unsigned short),
632 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
637 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
638 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
643 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
646 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
647 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
648 /* We try to process malformed UTF-8 as much as possible (preferably with
649 warnings), but these two mean we make no progress in the string and
650 might enter an infinite loop */
651 if (retlen == (STRLEN) -1 || retlen == 0)
652 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
653 (int) TYPE_NO_MODIFIERS(datumtype));
655 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
656 "Character in '%c' format wrapped in unpack",
657 (int) TYPE_NO_MODIFIERS(datumtype));
664 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
665 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
669 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
673 const char *from = *s;
675 const U32 flags = ckWARN(WARN_UTF8) ?
676 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
677 for (;buf_len > 0; buf_len--) {
678 if (from >= end) return FALSE;
679 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
680 if (retlen == (STRLEN) -1 || retlen == 0) {
681 from += UTF8SKIP(from);
683 } else from += retlen;
688 *(U8 *)buf++ = (U8)val;
690 /* We have enough characters for the buffer. Did we have problems ? */
693 /* Rewalk the string fragment while warning */
695 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
696 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
697 if (ptr >= end) break;
698 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
700 if (from > end) from = end;
703 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
704 WARN_PACK : WARN_UNPACK),
705 "Character(s) in '%c' format wrapped in %s",
706 (int) TYPE_NO_MODIFIERS(datumtype),
707 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
714 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
718 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
719 if (val >= 0x100 || !ISUUCHAR(val) ||
720 retlen == (STRLEN) -1 || retlen == 0) {
724 *out = PL_uudmap[val] & 077;
730 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
731 const U8 * const end = start + len;
733 PERL_ARGS_ASSERT_BYTES_TO_UNI;
735 while (start < end) {
736 const UV uv = NATIVE_TO_ASCII(*start);
737 if (UNI_IS_INVARIANT(uv))
738 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
740 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
741 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
748 #define PUSH_BYTES(utf8, cur, buf, len) \
751 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
753 Copy(buf, cur, len, char); \
758 #define GROWING(utf8, cat, start, cur, in_len) \
760 STRLEN glen = (in_len); \
761 if (utf8) glen *= UTF8_EXPAND; \
762 if ((cur) + glen >= (start) + SvLEN(cat)) { \
763 (start) = sv_exp_grow(cat, glen); \
764 (cur) = (start) + SvCUR(cat); \
768 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
770 const STRLEN glen = (in_len); \
772 if (utf8) gl *= UTF8_EXPAND; \
773 if ((cur) + gl >= (start) + SvLEN(cat)) { \
775 SvCUR_set((cat), (cur) - (start)); \
776 (start) = sv_exp_grow(cat, gl); \
777 (cur) = (start) + SvCUR(cat); \
779 PUSH_BYTES(utf8, cur, buf, glen); \
782 #define PUSH_BYTE(utf8, s, byte) \
785 const U8 au8 = (byte); \
786 (s) = bytes_to_uni(&au8, 1, (s)); \
787 } else *(U8 *)(s)++ = (byte); \
790 /* Only to be used inside a loop (see the break) */
791 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
794 if (str >= end) break; \
795 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
796 if (retlen == (STRLEN) -1 || retlen == 0) { \
798 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
803 static const char *_action( const tempsym_t* symptr )
805 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
808 /* Returns the sizeof() struct described by pat */
810 S_measure_struct(pTHX_ tempsym_t* symptr)
814 PERL_ARGS_ASSERT_MEASURE_STRUCT;
816 while (next_symbol(symptr)) {
820 switch (symptr->howlen) {
822 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
826 /* e_no_len and e_number */
827 len = symptr->length;
831 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
834 /* endianness doesn't influence the size of a type */
835 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
837 Perl_croak(aTHX_ "Invalid type '%c' in %s",
838 (int)TYPE_NO_MODIFIERS(symptr->code),
840 #ifdef PERL_PACK_CAN_SHRIEKSIGN
841 case '.' | TYPE_IS_SHRIEKING:
842 case '@' | TYPE_IS_SHRIEKING:
847 case 'U': /* XXXX Is it correct? */
850 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
851 (int) TYPE_NO_MODIFIERS(symptr->code),
858 tempsym_t savsym = *symptr;
859 symptr->patptr = savsym.grpbeg;
860 symptr->patend = savsym.grpend;
861 /* XXXX Theoretically, we need to measure many times at
862 different positions, since the subexpression may contain
863 alignment commands, but be not of aligned length.
864 Need to detect this and croak(). */
865 size = measure_struct(symptr);
869 case 'X' | TYPE_IS_SHRIEKING:
870 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
872 if (!len) /* Avoid division by 0 */
874 len = total % len; /* Assumed: the start is aligned. */
879 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
881 case 'x' | TYPE_IS_SHRIEKING:
882 if (!len) /* Avoid division by 0 */
884 star = total % len; /* Assumed: the start is aligned. */
885 if (star) /* Other portable ways? */
909 size = sizeof(char*);
919 /* locate matching closing parenthesis or bracket
920 * returns char pointer to char after match, or NULL
923 S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
925 PERL_ARGS_ASSERT_GROUP_END;
927 while (patptr < patend) {
928 const char c = *patptr++;
935 while (patptr < patend && *patptr != '\n')
939 patptr = group_end(patptr, patend, ')') + 1;
941 patptr = group_end(patptr, patend, ']') + 1;
943 Perl_croak(aTHX_ "No group ending character '%c' found in template",
949 /* Convert unsigned decimal number to binary.
950 * Expects a pointer to the first digit and address of length variable
951 * Advances char pointer to 1st non-digit char and returns number
954 S_get_num(pTHX_ const char *patptr, I32 *lenptr )
956 I32 len = *patptr++ - '0';
958 PERL_ARGS_ASSERT_GET_NUM;
960 while (isDIGIT(*patptr)) {
961 if (len >= 0x7FFFFFFF/10)
962 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
963 len = (len * 10) + (*patptr++ - '0');
969 /* The marvellous template parsing routine: Using state stored in *symptr,
970 * locates next template code and count
973 S_next_symbol(pTHX_ tempsym_t* symptr )
975 const char* patptr = symptr->patptr;
976 const char* const patend = symptr->patend;
978 PERL_ARGS_ASSERT_NEXT_SYMBOL;
980 symptr->flags &= ~FLAG_SLASH;
982 while (patptr < patend) {
983 if (isSPACE(*patptr))
985 else if (*patptr == '#') {
987 while (patptr < patend && *patptr != '\n')
992 /* We should have found a template code */
993 I32 code = *patptr++ & 0xFF;
994 U32 inherited_modifiers = 0;
996 if (code == ','){ /* grandfather in commas but with a warning */
997 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
998 symptr->flags |= FLAG_COMMA;
999 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1000 "Invalid type ',' in %s", _action( symptr ) );
1005 /* for '(', skip to ')' */
1007 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
1008 Perl_croak(aTHX_ "()-group starts with a count in %s",
1009 _action( symptr ) );
1010 symptr->grpbeg = patptr;
1011 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
1012 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
1013 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
1014 _action( symptr ) );
1017 /* look for group modifiers to inherit */
1018 if (TYPE_ENDIANNESS(symptr->flags)) {
1019 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
1020 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
1023 /* look for modifiers */
1024 while (patptr < patend) {
1025 const char *allowed;
1029 modifier = TYPE_IS_SHRIEKING;
1030 allowed = SHRIEKING_ALLOWED_TYPES;
1032 #ifdef PERL_PACK_CAN_BYTEORDER
1034 modifier = TYPE_IS_BIG_ENDIAN;
1035 allowed = ENDIANNESS_ALLOWED_TYPES;
1038 modifier = TYPE_IS_LITTLE_ENDIAN;
1039 allowed = ENDIANNESS_ALLOWED_TYPES;
1041 #endif /* PERL_PACK_CAN_BYTEORDER */
1051 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1052 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
1053 allowed, _action( symptr ) );
1055 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1056 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
1057 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
1058 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1059 TYPE_ENDIANNESS_MASK)
1060 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
1061 *patptr, _action( symptr ) );
1063 if ((code & modifier)) {
1064 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
1065 "Duplicate modifier '%c' after '%c' in %s",
1066 *patptr, (int) TYPE_NO_MODIFIERS(code),
1067 _action( symptr ) );
1074 /* inherit modifiers */
1075 code |= inherited_modifiers;
1077 /* look for count and/or / */
1078 if (patptr < patend) {
1079 if (isDIGIT(*patptr)) {
1080 patptr = get_num( patptr, &symptr->length );
1081 symptr->howlen = e_number;
1083 } else if (*patptr == '*') {
1085 symptr->howlen = e_star;
1087 } else if (*patptr == '[') {
1088 const char* lenptr = ++patptr;
1089 symptr->howlen = e_number;
1090 patptr = group_end( patptr, patend, ']' ) + 1;
1091 /* what kind of [] is it? */
1092 if (isDIGIT(*lenptr)) {
1093 lenptr = get_num( lenptr, &symptr->length );
1094 if( *lenptr != ']' )
1095 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1096 _action( symptr ) );
1098 tempsym_t savsym = *symptr;
1099 symptr->patend = patptr-1;
1100 symptr->patptr = lenptr;
1101 savsym.length = measure_struct(symptr);
1105 symptr->howlen = e_no_len;
1110 while (patptr < patend) {
1111 if (isSPACE(*patptr))
1113 else if (*patptr == '#') {
1115 while (patptr < patend && *patptr != '\n')
1117 if (patptr < patend)
1120 if (*patptr == '/') {
1121 symptr->flags |= FLAG_SLASH;
1123 if (patptr < patend &&
1124 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1125 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1126 _action( symptr ) );
1132 /* at end - no count, no / */
1133 symptr->howlen = e_no_len;
1137 symptr->code = code;
1138 symptr->patptr = patptr;
1142 symptr->patptr = patptr;
1147 There is no way to cleanly handle the case where we should process the
1148 string per byte in its upgraded form while it's really in downgraded form
1149 (e.g. estimates like strend-s as an upper bound for the number of
1150 characters left wouldn't work). So if we foresee the need of this
1151 (pattern starts with U or contains U0), we want to work on the encoded
1152 version of the string. Users are advised to upgrade their pack string
1153 themselves if they need to do a lot of unpacks like this on it
1156 need_utf8(const char *pat, const char *patend)
1160 PERL_ARGS_ASSERT_NEED_UTF8;
1162 while (pat < patend) {
1163 if (pat[0] == '#') {
1165 pat = (const char *) memchr(pat, '\n', patend-pat);
1166 if (!pat) return FALSE;
1167 } else if (pat[0] == 'U') {
1168 if (first || pat[1] == '0') return TRUE;
1169 } else first = FALSE;
1176 first_symbol(const char *pat, const char *patend) {
1177 PERL_ARGS_ASSERT_FIRST_SYMBOL;
1179 while (pat < patend) {
1180 if (pat[0] != '#') return pat[0];
1182 pat = (const char *) memchr(pat, '\n', patend-pat);
1190 =for apidoc unpackstring
1192 The engine implementing unpack() Perl function. C<unpackstring> puts the
1193 extracted list items on the @_ stack and returns the number of elements.
1194 Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1195 Unlike L</call_pv>, do not do a C<PUSHMARK>. This function takes no parameters
1201 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
1205 PERL_ARGS_ASSERT_UNPACKSTRING;
1207 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1208 else if (need_utf8(pat, patend)) {
1209 /* We probably should try to avoid this in case a scalar context call
1210 wouldn't get to the "U0" */
1211 STRLEN len = strend - s;
1212 s = (char *) bytes_to_utf8((U8 *) s, &len);
1215 flags |= FLAG_DO_UTF8;
1218 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1219 flags |= FLAG_PARSE_UTF8;
1221 TEMPSYM_INIT(&sym, pat, patend, flags);
1223 return unpack_rec(&sym, s, s, strend, NULL );
1227 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
1231 const I32 start_sp_offset = SP - PL_stack_base;
1236 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1237 bool beyond = FALSE;
1238 bool explicit_length;
1239 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1240 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1242 PERL_ARGS_ASSERT_UNPACK_REC;
1244 symptr->strbeg = s - strbeg;
1246 while (next_symbol(symptr)) {
1249 I32 datumtype = symptr->code;
1250 /* do first one only unless in list context
1251 / is implemented by unpacking the count, then popping it from the
1252 stack, so must check that we're not in the middle of a / */
1253 if ( unpack_only_one
1254 && (SP - PL_stack_base == start_sp_offset + 1)
1255 && (datumtype != '/') ) /* XXX can this be omitted */
1258 switch (howlen = symptr->howlen) {
1260 len = strend - strbeg; /* long enough */
1263 /* e_no_len and e_number */
1264 len = symptr->length;
1268 explicit_length = TRUE;
1270 beyond = s >= strend;
1272 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1274 /* props nonzero means we can process this letter. */
1275 const long size = props & PACK_SIZE_MASK;
1276 const long howmany = (strend - s) / size;
1280 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1281 if (len && unpack_only_one) len = 1;
1287 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1289 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1292 if (howlen == e_no_len)
1293 len = 16; /* len is not specified */
1301 tempsym_t savsym = *symptr;
1302 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1303 symptr->flags |= group_modifiers;
1304 symptr->patend = savsym.grpend;
1305 symptr->previous = &savsym;
1308 if (len && unpack_only_one) len = 1;
1310 symptr->patptr = savsym.grpbeg;
1311 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1312 else symptr->flags &= ~FLAG_PARSE_UTF8;
1313 unpack_rec(symptr, s, strbeg, strend, &s);
1314 if (s == strend && savsym.howlen == e_star)
1315 break; /* No way to continue */
1318 savsym.flags = symptr->flags & ~group_modifiers;
1322 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1323 case '.' | TYPE_IS_SHRIEKING:
1328 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1329 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1330 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1331 const bool u8 = utf8;
1333 if (howlen == e_star) from = strbeg;
1334 else if (len <= 0) from = s;
1336 tempsym_t *group = symptr;
1338 while (--len && group) group = group->previous;
1339 from = group ? strbeg + group->strbeg : strbeg;
1342 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1343 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1347 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1348 case '@' | TYPE_IS_SHRIEKING:
1351 s = strbeg + symptr->strbeg;
1352 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1353 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1354 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1360 Perl_croak(aTHX_ "'@' outside of string in unpack");
1365 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1368 Perl_croak(aTHX_ "'@' outside of string in unpack");
1372 case 'X' | TYPE_IS_SHRIEKING:
1373 if (!len) /* Avoid division by 0 */
1376 const char *hop, *last;
1378 hop = last = strbeg;
1380 hop += UTF8SKIP(hop);
1387 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1391 len = (s - strbeg) % len;
1397 Perl_croak(aTHX_ "'X' outside of string in unpack");
1398 while (--s, UTF8_IS_CONTINUATION(*s)) {
1400 Perl_croak(aTHX_ "'X' outside of string in unpack");
1405 if (len > s - strbeg)
1406 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1410 case 'x' | TYPE_IS_SHRIEKING: {
1412 if (!len) /* Avoid division by 0 */
1414 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1415 else ai32 = (s - strbeg) % len;
1416 if (ai32 == 0) break;
1424 Perl_croak(aTHX_ "'x' outside of string in unpack");
1429 if (len > strend - s)
1430 Perl_croak(aTHX_ "'x' outside of string in unpack");
1435 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1441 /* Preliminary length estimate is assumed done in 'W' */
1442 if (len > strend - s) len = strend - s;
1448 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1449 if (hop >= strend) {
1451 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1456 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1458 } else if (len > strend - s)
1461 if (datumtype == 'Z') {
1462 /* 'Z' strips stuff after first null */
1463 const char *ptr, *end;
1465 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1466 sv = newSVpvn(s, ptr-s);
1467 if (howlen == e_star) /* exact for 'Z*' */
1468 len = ptr-s + (ptr != strend ? 1 : 0);
1469 } else if (datumtype == 'A') {
1470 /* 'A' strips both nulls and spaces */
1472 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1473 for (ptr = s+len-1; ptr >= s; ptr--)
1474 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1475 !isSPACE_utf8(ptr)) break;
1476 if (ptr >= s) ptr += UTF8SKIP(ptr);
1479 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1481 for (ptr = s+len-1; ptr >= s; ptr--)
1482 if (*ptr != 0 && !isSPACE(*ptr)) break;
1485 sv = newSVpvn(s, ptr-s);
1486 } else sv = newSVpvn(s, len);
1490 /* Undo any upgrade done due to need_utf8() */
1491 if (!(symptr->flags & FLAG_WAS_UTF8))
1492 sv_utf8_downgrade(sv, 0);
1500 if (howlen == e_star || len > (strend - s) * 8)
1501 len = (strend - s) * 8;
1504 while (len >= 8 && s < strend) {
1505 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1510 cuv += PL_bitcount[*(U8 *)s++];
1513 if (len && s < strend) {
1515 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1516 if (datumtype == 'b')
1518 if (bits & 1) cuv++;
1523 if (bits & 0x80) cuv++;
1530 sv = sv_2mortal(newSV(len ? len : 1));
1533 if (datumtype == 'b') {
1535 const I32 ai32 = len;
1536 for (len = 0; len < ai32; len++) {
1537 if (len & 7) bits >>= 1;
1539 if (s >= strend) break;
1540 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1541 } else bits = *(U8 *) s++;
1542 *str++ = bits & 1 ? '1' : '0';
1546 const I32 ai32 = len;
1547 for (len = 0; len < ai32; len++) {
1548 if (len & 7) bits <<= 1;
1550 if (s >= strend) break;
1551 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1552 } else bits = *(U8 *) s++;
1553 *str++ = bits & 0x80 ? '1' : '0';
1557 SvCUR_set(sv, str - SvPVX_const(sv));
1564 /* Preliminary length estimate, acceptable for utf8 too */
1565 if (howlen == e_star || len > (strend - s) * 2)
1566 len = (strend - s) * 2;
1568 sv = sv_2mortal(newSV(len ? len : 1));
1572 if (datumtype == 'h') {
1575 for (len = 0; len < ai32; len++) {
1576 if (len & 1) bits >>= 4;
1578 if (s >= strend) break;
1579 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1580 } else bits = * (U8 *) s++;
1582 *str++ = PL_hexdigit[bits & 15];
1586 const I32 ai32 = len;
1587 for (len = 0; len < ai32; len++) {
1588 if (len & 1) bits <<= 4;
1590 if (s >= strend) break;
1591 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1592 } else bits = *(U8 *) s++;
1594 *str++ = PL_hexdigit[(bits >> 4) & 15];
1599 SvCUR_set(sv, str - SvPVX_const(sv));
1606 if (explicit_length)
1607 /* Switch to "character" mode */
1608 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1613 while (len-- > 0 && s < strend) {
1618 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1619 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1620 if (retlen == (STRLEN) -1 || retlen == 0)
1621 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1625 aint = *(U8 *)(s)++;
1626 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1630 else if (checksum > bits_in_uv)
1631 cdouble += (NV)aint;
1639 while (len-- > 0 && s < strend) {
1641 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1642 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1643 if (retlen == (STRLEN) -1 || retlen == 0)
1644 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1648 else if (checksum > bits_in_uv)
1649 cdouble += (NV) val;
1653 } else if (!checksum)
1655 const U8 ch = *(U8 *) s++;
1658 else if (checksum > bits_in_uv)
1659 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1661 while (len-- > 0) cuv += *(U8 *) s++;
1665 if (explicit_length && howlen != e_star) {
1666 /* Switch to "bytes in UTF-8" mode */
1667 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1669 /* Should be impossible due to the need_utf8() test */
1670 Perl_croak(aTHX_ "U0 mode on a byte string");
1674 if (len > strend - s) len = strend - s;
1676 if (len && unpack_only_one) len = 1;
1680 while (len-- > 0 && s < strend) {
1684 U8 result[UTF8_MAXLEN];
1685 const char *ptr = s;
1687 /* Bug: warns about bad utf8 even if we are short on bytes
1688 and will break out of the loop */
1689 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1692 len = UTF8SKIP(result);
1693 if (!uni_to_bytes(aTHX_ &ptr, strend,
1694 (char *) &result[1], len-1, 'U')) break;
1695 auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT);
1698 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
1699 if (retlen == (STRLEN) -1 || retlen == 0)
1700 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1705 else if (checksum > bits_in_uv)
1706 cdouble += (NV) auv;
1711 case 's' | TYPE_IS_SHRIEKING:
1712 #if SHORTSIZE != SIZE16
1715 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1716 DO_BO_UNPACK(ashort, s);
1719 else if (checksum > bits_in_uv)
1720 cdouble += (NV)ashort;
1732 #if U16SIZE > SIZE16
1735 SHIFT16(utf8, s, strend, &ai16, datumtype);
1736 DO_BO_UNPACK(ai16, 16);
1737 #if U16SIZE > SIZE16
1743 else if (checksum > bits_in_uv)
1744 cdouble += (NV)ai16;
1749 case 'S' | TYPE_IS_SHRIEKING:
1750 #if SHORTSIZE != SIZE16
1752 unsigned short aushort;
1753 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1754 DO_BO_UNPACK(aushort, s);
1757 else if (checksum > bits_in_uv)
1758 cdouble += (NV)aushort;
1771 #if U16SIZE > SIZE16
1774 SHIFT16(utf8, s, strend, &au16, datumtype);
1775 DO_BO_UNPACK(au16, 16);
1777 if (datumtype == 'n')
1778 au16 = PerlSock_ntohs(au16);
1781 if (datumtype == 'v')
1786 else if (checksum > bits_in_uv)
1787 cdouble += (NV) au16;
1792 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1793 case 'v' | TYPE_IS_SHRIEKING:
1794 case 'n' | TYPE_IS_SHRIEKING:
1797 # if U16SIZE > SIZE16
1800 SHIFT16(utf8, s, strend, &ai16, datumtype);
1802 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1803 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1804 # endif /* HAS_NTOHS */
1806 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1807 ai16 = (I16) vtohs((U16) ai16);
1808 # endif /* HAS_VTOHS */
1811 else if (checksum > bits_in_uv)
1812 cdouble += (NV) ai16;
1817 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1819 case 'i' | TYPE_IS_SHRIEKING:
1822 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1823 DO_BO_UNPACK(aint, i);
1826 else if (checksum > bits_in_uv)
1827 cdouble += (NV)aint;
1833 case 'I' | TYPE_IS_SHRIEKING:
1836 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1837 DO_BO_UNPACK(auint, i);
1840 else if (checksum > bits_in_uv)
1841 cdouble += (NV)auint;
1849 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1850 #if IVSIZE == INTSIZE
1851 DO_BO_UNPACK(aiv, i);
1852 #elif IVSIZE == LONGSIZE
1853 DO_BO_UNPACK(aiv, l);
1854 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1855 DO_BO_UNPACK(aiv, 64);
1857 Perl_croak(aTHX_ "'j' not supported on this platform");
1861 else if (checksum > bits_in_uv)
1870 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1871 #if IVSIZE == INTSIZE
1872 DO_BO_UNPACK(auv, i);
1873 #elif IVSIZE == LONGSIZE
1874 DO_BO_UNPACK(auv, l);
1875 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1876 DO_BO_UNPACK(auv, 64);
1878 Perl_croak(aTHX_ "'J' not supported on this platform");
1882 else if (checksum > bits_in_uv)
1888 case 'l' | TYPE_IS_SHRIEKING:
1889 #if LONGSIZE != SIZE32
1892 SHIFT_VAR(utf8, s, strend, along, datumtype);
1893 DO_BO_UNPACK(along, l);
1896 else if (checksum > bits_in_uv)
1897 cdouble += (NV)along;
1908 #if U32SIZE > SIZE32
1911 SHIFT32(utf8, s, strend, &ai32, datumtype);
1912 DO_BO_UNPACK(ai32, 32);
1913 #if U32SIZE > SIZE32
1914 if (ai32 > 2147483647) ai32 -= 4294967296;
1918 else if (checksum > bits_in_uv)
1919 cdouble += (NV)ai32;
1924 case 'L' | TYPE_IS_SHRIEKING:
1925 #if LONGSIZE != SIZE32
1927 unsigned long aulong;
1928 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1929 DO_BO_UNPACK(aulong, l);
1932 else if (checksum > bits_in_uv)
1933 cdouble += (NV)aulong;
1946 #if U32SIZE > SIZE32
1949 SHIFT32(utf8, s, strend, &au32, datumtype);
1950 DO_BO_UNPACK(au32, 32);
1952 if (datumtype == 'N')
1953 au32 = PerlSock_ntohl(au32);
1956 if (datumtype == 'V')
1961 else if (checksum > bits_in_uv)
1962 cdouble += (NV)au32;
1967 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1968 case 'V' | TYPE_IS_SHRIEKING:
1969 case 'N' | TYPE_IS_SHRIEKING:
1972 # if U32SIZE > SIZE32
1975 SHIFT32(utf8, s, strend, &ai32, datumtype);
1977 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1978 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1981 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1982 ai32 = (I32)vtohl((U32)ai32);
1986 else if (checksum > bits_in_uv)
1987 cdouble += (NV)ai32;
1992 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1996 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1997 DO_BO_UNPACK_PC(aptr);
1998 /* newSVpv generates undef if aptr is NULL */
1999 mPUSHs(newSVpv(aptr, 0));
2007 while (len > 0 && s < strend) {
2009 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
2010 auv = (auv << 7) | (ch & 0x7f);
2011 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
2019 if (++bytes >= sizeof(UV)) { /* promote to string */
2022 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
2023 while (s < strend) {
2024 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
2025 sv = mul128(sv, (U8)(ch & 0x7f));
2031 t = SvPV_nolen_const(sv);
2040 if ((s >= strend) && bytes)
2041 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
2045 if (symptr->howlen == e_star)
2046 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
2048 if (s + sizeof(char*) <= strend) {
2050 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2051 DO_BO_UNPACK_PC(aptr);
2052 /* newSVpvn generates undef if aptr is NULL */
2053 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
2060 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2061 DO_BO_UNPACK(aquad, 64);
2063 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
2064 newSViv((IV)aquad) : newSVnv((NV)aquad));
2065 else if (checksum > bits_in_uv)
2066 cdouble += (NV)aquad;
2074 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2075 DO_BO_UNPACK(auquad, 64);
2077 mPUSHs(auquad <= UV_MAX ?
2078 newSVuv((UV)auquad) : newSVnv((NV)auquad));
2079 else if (checksum > bits_in_uv)
2080 cdouble += (NV)auquad;
2085 #endif /* HAS_QUAD */
2086 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2090 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
2091 DO_BO_UNPACK_N(afloat, float);
2101 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
2102 DO_BO_UNPACK_N(adouble, double);
2112 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), datumtype);
2113 DO_BO_UNPACK_N(anv.nv, NV);
2120 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2124 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, sizeof(aldouble.bytes), datumtype);
2125 DO_BO_UNPACK_N(aldouble.ld, long double);
2127 mPUSHn(aldouble.ld);
2129 cdouble += aldouble.ld;
2135 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2136 sv = sv_2mortal(newSV(l));
2137 if (l) SvPOK_on(sv);
2140 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2145 next_uni_uu(aTHX_ &s, strend, &a);
2146 next_uni_uu(aTHX_ &s, strend, &b);
2147 next_uni_uu(aTHX_ &s, strend, &c);
2148 next_uni_uu(aTHX_ &s, strend, &d);
2149 hunk[0] = (char)((a << 2) | (b >> 4));
2150 hunk[1] = (char)((b << 4) | (c >> 2));
2151 hunk[2] = (char)((c << 6) | d);
2153 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2161 /* possible checksum byte */
2162 const char *skip = s+UTF8SKIP(s);
2163 if (skip < strend && *skip == '\n')
2169 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2173 len = PL_uudmap[*(U8*)s++] & 077;
2175 if (s < strend && ISUUCHAR(*s))
2176 a = PL_uudmap[*(U8*)s++] & 077;
2179 if (s < strend && ISUUCHAR(*s))
2180 b = PL_uudmap[*(U8*)s++] & 077;
2183 if (s < strend && ISUUCHAR(*s))
2184 c = PL_uudmap[*(U8*)s++] & 077;
2187 if (s < strend && ISUUCHAR(*s))
2188 d = PL_uudmap[*(U8*)s++] & 077;
2191 hunk[0] = (char)((a << 2) | (b >> 4));
2192 hunk[1] = (char)((b << 4) | (c >> 2));
2193 hunk[2] = (char)((c << 6) | d);
2195 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2200 else /* possible checksum byte */
2201 if (s + 1 < strend && s[1] == '\n')
2211 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2212 (checksum > bits_in_uv &&
2213 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2216 anv = (NV) (1 << (checksum & 15));
2217 while (checksum >= 16) {
2221 while (cdouble < 0.0)
2223 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2224 sv = newSVnv(cdouble);
2227 if (checksum < bits_in_uv) {
2228 UV mask = ((UV)1 << checksum) - 1;
2237 if (symptr->flags & FLAG_SLASH){
2238 if (SP - PL_stack_base - start_sp_offset <= 0)
2240 if( next_symbol(symptr) ){
2241 if( symptr->howlen == e_number )
2242 Perl_croak(aTHX_ "Count after length/code in unpack" );
2244 /* ...end of char buffer then no decent length available */
2245 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2247 /* take top of stack (hope it's numeric) */
2250 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2253 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2255 datumtype = symptr->code;
2256 explicit_length = FALSE;
2264 return SP - PL_stack_base - start_sp_offset;
2272 I32 gimme = GIMME_V;
2275 const char *pat = SvPV_const(left, llen);
2276 const char *s = SvPV_const(right, rlen);
2277 const char *strend = s + rlen;
2278 const char *patend = pat + llen;
2282 cnt = unpackstring(pat, patend, s, strend,
2283 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2284 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2287 if ( !cnt && gimme == G_SCALAR )
2288 PUSHs(&PL_sv_undef);
2293 doencodes(U8 *h, const char *s, I32 len)
2295 *h++ = PL_uuemap[len];
2297 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2298 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2299 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2300 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2305 const char r = (len > 1 ? s[1] : '\0');
2306 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2307 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2308 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2309 *h++ = PL_uuemap[0];
2316 S_is_an_int(pTHX_ const char *s, STRLEN l)
2318 SV *result = newSVpvn(s, l);
2319 char *const result_c = SvPV_nolen(result); /* convenience */
2320 char *out = result_c;
2324 PERL_ARGS_ASSERT_IS_AN_INT;
2332 SvREFCNT_dec(result);
2355 SvREFCNT_dec(result);
2361 SvCUR_set(result, out - result_c);
2365 /* pnum must be '\0' terminated */
2367 S_div128(pTHX_ SV *pnum, bool *done)
2370 char * const s = SvPV(pnum, len);
2374 PERL_ARGS_ASSERT_DIV128;
2378 const int i = m * 10 + (*t - '0');
2379 const int r = (i >> 7); /* r < 10 */
2387 SvCUR_set(pnum, (STRLEN) (t - s));
2392 =for apidoc packlist
2394 The engine implementing pack() Perl function.
2400 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
2405 PERL_ARGS_ASSERT_PACKLIST;
2407 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2409 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2410 Also make sure any UTF8 flag is loaded */
2411 SvPV_force_nolen(cat);
2413 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2415 (void)pack_rec( cat, &sym, beglist, endlist );
2418 /* like sv_utf8_upgrade, but also repoint the group start markers */
2420 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2423 const char *from_ptr, *from_start, *from_end, **marks, **m;
2424 char *to_start, *to_ptr;
2426 if (SvUTF8(sv)) return;
2428 from_start = SvPVX_const(sv);
2429 from_end = from_start + SvCUR(sv);
2430 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2431 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2432 if (from_ptr == from_end) {
2433 /* Simple case: no character needs to be changed */
2438 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2439 Newx(to_start, len, char);
2440 Copy(from_start, to_start, from_ptr-from_start, char);
2441 to_ptr = to_start + (from_ptr-from_start);
2443 Newx(marks, sym_ptr->level+2, const char *);
2444 for (group=sym_ptr; group; group = group->previous)
2445 marks[group->level] = from_start + group->strbeg;
2446 marks[sym_ptr->level+1] = from_end+1;
2447 for (m = marks; *m < from_ptr; m++)
2448 *m = to_start + (*m-from_start);
2450 for (;from_ptr < from_end; from_ptr++) {
2451 while (*m == from_ptr) *m++ = to_ptr;
2452 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2456 while (*m == from_ptr) *m++ = to_ptr;
2457 if (m != marks + sym_ptr->level+1) {
2460 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2461 "level=%d", m, marks, sym_ptr->level);
2463 for (group=sym_ptr; group; group = group->previous)
2464 group->strbeg = marks[group->level] - to_start;
2469 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2470 from_start -= SvIVX(sv);
2473 SvFLAGS(sv) &= ~SVf_OOK;
2476 Safefree(from_start);
2477 SvPV_set(sv, to_start);
2478 SvCUR_set(sv, to_ptr - to_start);
2483 /* Exponential string grower. Makes string extension effectively O(n)
2484 needed says how many extra bytes we need (not counting the final '\0')
2485 Only grows the string if there is an actual lack of space
2488 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2489 const STRLEN cur = SvCUR(sv);
2490 const STRLEN len = SvLEN(sv);
2493 PERL_ARGS_ASSERT_SV_EXP_GROW;
2495 if (len - cur > needed) return SvPVX(sv);
2496 extend = needed > len ? needed : len;
2497 return SvGROW(sv, len+extend+1);
2502 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2505 tempsym_t lookahead;
2506 I32 items = endlist - beglist;
2507 bool found = next_symbol(symptr);
2508 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2509 bool warn_utf8 = ckWARN(WARN_UTF8);
2511 PERL_ARGS_ASSERT_PACK_REC;
2513 if (symptr->level == 0 && found && symptr->code == 'U') {
2514 marked_upgrade(aTHX_ cat, symptr);
2515 symptr->flags |= FLAG_DO_UTF8;
2518 symptr->strbeg = SvCUR(cat);
2524 SV *lengthcode = NULL;
2525 I32 datumtype = symptr->code;
2526 howlen_t howlen = symptr->howlen;
2527 char *start = SvPVX(cat);
2528 char *cur = start + SvCUR(cat);
2530 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2534 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2538 /* e_no_len and e_number */
2539 len = symptr->length;
2544 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2546 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2547 /* We can process this letter. */
2548 STRLEN size = props & PACK_SIZE_MASK;
2549 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2553 /* Look ahead for next symbol. Do we have code/code? */
2554 lookahead = *symptr;
2555 found = next_symbol(&lookahead);
2556 if (symptr->flags & FLAG_SLASH) {
2558 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2559 if (strchr("aAZ", lookahead.code)) {
2560 if (lookahead.howlen == e_number) count = lookahead.length;
2563 count = sv_len_utf8(*beglist);
2566 if (lookahead.code == 'Z') count++;
2569 if (lookahead.howlen == e_number && lookahead.length < items)
2570 count = lookahead.length;
2573 lookahead.howlen = e_number;
2574 lookahead.length = count;
2575 lengthcode = sv_2mortal(newSViv(count));
2578 /* Code inside the switch must take care to properly update
2579 cat (CUR length and '\0' termination) if it updated *cur and
2580 doesn't simply leave using break */
2581 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2583 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2584 (int) TYPE_NO_MODIFIERS(datumtype));
2586 Perl_croak(aTHX_ "'%%' may not be used in pack");
2589 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2590 case '.' | TYPE_IS_SHRIEKING:
2593 if (howlen == e_star) from = start;
2594 else if (len == 0) from = cur;
2596 tempsym_t *group = symptr;
2598 while (--len && group) group = group->previous;
2599 from = group ? start + group->strbeg : start;
2602 len = SvIV(fromstr);
2604 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2605 case '@' | TYPE_IS_SHRIEKING:
2608 from = start + symptr->strbeg;
2610 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2611 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2612 #else /* PERL_PACK_CAN_SHRIEKSIGN */
2616 while (len && from < cur) {
2617 from += UTF8SKIP(from);
2621 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2623 /* Here we know from == cur */
2625 GROWING(0, cat, start, cur, len);
2626 Zero(cur, len, char);
2628 } else if (from < cur) {
2631 } else goto no_change;
2639 if (len > 0) goto grow;
2640 if (len == 0) goto no_change;
2647 tempsym_t savsym = *symptr;
2648 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2649 symptr->flags |= group_modifiers;
2650 symptr->patend = savsym.grpend;
2652 symptr->previous = &lookahead;
2655 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2656 else symptr->flags &= ~FLAG_PARSE_UTF8;
2657 was_utf8 = SvUTF8(cat);
2658 symptr->patptr = savsym.grpbeg;
2659 beglist = pack_rec(cat, symptr, beglist, endlist);
2660 if (SvUTF8(cat) != was_utf8)
2661 /* This had better be an upgrade while in utf8==0 mode */
2664 if (savsym.howlen == e_star && beglist == endlist)
2665 break; /* No way to continue */
2667 items = endlist - beglist;
2668 lookahead.flags = symptr->flags & ~group_modifiers;
2671 case 'X' | TYPE_IS_SHRIEKING:
2672 if (!len) /* Avoid division by 0 */
2679 hop += UTF8SKIP(hop);
2686 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2690 len = (cur-start) % len;
2694 if (len < 1) goto no_change;
2698 Perl_croak(aTHX_ "'%c' outside of string in pack",
2699 (int) TYPE_NO_MODIFIERS(datumtype));
2700 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2702 Perl_croak(aTHX_ "'%c' outside of string in pack",
2703 (int) TYPE_NO_MODIFIERS(datumtype));
2709 if (cur - start < len)
2710 Perl_croak(aTHX_ "'%c' outside of string in pack",
2711 (int) TYPE_NO_MODIFIERS(datumtype));
2714 if (cur < start+symptr->strbeg) {
2715 /* Make sure group starts don't point into the void */
2717 const STRLEN length = cur-start;
2718 for (group = symptr;
2719 group && length < group->strbeg;
2720 group = group->previous) group->strbeg = length;
2721 lookahead.strbeg = length;
2724 case 'x' | TYPE_IS_SHRIEKING: {
2726 if (!len) /* Avoid division by 0 */
2728 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2729 else ai32 = (cur - start) % len;
2730 if (ai32 == 0) goto no_change;
2742 aptr = SvPV_const(fromstr, fromlen);
2743 if (DO_UTF8(fromstr)) {
2744 const char *end, *s;
2746 if (!utf8 && !SvUTF8(cat)) {
2747 marked_upgrade(aTHX_ cat, symptr);
2748 lookahead.flags |= FLAG_DO_UTF8;
2749 lookahead.strbeg = symptr->strbeg;
2752 cur = start + SvCUR(cat);
2754 if (howlen == e_star) {
2755 if (utf8) goto string_copy;
2759 end = aptr + fromlen;
2760 fromlen = datumtype == 'Z' ? len-1 : len;
2761 while ((I32) fromlen > 0 && s < end) {
2766 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2769 if (datumtype == 'Z') len++;
2775 fromlen = len - fromlen;
2776 if (datumtype == 'Z') fromlen--;
2777 if (howlen == e_star) {
2779 if (datumtype == 'Z') len++;
2781 GROWING(0, cat, start, cur, len);
2782 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2783 datumtype | TYPE_IS_PACK))
2784 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2785 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2786 (int)datumtype, aptr, end, cur, (UV)fromlen);
2790 if (howlen == e_star) {
2792 if (datumtype == 'Z') len++;
2794 if (len <= (I32) fromlen) {
2796 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2798 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2800 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2801 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2803 while (fromlen > 0) {
2804 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2810 if (howlen == e_star) {
2812 if (datumtype == 'Z') len++;
2814 if (len <= (I32) fromlen) {
2816 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2818 GROWING(0, cat, start, cur, len);
2819 Copy(aptr, cur, fromlen, char);
2823 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2830 const char *str, *end;
2837 str = SvPV_const(fromstr, fromlen);
2838 end = str + fromlen;
2839 if (DO_UTF8(fromstr)) {
2841 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2843 utf8_source = FALSE;
2844 utf8_flags = 0; /* Unused, but keep compilers happy */
2846 if (howlen == e_star) len = fromlen;
2847 field_len = (len+7)/8;
2848 GROWING(utf8, cat, start, cur, field_len);
2849 if (len > (I32)fromlen) len = fromlen;
2852 if (datumtype == 'B')
2856 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2858 } else bits |= *str++ & 1;
2859 if (l & 7) bits <<= 1;
2861 PUSH_BYTE(utf8, cur, bits);
2866 /* datumtype == 'b' */
2870 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2871 if (val & 1) bits |= 0x80;
2872 } else if (*str++ & 1)
2874 if (l & 7) bits >>= 1;
2876 PUSH_BYTE(utf8, cur, bits);
2882 if (datumtype == 'B')
2883 bits <<= 7 - (l & 7);
2885 bits >>= 7 - (l & 7);
2886 PUSH_BYTE(utf8, cur, bits);
2889 /* Determine how many chars are left in the requested field */
2891 if (howlen == e_star) field_len = 0;
2892 else field_len -= l;
2893 Zero(cur, field_len, char);
2899 const char *str, *end;
2906 str = SvPV_const(fromstr, fromlen);
2907 end = str + fromlen;
2908 if (DO_UTF8(fromstr)) {
2910 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2912 utf8_source = FALSE;
2913 utf8_flags = 0; /* Unused, but keep compilers happy */
2915 if (howlen == e_star) len = fromlen;
2916 field_len = (len+1)/2;
2917 GROWING(utf8, cat, start, cur, field_len);
2918 if (!utf8 && len > (I32)fromlen) len = fromlen;
2921 if (datumtype == 'H')
2925 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2926 if (val < 256 && isALPHA(val))
2927 bits |= (val + 9) & 0xf;
2930 } else if (isALPHA(*str))
2931 bits |= (*str++ + 9) & 0xf;
2933 bits |= *str++ & 0xf;
2934 if (l & 1) bits <<= 4;
2936 PUSH_BYTE(utf8, cur, bits);
2944 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2945 if (val < 256 && isALPHA(val))
2946 bits |= ((val + 9) & 0xf) << 4;
2948 bits |= (val & 0xf) << 4;
2949 } else if (isALPHA(*str))
2950 bits |= ((*str++ + 9) & 0xf) << 4;
2952 bits |= (*str++ & 0xf) << 4;
2953 if (l & 1) bits >>= 4;
2955 PUSH_BYTE(utf8, cur, bits);
2961 PUSH_BYTE(utf8, cur, bits);
2964 /* Determine how many chars are left in the requested field */
2966 if (howlen == e_star) field_len = 0;
2967 else field_len -= l;
2968 Zero(cur, field_len, char);
2976 aiv = SvIV(fromstr);
2977 if ((-128 > aiv || aiv > 127))
2978 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2979 "Character in 'c' format wrapped in pack");
2980 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2985 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2991 aiv = SvIV(fromstr);
2992 if ((0 > aiv || aiv > 0xff))
2993 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2994 "Character in 'C' format wrapped in pack");
2995 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
3000 U8 in_bytes = (U8)IN_BYTES;
3002 end = start+SvLEN(cat)-1;
3003 if (utf8) end -= UTF8_MAXLEN-1;
3007 auv = SvUV(fromstr);
3008 if (in_bytes) auv = auv % 0x100;
3013 SvCUR_set(cat, cur - start);
3015 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3016 end = start+SvLEN(cat)-UTF8_MAXLEN;
3018 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3021 0 : UNICODE_ALLOW_ANY);
3026 SvCUR_set(cat, cur - start);
3027 marked_upgrade(aTHX_ cat, symptr);
3028 lookahead.flags |= FLAG_DO_UTF8;
3029 lookahead.strbeg = symptr->strbeg;
3032 cur = start + SvCUR(cat);
3033 end = start+SvLEN(cat)-UTF8_MAXLEN;
3036 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3037 "Character in 'W' format wrapped in pack");
3042 SvCUR_set(cat, cur - start);
3043 GROWING(0, cat, start, cur, len+1);
3044 end = start+SvLEN(cat)-1;
3046 *(U8 *) cur++ = (U8)auv;
3055 if (!(symptr->flags & FLAG_DO_UTF8)) {
3056 marked_upgrade(aTHX_ cat, symptr);
3057 lookahead.flags |= FLAG_DO_UTF8;
3058 lookahead.strbeg = symptr->strbeg;
3064 end = start+SvLEN(cat);
3065 if (!utf8) end -= UTF8_MAXLEN;
3069 auv = SvUV(fromstr);
3071 U8 buffer[UTF8_MAXLEN], *endb;
3072 endb = uvuni_to_utf8_flags(buffer, auv,
3074 0 : UNICODE_ALLOW_ANY);
3075 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3077 SvCUR_set(cat, cur - start);
3078 GROWING(0, cat, start, cur,
3079 len+(endb-buffer)*UTF8_EXPAND);
3080 end = start+SvLEN(cat);
3082 cur = bytes_to_uni(buffer, endb-buffer, cur);
3086 SvCUR_set(cat, cur - start);
3087 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3088 end = start+SvLEN(cat)-UTF8_MAXLEN;
3090 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3092 0 : UNICODE_ALLOW_ANY);
3097 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3103 anv = SvNV(fromstr);
3105 /* VOS does not automatically map a floating-point overflow
3106 during conversion from double to float into infinity, so we
3107 do it by hand. This code should either be generalized for
3108 any OS that needs it, or removed if and when VOS implements
3109 posix-976 (suggestion to support mapping to infinity).
3110 Paul.Green@stratus.com 02-04-02. */
3112 extern const float _float_constants[];
3114 afloat = _float_constants[0]; /* single prec. inf. */
3115 else if (anv < -FLT_MAX)
3116 afloat = _float_constants[0]; /* single prec. inf. */
3117 else afloat = (float) anv;
3120 # if defined(VMS) && !defined(_IEEE_FP)
3121 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3122 * on Alpha; fake it if we don't have them.
3126 else if (anv < -FLT_MAX)
3128 else afloat = (float)anv;
3130 afloat = (float)anv;
3132 #endif /* __VOS__ */
3133 DO_BO_PACK_N(afloat, float);
3134 PUSH_VAR(utf8, cur, afloat);
3142 anv = SvNV(fromstr);
3144 /* VOS does not automatically map a floating-point overflow
3145 during conversion from long double to double into infinity,
3146 so we do it by hand. This code should either be generalized
3147 for any OS that needs it, or removed if and when VOS
3148 implements posix-976 (suggestion to support mapping to
3149 infinity). Paul.Green@stratus.com 02-04-02. */
3151 extern const double _double_constants[];
3153 adouble = _double_constants[0]; /* double prec. inf. */
3154 else if (anv < -DBL_MAX)
3155 adouble = _double_constants[0]; /* double prec. inf. */
3156 else adouble = (double) anv;
3159 # if defined(VMS) && !defined(_IEEE_FP)
3160 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3161 * on Alpha; fake it if we don't have them.
3165 else if (anv < -DBL_MAX)
3167 else adouble = (double)anv;
3169 adouble = (double)anv;
3171 #endif /* __VOS__ */
3172 DO_BO_PACK_N(adouble, double);
3173 PUSH_VAR(utf8, cur, adouble);
3178 Zero(&anv, 1, NV); /* can be long double with unused bits */
3182 /* to work round a gcc/x86 bug; don't use SvNV */
3183 anv.nv = sv_2nv(fromstr);
3185 anv.nv = SvNV(fromstr);
3187 DO_BO_PACK_N(anv, NV);
3188 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes));
3192 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3195 /* long doubles can have unused bits, which may be nonzero */
3196 Zero(&aldouble, 1, long double);
3200 /* to work round a gcc/x86 bug; don't use SvNV */
3201 aldouble.ld = (long double)sv_2nv(fromstr);
3203 aldouble.ld = (long double)SvNV(fromstr);
3205 DO_BO_PACK_N(aldouble, long double);
3206 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes));
3211 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3212 case 'n' | TYPE_IS_SHRIEKING:
3218 ai16 = (I16)SvIV(fromstr);
3220 ai16 = PerlSock_htons(ai16);
3222 PUSH16(utf8, cur, &ai16);
3225 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3226 case 'v' | TYPE_IS_SHRIEKING:
3232 ai16 = (I16)SvIV(fromstr);
3236 PUSH16(utf8, cur, &ai16);
3239 case 'S' | TYPE_IS_SHRIEKING:
3240 #if SHORTSIZE != SIZE16
3242 unsigned short aushort;
3244 aushort = SvUV(fromstr);
3245 DO_BO_PACK(aushort, s);
3246 PUSH_VAR(utf8, cur, aushort);
3256 au16 = (U16)SvUV(fromstr);
3257 DO_BO_PACK(au16, 16);
3258 PUSH16(utf8, cur, &au16);
3261 case 's' | TYPE_IS_SHRIEKING:
3262 #if SHORTSIZE != SIZE16
3266 ashort = SvIV(fromstr);
3267 DO_BO_PACK(ashort, s);
3268 PUSH_VAR(utf8, cur, ashort);
3278 ai16 = (I16)SvIV(fromstr);
3279 DO_BO_PACK(ai16, 16);
3280 PUSH16(utf8, cur, &ai16);
3284 case 'I' | TYPE_IS_SHRIEKING:
3288 auint = SvUV(fromstr);
3289 DO_BO_PACK(auint, i);
3290 PUSH_VAR(utf8, cur, auint);
3297 aiv = SvIV(fromstr);
3298 #if IVSIZE == INTSIZE
3300 #elif IVSIZE == LONGSIZE
3302 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3303 DO_BO_PACK(aiv, 64);
3305 Perl_croak(aTHX_ "'j' not supported on this platform");
3307 PUSH_VAR(utf8, cur, aiv);
3314 auv = SvUV(fromstr);
3315 #if UVSIZE == INTSIZE
3317 #elif UVSIZE == LONGSIZE
3319 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3320 DO_BO_PACK(auv, 64);
3322 Perl_croak(aTHX_ "'J' not supported on this platform");
3324 PUSH_VAR(utf8, cur, auv);
3331 anv = SvNV(fromstr);
3335 SvCUR_set(cat, cur - start);
3336 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3339 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3340 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3341 any negative IVs will have already been got by the croak()
3342 above. IOK is untrue for fractions, so we test them
3343 against UV_MAX_P1. */
3344 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3345 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3346 char *in = buf + sizeof(buf);
3347 UV auv = SvUV(fromstr);
3350 *--in = (char)((auv & 0x7f) | 0x80);
3353 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3354 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3355 in, (buf + sizeof(buf)) - in);
3356 } else if (SvPOKp(fromstr))
3358 else if (SvNOKp(fromstr)) {
3359 /* 10**NV_MAX_10_EXP is the largest power of 10
3360 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
3361 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3362 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3363 And with that many bytes only Inf can overflow.
3364 Some C compilers are strict about integral constant
3365 expressions so we conservatively divide by a slightly
3366 smaller integer instead of multiplying by the exact
3367 floating-point value.
3369 #ifdef NV_MAX_10_EXP
3370 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3371 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3373 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3374 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3376 char *in = buf + sizeof(buf);
3378 anv = Perl_floor(anv);
3380 const NV next = Perl_floor(anv / 128);
3381 if (in <= buf) /* this cannot happen ;-) */
3382 Perl_croak(aTHX_ "Cannot compress integer in pack");
3383 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3386 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3387 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3388 in, (buf + sizeof(buf)) - in);
3397 /* Copy string and check for compliance */
3398 from = SvPV_const(fromstr, len);
3399 if ((norm = is_an_int(from, len)) == NULL)
3400 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3402 Newx(result, len, char);
3405 while (!done) *--in = div128(norm, &done) | 0x80;
3406 result[len - 1] &= 0x7F; /* clear continue bit */
3407 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3408 in, (result + len) - in);
3410 SvREFCNT_dec(norm); /* free norm */
3415 case 'i' | TYPE_IS_SHRIEKING:
3419 aint = SvIV(fromstr);
3420 DO_BO_PACK(aint, i);
3421 PUSH_VAR(utf8, cur, aint);
3424 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3425 case 'N' | TYPE_IS_SHRIEKING:
3431 au32 = SvUV(fromstr);
3433 au32 = PerlSock_htonl(au32);
3435 PUSH32(utf8, cur, &au32);
3438 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3439 case 'V' | TYPE_IS_SHRIEKING:
3445 au32 = SvUV(fromstr);
3449 PUSH32(utf8, cur, &au32);
3452 case 'L' | TYPE_IS_SHRIEKING:
3453 #if LONGSIZE != SIZE32
3455 unsigned long aulong;
3457 aulong = SvUV(fromstr);
3458 DO_BO_PACK(aulong, l);
3459 PUSH_VAR(utf8, cur, aulong);
3469 au32 = SvUV(fromstr);
3470 DO_BO_PACK(au32, 32);
3471 PUSH32(utf8, cur, &au32);
3474 case 'l' | TYPE_IS_SHRIEKING:
3475 #if LONGSIZE != SIZE32
3479 along = SvIV(fromstr);
3480 DO_BO_PACK(along, l);
3481 PUSH_VAR(utf8, cur, along);
3491 ai32 = SvIV(fromstr);
3492 DO_BO_PACK(ai32, 32);
3493 PUSH32(utf8, cur, &ai32);
3501 auquad = (Uquad_t) SvUV(fromstr);
3502 DO_BO_PACK(auquad, 64);
3503 PUSH_VAR(utf8, cur, auquad);
3510 aquad = (Quad_t)SvIV(fromstr);
3511 DO_BO_PACK(aquad, 64);
3512 PUSH_VAR(utf8, cur, aquad);
3515 #endif /* HAS_QUAD */
3517 len = 1; /* assume SV is correct length */
3518 GROWING(utf8, cat, start, cur, sizeof(char *));
3525 SvGETMAGIC(fromstr);
3526 if (!SvOK(fromstr)) aptr = NULL;
3528 /* XXX better yet, could spirit away the string to
3529 * a safe spot and hang on to it until the result
3530 * of pack() (and all copies of the result) are
3533 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3534 !SvREADONLY(fromstr)))) {
3535 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3536 "Attempt to pack pointer to temporary value");
3538 if (SvPOK(fromstr) || SvNIOK(fromstr))
3539 aptr = SvPV_nomg_const_nolen(fromstr);
3541 aptr = SvPV_force_flags_nolen(fromstr, 0);
3543 DO_BO_PACK_PC(aptr);
3544 PUSH_VAR(utf8, cur, aptr);
3548 const char *aptr, *aend;
3552 if (len <= 2) len = 45;
3553 else len = len / 3 * 3;
3555 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3556 "Field too wide in 'u' format in pack");
3559 aptr = SvPV_const(fromstr, fromlen);
3560 from_utf8 = DO_UTF8(fromstr);
3562 aend = aptr + fromlen;
3563 fromlen = sv_len_utf8_nomg(fromstr);
3564 } else aend = NULL; /* Unused, but keep compilers happy */
3565 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3566 while (fromlen > 0) {
3569 U8 hunk[1+63/3*4+1];
3571 if ((I32)fromlen > len)
3577 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3578 'u' | TYPE_IS_PACK)) {
3580 SvCUR_set(cat, cur - start);
3581 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3582 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3583 aptr, aend, buffer, (long) todo);
3585 end = doencodes(hunk, buffer, todo);
3587 end = doencodes(hunk, aptr, todo);
3590 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3597 SvCUR_set(cat, cur - start);
3599 *symptr = lookahead;
3608 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3611 SV *pat_sv = *++MARK;
3612 const char *pat = SvPV_const(pat_sv, fromlen);
3613 const char *patend = pat + fromlen;
3619 packlist(cat, pat, patend, MARK, SP + 1);
3629 * c-indentation-style: bsd
3631 * indent-tabs-mode: nil
3634 * ex: set ts=8 sts=4 sw=4 et: