3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
19 /* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
31 #define PERL_IN_PP_PACK_C
34 /* Types used by pack/unpack */
36 e_no_len, /* no length */
37 e_number, /* number, [] */
41 typedef struct tempsym {
42 const char* patptr; /* current template char */
43 const char* patend; /* one after last char */
44 const char* grpbeg; /* 1st char of ()-group */
45 const char* grpend; /* end of ()-group */
46 I32 code; /* template code (!<>) */
47 I32 length; /* length/repeat count */
48 howlen_t howlen; /* how length is given */
49 int level; /* () nesting level */
50 U32 flags; /* /=4, comma=2, pack=1 */
51 /* and group modifiers */
52 STRLEN strbeg; /* offset of group start */
53 struct tempsym *previous; /* previous group */
56 #define TEMPSYM_INIT(symptr, p, e, f) \
58 (symptr)->patptr = (p); \
59 (symptr)->patend = (e); \
60 (symptr)->grpbeg = NULL; \
61 (symptr)->grpend = NULL; \
62 (symptr)->grpend = NULL; \
64 (symptr)->length = 0; \
65 (symptr)->howlen = e_no_len; \
66 (symptr)->level = 0; \
67 (symptr)->flags = (f); \
68 (symptr)->strbeg = 0; \
69 (symptr)->previous = NULL; \
73 # define PERL_PACK_CAN_BYTEORDER
74 # define PERL_PACK_CAN_SHRIEKSIGN
79 #define PERL_PACK_CAN_W
80 #define PERL_PACK_CAN_DOT
82 #define PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
83 #define PERL_PACK_REVERSE_UTF8_MODE_COMPATIBILITY
84 #define PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
85 #define PERL_PACK_POSITIONS_ARE_BYTES_COMPATIBILITY
87 /* Maximum number of bytes to which a byte can grow due to upgrade */
91 * Offset for integer pack/unpack.
93 * On architectures where I16 and I32 aren't really 16 and 32 bits,
94 * which for now are all Crays, pack and unpack have to play games.
98 * These values are required for portability of pack() output.
99 * If they're not right on your machine, then pack() and unpack()
100 * wouldn't work right anyway; you'll need to apply the Cray hack.
101 * (I'd like to check them with #if, but you can't use sizeof() in
102 * the preprocessor.) --???
105 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
106 defines are now in config.h. --Andy Dougherty April 1998
111 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
114 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
115 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
116 # define OFF16(p) ((char*)(p))
117 # define OFF32(p) ((char*)(p))
119 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
120 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
121 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
123 ++++ bad cray byte order
127 # define OFF16(p) ((char *) (p))
128 # define OFF32(p) ((char *) (p))
131 /* Only to be used inside a loop (see the break) */
132 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
134 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
136 Copy(s, OFF16(p), SIZE16, char); \
141 /* Only to be used inside a loop (see the break) */
142 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
144 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
146 Copy(s, OFF32(p), SIZE32, char); \
151 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
152 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
154 /* Only to be used inside a loop (see the break) */
155 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
158 if (!uni_to_bytes(aTHX_ &s, strend, \
159 (char *) &var, sizeof(var), datumtype)) break;\
161 Copy(s, (char *) &var, sizeof(var), char); \
166 #define PUSH_VAR(utf8, aptr, var) \
167 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
169 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
170 #define MAX_SUB_TEMPLATE_LEVEL 100
172 /* flags (note that type modifiers can also be used as flags!) */
173 #define FLAG_WAS_UTF8 0x40
174 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
175 #define FLAG_UNPACK_ONLY_ONE 0x10
176 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
177 #define FLAG_SLASH 0x04
178 #define FLAG_COMMA 0x02
179 #define FLAG_PACK 0x01
182 S_mul128(pTHX_ SV *sv, U8 m)
185 char *s = SvPV(sv, len);
188 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
189 SV * const tmpNew = newSVpvs("0000000000");
191 sv_catsv(tmpNew, sv);
192 SvREFCNT_dec(sv); /* free old sv */
197 while (!*t) /* trailing '\0'? */
200 const U32 i = ((*t - '0') << 7) + m;
201 *(t--) = '0' + (char)(i % 10);
207 /* Explosives and implosives. */
209 #if 'I' == 73 && 'J' == 74
210 /* On an ASCII/ISO kind of system */
211 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
214 Some other sort of character set - use memchr() so we don't match
217 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
221 #define TYPE_IS_SHRIEKING 0x100
222 #define TYPE_IS_BIG_ENDIAN 0x200
223 #define TYPE_IS_LITTLE_ENDIAN 0x400
224 #define TYPE_IS_PACK 0x800
225 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
226 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
227 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
229 #ifdef PERL_PACK_CAN_SHRIEKSIGN
230 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
232 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
235 #ifndef PERL_PACK_CAN_BYTEORDER
236 /* Put "can't" first because it is shorter */
237 # define TYPE_ENDIANNESS(t) 0
238 # define TYPE_NO_ENDIANNESS(t) (t)
240 # define ENDIANNESS_ALLOWED_TYPES ""
242 # define DO_BO_UNPACK(var, type)
243 # define DO_BO_PACK(var, type)
244 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
245 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
246 # define DO_BO_UNPACK_N(var, type)
247 # define DO_BO_PACK_N(var, type)
248 # define DO_BO_UNPACK_P(var)
249 # define DO_BO_PACK_P(var)
250 # define DO_BO_UNPACK_PC(var)
251 # define DO_BO_PACK_PC(var)
253 #else /* PERL_PACK_CAN_BYTEORDER */
255 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
256 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
258 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
260 # define DO_BO_UNPACK(var, type) \
262 switch (TYPE_ENDIANNESS(datumtype)) { \
263 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
264 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
269 # define DO_BO_PACK(var, type) \
271 switch (TYPE_ENDIANNESS(datumtype)) { \
272 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
273 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
278 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
280 switch (TYPE_ENDIANNESS(datumtype)) { \
281 case TYPE_IS_BIG_ENDIAN: \
282 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
284 case TYPE_IS_LITTLE_ENDIAN: \
285 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
292 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
294 switch (TYPE_ENDIANNESS(datumtype)) { \
295 case TYPE_IS_BIG_ENDIAN: \
296 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
298 case TYPE_IS_LITTLE_ENDIAN: \
299 var = (post_cast *) my_htole ## type ((pre_cast) var); \
306 # define BO_CANT_DOIT(action, type) \
308 switch (TYPE_ENDIANNESS(datumtype)) { \
309 case TYPE_IS_BIG_ENDIAN: \
310 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
311 "platform", #action, #type); \
313 case TYPE_IS_LITTLE_ENDIAN: \
314 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
315 "platform", #action, #type); \
322 # if PTRSIZE == INTSIZE
323 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
324 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
325 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
326 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
327 # elif PTRSIZE == LONGSIZE
328 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
329 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
330 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
331 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
332 # elif PTRSIZE == IVSIZE
333 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
334 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
335 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
336 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
338 # define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
339 # define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
340 # define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
341 # define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
344 # if defined(my_htolen) && defined(my_letohn) && \
345 defined(my_htoben) && defined(my_betohn)
346 # define DO_BO_UNPACK_N(var, type) \
348 switch (TYPE_ENDIANNESS(datumtype)) { \
349 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
350 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
355 # define DO_BO_PACK_N(var, type) \
357 switch (TYPE_ENDIANNESS(datumtype)) { \
358 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
359 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
364 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
365 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
368 #endif /* PERL_PACK_CAN_BYTEORDER */
370 #define PACK_SIZE_CANNOT_CSUM 0x80
371 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
372 #define PACK_SIZE_MASK 0x3F
374 /* These tables are regenerated by genpacksizetables.pl (and then hand pasted
375 in). You're unlikely ever to need to regenerate them. */
377 #if TYPE_IS_SHRIEKING != 0x100
378 ++++shriek offset should be 256
381 typedef U8 packprops_t;
384 STATIC const packprops_t packprops[512] = {
386 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
387 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
388 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
389 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
391 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
392 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
393 /* D */ LONG_DOUBLESIZE,
400 /* I */ sizeof(unsigned int),
407 #if defined(HAS_QUAD)
408 /* Q */ sizeof(Uquad_t),
415 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
417 #if defined(PERL_PACK_CAN_W)
418 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
422 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
423 /* c */ sizeof(char),
424 /* d */ sizeof(double),
426 /* f */ sizeof(float),
435 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
436 #if defined(HAS_QUAD)
437 /* q */ sizeof(Quad_t),
445 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
446 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
450 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
451 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
452 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
453 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
454 0, 0, 0, 0, 0, 0, 0, 0,
456 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
457 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
458 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
459 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
460 0, 0, 0, 0, 0, 0, 0, 0, 0,
461 /* I */ sizeof(unsigned int),
463 /* L */ sizeof(unsigned long),
465 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
471 /* S */ sizeof(unsigned short),
473 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
478 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
482 /* l */ sizeof(long),
484 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
490 /* s */ sizeof(short),
492 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
497 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
498 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
499 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
500 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 0, 0, 0, 0, 0, 0, 0, 0, 0
508 /* EBCDIC (or bust) */
509 STATIC const packprops_t packprops[512] = {
511 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
512 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
513 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
514 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
515 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
516 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
518 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
520 /* c */ sizeof(char),
521 /* d */ sizeof(double),
523 /* f */ sizeof(float),
533 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
534 #if defined(HAS_QUAD)
535 /* q */ sizeof(Quad_t),
539 0, 0, 0, 0, 0, 0, 0, 0, 0,
543 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
544 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
545 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
546 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
547 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
548 /* D */ LONG_DOUBLESIZE,
555 /* I */ sizeof(unsigned int),
563 #if defined(HAS_QUAD)
564 /* Q */ sizeof(Uquad_t),
568 0, 0, 0, 0, 0, 0, 0, 0, 0,
571 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
573 #if defined(PERL_PACK_CAN_W)
574 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
578 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
579 0, 0, 0, 0, 0, 0, 0, 0, 0,
581 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
582 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
583 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
584 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
585 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
586 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
587 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
588 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
589 0, 0, 0, 0, 0, 0, 0, 0, 0,
591 0, 0, 0, 0, 0, 0, 0, 0, 0,
592 /* l */ sizeof(long),
594 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
599 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
600 /* s */ sizeof(short),
602 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
607 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
608 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
610 /* I */ sizeof(unsigned int),
611 0, 0, 0, 0, 0, 0, 0, 0, 0,
612 /* L */ sizeof(unsigned long),
614 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
619 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
620 /* S */ sizeof(unsigned short),
622 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
627 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
628 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
633 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
636 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
637 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
638 /* We try to process malformed UTF-8 as much as possible (preferrably with
639 warnings), but these two mean we make no progress in the string and
640 might enter an infinite loop */
641 if (retlen == (STRLEN) -1 || retlen == 0)
642 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
643 (int) TYPE_NO_MODIFIERS(datumtype));
645 if (ckWARN(WARN_UNPACK))
646 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
647 "Character in '%c' format wrapped in unpack",
648 (int) TYPE_NO_MODIFIERS(datumtype));
655 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
656 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
660 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
664 const char *from = *s;
666 const U32 flags = ckWARN(WARN_UTF8) ?
667 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
668 for (;buf_len > 0; buf_len--) {
669 if (from >= end) return FALSE;
670 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
671 if (retlen == (STRLEN) -1 || retlen == 0) {
672 from += UTF8SKIP(from);
674 } else from += retlen;
681 /* We have enough characters for the buffer. Did we have problems ? */
684 /* Rewalk the string fragment while warning */
686 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
687 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
688 if (ptr >= end) break;
689 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
691 if (from > end) from = end;
693 if ((bad & 2) && ckWARN(WARN_UNPACK))
694 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
695 WARN_PACK : WARN_UNPACK),
696 "Character(s) in '%c' format wrapped in %s",
697 (int) TYPE_NO_MODIFIERS(datumtype),
698 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
705 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
708 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
709 if (val >= 0x100 || !ISUUCHAR(val) ||
710 retlen == (STRLEN) -1 || retlen == 0) {
714 *out = PL_uudmap[val] & 077;
720 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
721 const U8 * const end = start + len;
723 while (start < end) {
724 const UV uv = NATIVE_TO_ASCII(*start);
725 if (UNI_IS_INVARIANT(uv))
726 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
728 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
729 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
736 #define PUSH_BYTES(utf8, cur, buf, len) \
739 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
741 Copy(buf, cur, len, char); \
746 #define GROWING(utf8, cat, start, cur, in_len) \
748 STRLEN glen = (in_len); \
749 if (utf8) glen *= UTF8_EXPAND; \
750 if ((cur) + glen >= (start) + SvLEN(cat)) { \
751 (start) = sv_exp_grow(cat, glen); \
752 (cur) = (start) + SvCUR(cat); \
756 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
758 const STRLEN glen = (in_len); \
760 if (utf8) gl *= UTF8_EXPAND; \
761 if ((cur) + gl >= (start) + SvLEN(cat)) { \
763 SvCUR(cat) = (cur) - (start); \
764 (start) = sv_exp_grow(cat, gl); \
765 (cur) = (start) + SvCUR(cat); \
767 PUSH_BYTES(utf8, cur, buf, glen); \
770 #define PUSH_BYTE(utf8, s, byte) \
773 const U8 au8 = (byte); \
774 (s) = bytes_to_uni(&au8, 1, (s)); \
775 } else *(U8 *)(s)++ = (byte); \
778 /* Only to be used inside a loop (see the break) */
779 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
782 if (str >= end) break; \
783 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
784 if (retlen == (STRLEN) -1 || retlen == 0) { \
786 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
791 static const char *_action( const tempsym_t* symptr )
793 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
796 /* Returns the sizeof() struct described by pat */
798 S_measure_struct(pTHX_ tempsym_t* symptr)
802 while (next_symbol(symptr)) {
806 switch (symptr->howlen) {
808 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
812 /* e_no_len and e_number */
813 len = symptr->length;
817 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
820 /* endianness doesn't influence the size of a type */
821 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
823 Perl_croak(aTHX_ "Invalid type '%c' in %s",
824 (int)TYPE_NO_MODIFIERS(symptr->code),
826 #ifdef PERL_PACK_CAN_SHRIEKSIGN
827 # ifdef PERL_PACK_CAN_DOT
828 case '.' | TYPE_IS_SHRIEKING:
830 case '@' | TYPE_IS_SHRIEKING:
833 #ifdef PERL_PACK_CAN_DOT
837 case 'U': /* XXXX Is it correct? */
840 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
841 (int) TYPE_NO_MODIFIERS(symptr->code),
848 tempsym_t savsym = *symptr;
849 symptr->patptr = savsym.grpbeg;
850 symptr->patend = savsym.grpend;
851 /* XXXX Theoretically, we need to measure many times at
852 different positions, since the subexpression may contain
853 alignment commands, but be not of aligned length.
854 Need to detect this and croak(). */
855 size = measure_struct(symptr);
859 case 'X' | TYPE_IS_SHRIEKING:
860 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
862 if (!len) /* Avoid division by 0 */
864 len = total % len; /* Assumed: the start is aligned. */
869 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
871 case 'x' | TYPE_IS_SHRIEKING:
872 if (!len) /* Avoid division by 0 */
874 star = total % len; /* Assumed: the start is aligned. */
875 if (star) /* Other portable ways? */
899 size = sizeof(char*);
909 /* locate matching closing parenthesis or bracket
910 * returns char pointer to char after match, or NULL
913 S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
915 while (patptr < patend) {
916 const char c = *patptr++;
923 while (patptr < patend && *patptr != '\n')
927 patptr = group_end(patptr, patend, ')') + 1;
929 patptr = group_end(patptr, patend, ']') + 1;
931 Perl_croak(aTHX_ "No group ending character '%c' found in template",
937 /* Convert unsigned decimal number to binary.
938 * Expects a pointer to the first digit and address of length variable
939 * Advances char pointer to 1st non-digit char and returns number
942 S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
944 I32 len = *patptr++ - '0';
945 while (isDIGIT(*patptr)) {
946 if (len >= 0x7FFFFFFF/10)
947 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
948 len = (len * 10) + (*patptr++ - '0');
954 /* The marvellous template parsing routine: Using state stored in *symptr,
955 * locates next template code and count
958 S_next_symbol(pTHX_ tempsym_t* symptr )
960 const char* patptr = symptr->patptr;
961 const char* const patend = symptr->patend;
962 const char *allowed = "";
964 symptr->flags &= ~FLAG_SLASH;
966 while (patptr < patend) {
967 if (isSPACE(*patptr))
969 else if (*patptr == '#') {
971 while (patptr < patend && *patptr != '\n')
976 /* We should have found a template code */
977 I32 code = *patptr++ & 0xFF;
978 U32 inherited_modifiers = 0;
980 if (code == ','){ /* grandfather in commas but with a warning */
981 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
982 symptr->flags |= FLAG_COMMA;
983 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
984 "Invalid type ',' in %s", _action( symptr ) );
989 /* for '(', skip to ')' */
991 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
992 Perl_croak(aTHX_ "()-group starts with a count in %s",
994 symptr->grpbeg = (char *) patptr;
996 = 1 + ( symptr->grpend = (char *)group_end(patptr, patend, ')') );
997 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
998 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
1002 /* look for group modifiers to inherit */
1003 if (TYPE_ENDIANNESS(symptr->flags)) {
1004 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
1005 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
1008 /* look for modifiers */
1009 while (patptr < patend) {
1013 modifier = TYPE_IS_SHRIEKING;
1014 allowed = SHRIEKING_ALLOWED_TYPES;
1016 #ifdef PERL_PACK_CAN_BYTEORDER
1018 modifier = TYPE_IS_BIG_ENDIAN;
1019 allowed = ENDIANNESS_ALLOWED_TYPES;
1022 modifier = TYPE_IS_LITTLE_ENDIAN;
1023 allowed = ENDIANNESS_ALLOWED_TYPES;
1025 #endif /* PERL_PACK_CAN_BYTEORDER */
1035 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1036 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
1037 allowed, _action( symptr ) );
1039 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1040 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
1041 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
1042 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1043 TYPE_ENDIANNESS_MASK)
1044 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
1045 *patptr, _action( symptr ) );
1047 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1048 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1049 "Duplicate modifier '%c' after '%c' in %s",
1050 *patptr, (int) TYPE_NO_MODIFIERS(code),
1051 _action( symptr ) );
1058 /* inherit modifiers */
1059 code |= inherited_modifiers;
1061 /* look for count and/or / */
1062 if (patptr < patend) {
1063 if (isDIGIT(*patptr)) {
1064 patptr = get_num( patptr, &symptr->length );
1065 symptr->howlen = e_number;
1067 } else if (*patptr == '*') {
1069 symptr->howlen = e_star;
1071 } else if (*patptr == '[') {
1072 const char* lenptr = ++patptr;
1073 symptr->howlen = e_number;
1074 patptr = group_end( patptr, patend, ']' ) + 1;
1075 /* what kind of [] is it? */
1076 if (isDIGIT(*lenptr)) {
1077 lenptr = get_num( lenptr, &symptr->length );
1078 if( *lenptr != ']' )
1079 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1080 _action( symptr ) );
1082 tempsym_t savsym = *symptr;
1083 symptr->patend = (char *) patptr-1;
1084 symptr->patptr = (char *) lenptr;
1085 savsym.length = measure_struct(symptr);
1089 symptr->howlen = e_no_len;
1094 while (patptr < patend) {
1095 if (isSPACE(*patptr))
1097 else if (*patptr == '#') {
1099 while (patptr < patend && *patptr != '\n')
1101 if (patptr < patend)
1104 if (*patptr == '/') {
1105 symptr->flags |= FLAG_SLASH;
1107 if (patptr < patend &&
1108 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1109 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1110 _action( symptr ) );
1116 /* at end - no count, no / */
1117 symptr->howlen = e_no_len;
1121 symptr->code = code;
1122 symptr->patptr = (char *) patptr;
1126 symptr->patptr = (char *) patptr;
1130 #ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
1132 There is no way to cleanly handle the case where we should process the
1133 string per byte in its upgraded form while it's really in downgraded form
1134 (e.g. estimates like strend-s as an upper bound for the number of
1135 characters left wouldn't work). So if we foresee the need of this
1136 (pattern starts with U or contains U0), we want to work on the encoded
1137 version of the string. Users are advised to upgrade their pack string
1138 themselves if they need to do a lot of unpacks like this on it
1140 /* XXX These can be const */
1142 need_utf8(const char *pat, const char *patend)
1145 while (pat < patend) {
1146 if (pat[0] == '#') {
1148 pat = (const char *) memchr(pat, '\n', patend-pat);
1149 if (!pat) return FALSE;
1150 } else if (pat[0] == 'U') {
1151 if (first || pat[1] == '0') return TRUE;
1152 } else first = FALSE;
1159 first_symbol(const char *pat, const char *patend) {
1160 while (pat < patend) {
1161 if (pat[0] != '#') return pat[0];
1163 pat = (const char *) memchr(pat, '\n', patend-pat);
1172 =for apidoc unpackstring
1174 The engine implementing unpack() Perl function. C<unpackstring> puts the
1175 extracted list items on the stack and returns the number of elements.
1176 Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1181 Perl_unpackstring(pTHX_ char *pat, char *patend, char *s, char *strend,
1186 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1187 #ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
1188 else if (need_utf8(pat, patend)) {
1189 /* We probably should try to avoid this in case a scalar context call
1190 wouldn't get to the "U0" */
1191 STRLEN len = strend - s;
1192 s = (char *) bytes_to_utf8((U8 *) s, &len);
1195 flags |= FLAG_DO_UTF8;
1198 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1199 flags |= FLAG_PARSE_UTF8;
1201 if (flags & FLAG_DO_UTF8)
1202 flags |= FLAG_PARSE_UTF8;
1205 TEMPSYM_INIT(&sym, pat, patend, flags);
1207 return unpack_rec(&sym, s, s, strend, NULL );
1212 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
1216 const I32 start_sp_offset = SP - PL_stack_base;
1222 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1223 bool beyond = FALSE;
1224 bool explicit_length;
1225 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1226 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1227 symptr->strbeg = s - strbeg;
1229 while (next_symbol(symptr)) {
1232 I32 datumtype = symptr->code;
1233 /* do first one only unless in list context
1234 / is implemented by unpacking the count, then popping it from the
1235 stack, so must check that we're not in the middle of a / */
1236 if ( unpack_only_one
1237 && (SP - PL_stack_base == start_sp_offset + 1)
1238 && (datumtype != '/') ) /* XXX can this be omitted */
1241 switch (howlen = symptr->howlen) {
1243 len = strend - strbeg; /* long enough */
1246 /* e_no_len and e_number */
1247 len = symptr->length;
1251 explicit_length = TRUE;
1253 beyond = s >= strend;
1255 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1257 /* props nonzero means we can process this letter. */
1258 const long size = props & PACK_SIZE_MASK;
1259 const long howmany = (strend - s) / size;
1263 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1264 if (len && unpack_only_one) len = 1;
1270 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1272 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1275 if (howlen == e_no_len)
1276 len = 16; /* len is not specified */
1284 tempsym_t savsym = *symptr;
1285 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1286 symptr->flags |= group_modifiers;
1287 symptr->patend = savsym.grpend;
1288 symptr->previous = &savsym;
1292 symptr->patptr = savsym.grpbeg;
1293 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1294 else symptr->flags &= ~FLAG_PARSE_UTF8;
1295 unpack_rec(symptr, s, strbeg, strend, &s);
1296 if (s == strend && savsym.howlen == e_star)
1297 break; /* No way to continue */
1300 savsym.flags = symptr->flags & ~group_modifiers;
1304 #ifdef PERL_PACK_CAN_DOT
1305 # ifdef PERL_PACK_CAN_SHRIEKSIGN
1306 case '.' | TYPE_IS_SHRIEKING:
1311 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1312 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1313 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1314 const bool u8 = utf8;
1316 if (howlen == e_star) from = strbeg;
1317 else if (len <= 0) from = s;
1319 tempsym_t *group = symptr;
1321 while (--len && group) group = group->previous;
1322 from = group ? strbeg + group->strbeg : strbeg;
1325 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1326 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1327 XPUSHs(sv_2mortal(sv));
1331 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1332 case '@' | TYPE_IS_SHRIEKING:
1335 s = strbeg + symptr->strbeg;
1336 #ifndef PERL_PACK_POSITIONS_ARE_BYTES_COMPATIBILITY
1337 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1338 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1339 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1345 Perl_croak(aTHX_ "'@' outside of string in unpack");
1350 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1355 Perl_croak(aTHX_ "'@' outside of string in unpack");
1359 case 'X' | TYPE_IS_SHRIEKING:
1360 if (!len) /* Avoid division by 0 */
1362 #ifndef PERL_PACK_POSITIONS_ARE_BYTES_COMPATIBILITY
1364 const char *hop, *last;
1366 hop = last = strbeg;
1368 hop += UTF8SKIP(hop);
1375 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1380 len = (s - strbeg) % len;
1383 #ifndef PERL_PACK_POSITIONS_ARE_BYTES_COMPATIBILITY
1387 Perl_croak(aTHX_ "'X' outside of string in unpack");
1388 while (--s, UTF8_IS_CONTINUATION(*s)) {
1390 Perl_croak(aTHX_ "'X' outside of string in unpack");
1397 if (len > s - strbeg)
1398 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1402 case 'x' | TYPE_IS_SHRIEKING: {
1404 if (!len) /* Avoid division by 0 */
1406 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1407 else ai32 = (s - strbeg) % len;
1408 if (ai32 == 0) break;
1416 Perl_croak(aTHX_ "'x' outside of string in unpack");
1421 if (len > strend - s)
1422 Perl_croak(aTHX_ "'x' outside of string in unpack");
1427 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1433 /* Preliminary length estimate is assumed done in 'W' */
1434 if (len > strend - s) len = strend - s;
1437 #ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1441 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1442 if (hop >= strend) {
1444 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1449 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1453 if (len > strend - s)
1456 if (datumtype == 'Z') {
1457 /* 'Z' strips stuff after first null */
1458 const char *ptr, *end;
1460 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1461 sv = newSVpvn(s, ptr-s);
1462 if (howlen == e_star) /* exact for 'Z*' */
1463 len = ptr-s + (ptr != strend ? 1 : 0);
1464 } else if (datumtype == 'A') {
1465 /* 'A' strips both nulls and spaces */
1467 #ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1468 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1469 for (ptr = s+len-1; ptr >= s; ptr--)
1470 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1471 !is_utf8_space((U8 *) ptr)) break;
1472 if (ptr >= s) ptr += UTF8SKIP(ptr);
1475 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1479 for (ptr = s+len-1; ptr >= s; ptr--)
1480 if (*ptr != 0 && !isSPACE(*ptr)) break;
1483 sv = newSVpvn(s, ptr-s);
1484 } else sv = newSVpvn(s, len);
1486 #ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1489 /* Undo any upgrade done due to need_utf8() */
1490 if (!(symptr->flags & FLAG_WAS_UTF8))
1491 sv_utf8_downgrade(sv, 0);
1494 XPUSHs(sv_2mortal(sv));
1500 if (howlen == e_star || len > (strend - s) * 8)
1501 len = (strend - s) * 8;
1505 Newxz(PL_bitcount, 256, char);
1506 for (bits = 1; bits < 256; bits++) {
1507 if (bits & 1) PL_bitcount[bits]++;
1508 if (bits & 2) PL_bitcount[bits]++;
1509 if (bits & 4) PL_bitcount[bits]++;
1510 if (bits & 8) PL_bitcount[bits]++;
1511 if (bits & 16) PL_bitcount[bits]++;
1512 if (bits & 32) PL_bitcount[bits]++;
1513 if (bits & 64) PL_bitcount[bits]++;
1514 if (bits & 128) PL_bitcount[bits]++;
1517 #ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1519 while (len >= 8 && s < strend) {
1520 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1526 cuv += PL_bitcount[*(U8 *)s++];
1529 if (len && s < strend) {
1531 #ifdef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1532 bits = SHIFT_BYTE(0, s, strend, datumtype);
1534 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1536 if (datumtype == 'b')
1538 if (bits & 1) cuv++;
1543 if (bits & 0x80) cuv++;
1550 sv = sv_2mortal(newSV(len ? len : 1));
1553 if (datumtype == 'b') {
1555 const I32 ai32 = len;
1556 for (len = 0; len < ai32; len++) {
1557 if (len & 7) bits >>= 1;
1558 #ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1560 if (s >= strend) break;
1561 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1564 else bits = *(U8 *) s++;
1565 *str++ = bits & 1 ? '1' : '0';
1569 const I32 ai32 = len;
1570 for (len = 0; len < ai32; len++) {
1571 if (len & 7) bits <<= 1;
1572 #ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1574 if (s >= strend) break;
1575 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1578 else bits = *(U8 *) s++;
1579 *str++ = bits & 0x80 ? '1' : '0';
1583 SvCUR_set(sv, str - SvPVX(sv));
1590 /* Preliminary length estimate, acceptable for utf8 too */
1591 if (howlen == e_star || len > (strend - s) * 2)
1592 len = (strend - s) * 2;
1593 sv = sv_2mortal(newSV(len ? len : 1));
1596 if (datumtype == 'h') {
1599 for (len = 0; len < ai32; len++) {
1600 if (len & 1) bits >>= 4;
1601 #ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1603 if (s >= strend) break;
1604 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1607 else bits = * (U8 *) s++;
1608 *str++ = PL_hexdigit[bits & 15];
1612 const I32 ai32 = len;
1613 for (len = 0; len < ai32; len++) {
1614 if (len & 1) bits <<= 4;
1615 #ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1617 if (s >= strend) break;
1618 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1621 else bits = *(U8 *) s++;
1622 *str++ = PL_hexdigit[(bits >> 4) & 15];
1626 SvCUR_set(sv, str - SvPVX(sv));
1632 #ifdef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1633 int aint = SHIFT_BYTE(0, s, strend, datumtype);
1635 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
1637 if (aint >= 128) /* fake up signed chars */
1640 PUSHs(sv_2mortal(newSViv((IV)aint)));
1641 else if (checksum > bits_in_uv)
1642 cdouble += (NV)aint;
1648 #ifdef PERL_PACK_CAN_W
1653 if (explicit_length && datumtype == 'C')
1654 #ifdef PERL_PACK_REVERSE_UTF8_MODE_COMPATIBILITY
1655 /* In maint, continue to switch from "character" mode */
1658 /* Switch to "character" mode */
1659 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1663 #ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1664 if (datumtype == 'C' ?
1665 (symptr->flags & FLAG_DO_UTF8) &&
1666 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
1667 while (len-- > 0 && s < strend) {
1669 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1670 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1671 if (retlen == (STRLEN) -1 || retlen == 0)
1672 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1675 PUSHs(sv_2mortal(newSVuv((UV) val)));
1676 else if (checksum > bits_in_uv)
1677 cdouble += (NV) val;
1685 const U8 ch = *(U8 *) s++;
1686 PUSHs(sv_2mortal(newSVuv((UV) ch)));
1688 else if (checksum > bits_in_uv)
1689 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1691 while (len-- > 0) cuv += *(U8 *) s++;
1695 if (explicit_length) {
1696 #ifdef PERL_PACK_REVERSE_UTF8_MODE_COMPATIBILITY
1697 /* In maint, switch to "character" mode */
1700 /* Switch to "bytes in UTF-8" mode */
1701 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1703 /* Should be impossible due to the need_utf8() test */
1704 Perl_croak(aTHX_ "U0 mode on a byte string");
1709 #ifdef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1713 if (len > strend - s) len = strend - s;
1715 if (len && unpack_only_one) len = 1;
1719 while (len-- > 0 && s < strend) {
1722 #ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1724 U8 result[UTF8_MAXLEN];
1725 const char *ptr = s;
1727 /* Bug: warns about bad utf8 even if we are short on bytes
1728 and will break out of the loop */
1729 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1732 len = UTF8SKIP(result);
1733 if (!uni_to_bytes(aTHX_ &ptr, strend,
1734 (char *) &result[1], len-1, 'U')) break;
1735 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1740 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1741 if (retlen == (STRLEN) -1 || retlen == 0)
1742 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1746 PUSHs(sv_2mortal(newSVuv((UV) auv)));
1747 else if (checksum > bits_in_uv)
1748 cdouble += (NV) auv;
1753 case 's' | TYPE_IS_SHRIEKING:
1754 #if SHORTSIZE != SIZE16
1757 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1758 DO_BO_UNPACK(ashort, s);
1760 PUSHs(sv_2mortal(newSViv((IV)ashort)));
1761 else if (checksum > bits_in_uv)
1762 cdouble += (NV)ashort;
1774 #if U16SIZE > SIZE16
1777 SHIFT16(utf8, s, strend, &ai16, datumtype);
1778 DO_BO_UNPACK(ai16, 16);
1779 #if U16SIZE > SIZE16
1784 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1785 else if (checksum > bits_in_uv)
1786 cdouble += (NV)ai16;
1791 case 'S' | TYPE_IS_SHRIEKING:
1792 #if SHORTSIZE != SIZE16
1794 unsigned short aushort;
1795 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1796 DO_BO_UNPACK(aushort, s);
1798 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
1799 else if (checksum > bits_in_uv)
1800 cdouble += (NV)aushort;
1813 #if U16SIZE > SIZE16
1816 SHIFT16(utf8, s, strend, &au16, datumtype);
1817 DO_BO_UNPACK(au16, 16);
1819 if (datumtype == 'n')
1820 au16 = PerlSock_ntohs(au16);
1823 if (datumtype == 'v')
1827 PUSHs(sv_2mortal(newSVuv((UV)au16)));
1828 else if (checksum > bits_in_uv)
1829 cdouble += (NV) au16;
1834 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1835 case 'v' | TYPE_IS_SHRIEKING:
1836 case 'n' | TYPE_IS_SHRIEKING:
1839 # if U16SIZE > SIZE16
1842 SHIFT16(utf8, s, strend, &ai16, datumtype);
1844 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1845 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1846 # endif /* HAS_NTOHS */
1848 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1849 ai16 = (I16) vtohs((U16) ai16);
1850 # endif /* HAS_VTOHS */
1852 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1853 else if (checksum > bits_in_uv)
1854 cdouble += (NV) ai16;
1859 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1861 case 'i' | TYPE_IS_SHRIEKING:
1864 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1865 DO_BO_UNPACK(aint, i);
1867 PUSHs(sv_2mortal(newSViv((IV)aint)));
1868 else if (checksum > bits_in_uv)
1869 cdouble += (NV)aint;
1875 case 'I' | TYPE_IS_SHRIEKING:
1878 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1879 DO_BO_UNPACK(auint, i);
1881 PUSHs(sv_2mortal(newSVuv((UV)auint)));
1882 else if (checksum > bits_in_uv)
1883 cdouble += (NV)auint;
1891 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1892 #if IVSIZE == INTSIZE
1893 DO_BO_UNPACK(aiv, i);
1894 #elif IVSIZE == LONGSIZE
1895 DO_BO_UNPACK(aiv, l);
1896 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1897 DO_BO_UNPACK(aiv, 64);
1899 Perl_croak(aTHX_ "'j' not supported on this platform");
1902 PUSHs(sv_2mortal(newSViv(aiv)));
1903 else if (checksum > bits_in_uv)
1912 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1913 #if IVSIZE == INTSIZE
1914 DO_BO_UNPACK(auv, i);
1915 #elif IVSIZE == LONGSIZE
1916 DO_BO_UNPACK(auv, l);
1917 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1918 DO_BO_UNPACK(auv, 64);
1920 Perl_croak(aTHX_ "'J' not supported on this platform");
1923 PUSHs(sv_2mortal(newSVuv(auv)));
1924 else if (checksum > bits_in_uv)
1930 case 'l' | TYPE_IS_SHRIEKING:
1931 #if LONGSIZE != SIZE32
1934 SHIFT_VAR(utf8, s, strend, along, datumtype);
1935 DO_BO_UNPACK(along, l);
1937 PUSHs(sv_2mortal(newSViv((IV)along)));
1938 else if (checksum > bits_in_uv)
1939 cdouble += (NV)along;
1950 #if U32SIZE > SIZE32
1953 SHIFT32(utf8, s, strend, &ai32, datumtype);
1954 DO_BO_UNPACK(ai32, 32);
1955 #if U32SIZE > SIZE32
1956 if (ai32 > 2147483647) ai32 -= 4294967296;
1959 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1960 else if (checksum > bits_in_uv)
1961 cdouble += (NV)ai32;
1966 case 'L' | TYPE_IS_SHRIEKING:
1967 #if LONGSIZE != SIZE32
1969 unsigned long aulong;
1970 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1971 DO_BO_UNPACK(aulong, l);
1973 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
1974 else if (checksum > bits_in_uv)
1975 cdouble += (NV)aulong;
1988 #if U32SIZE > SIZE32
1991 SHIFT32(utf8, s, strend, &au32, datumtype);
1992 DO_BO_UNPACK(au32, 32);
1994 if (datumtype == 'N')
1995 au32 = PerlSock_ntohl(au32);
1998 if (datumtype == 'V')
2002 PUSHs(sv_2mortal(newSVuv((UV)au32)));
2003 else if (checksum > bits_in_uv)
2004 cdouble += (NV)au32;
2009 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2010 case 'V' | TYPE_IS_SHRIEKING:
2011 case 'N' | TYPE_IS_SHRIEKING:
2014 # if U32SIZE > SIZE32
2017 SHIFT32(utf8, s, strend, &ai32, datumtype);
2019 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
2020 ai32 = (I32)PerlSock_ntohl((U32)ai32);
2023 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
2024 ai32 = (I32)vtohl((U32)ai32);
2027 PUSHs(sv_2mortal(newSViv((IV)ai32)));
2028 else if (checksum > bits_in_uv)
2029 cdouble += (NV)ai32;
2034 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
2038 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2039 DO_BO_UNPACK_PC(aptr);
2040 /* newSVpv generates undef if aptr is NULL */
2041 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
2049 while (len > 0 && s < strend) {
2051 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
2052 auv = (auv << 7) | (ch & 0x7f);
2053 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
2056 PUSHs(sv_2mortal(newSVuv(auv)));
2061 if (++bytes >= sizeof(UV)) { /* promote to string */
2064 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
2065 while (s < strend) {
2066 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
2067 sv = mul128(sv, (U8)(ch & 0x7f));
2073 t = SvPV_nolen_const(sv);
2076 sv_chop(sv, (char *)t);
2077 PUSHs(sv_2mortal(sv));
2082 if ((s >= strend) && bytes)
2083 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
2087 if (symptr->howlen == e_star)
2088 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
2090 if (s + sizeof(char*) <= strend) {
2092 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2093 DO_BO_UNPACK_PC(aptr);
2094 /* newSVpvn generates undef if aptr is NULL */
2095 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
2102 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2103 DO_BO_UNPACK(aquad, 64);
2105 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
2106 newSViv((IV)aquad) : newSVnv((NV)aquad)));
2107 else if (checksum > bits_in_uv)
2108 cdouble += (NV)aquad;
2116 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2117 DO_BO_UNPACK(auquad, 64);
2119 PUSHs(sv_2mortal(auquad <= UV_MAX ?
2120 newSVuv((UV)auquad):newSVnv((NV)auquad)));
2121 else if (checksum > bits_in_uv)
2122 cdouble += (NV)auquad;
2127 #endif /* HAS_QUAD */
2128 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2132 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
2133 DO_BO_UNPACK_N(afloat, float);
2135 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2143 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
2144 DO_BO_UNPACK_N(adouble, double);
2146 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2154 SHIFT_VAR(utf8, s, strend, anv, datumtype);
2155 DO_BO_UNPACK_N(anv, NV);
2157 PUSHs(sv_2mortal(newSVnv(anv)));
2162 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2165 long double aldouble;
2166 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
2167 DO_BO_UNPACK_N(aldouble, long double);
2169 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2171 cdouble += aldouble;
2177 * Initialise the decode mapping. By using a table driven
2178 * algorithm, the code will be character-set independent
2179 * (and just as fast as doing character arithmetic)
2181 if (PL_uudmap[(U8)'M'] == 0) {
2184 for (i = 0; i < sizeof(PL_uuemap); ++i)
2185 PL_uudmap[(U8)PL_uuemap[i]] = i;
2187 * Because ' ' and '`' map to the same value,
2188 * we need to decode them both the same.
2190 PL_uudmap[(U8)' '] = 0;
2193 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2194 sv = sv_2mortal(newSV(l));
2195 if (l) SvPOK_on(sv);
2198 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2204 next_uni_uu(aTHX_ &s, strend, &a);
2205 next_uni_uu(aTHX_ &s, strend, &b);
2206 next_uni_uu(aTHX_ &s, strend, &c);
2207 next_uni_uu(aTHX_ &s, strend, &d);
2208 hunk[0] = (char)((a << 2) | (b >> 4));
2209 hunk[1] = (char)((b << 4) | (c >> 2));
2210 hunk[2] = (char)((c << 6) | d);
2211 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2219 /* possible checksum byte */
2220 const char *skip = s+UTF8SKIP(s);
2221 if (skip < strend && *skip == '\n')
2227 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2232 len = PL_uudmap[*(U8*)s++] & 077;
2234 if (s < strend && ISUUCHAR(*s))
2235 a = PL_uudmap[*(U8*)s++] & 077;
2238 if (s < strend && ISUUCHAR(*s))
2239 b = PL_uudmap[*(U8*)s++] & 077;
2242 if (s < strend && ISUUCHAR(*s))
2243 c = PL_uudmap[*(U8*)s++] & 077;
2246 if (s < strend && ISUUCHAR(*s))
2247 d = PL_uudmap[*(U8*)s++] & 077;
2250 hunk[0] = (char)((a << 2) | (b >> 4));
2251 hunk[1] = (char)((b << 4) | (c >> 2));
2252 hunk[2] = (char)((c << 6) | d);
2253 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2258 else /* possible checksum byte */
2259 if (s + 1 < strend && s[1] == '\n')
2268 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2269 (checksum > bits_in_uv &&
2270 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2273 anv = (NV) (1 << (checksum & 15));
2274 while (checksum >= 16) {
2278 while (cdouble < 0.0)
2280 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2281 sv = newSVnv(cdouble);
2284 if (checksum < bits_in_uv) {
2285 UV mask = ((UV)1 << checksum) - 1;
2290 XPUSHs(sv_2mortal(sv));
2294 if (symptr->flags & FLAG_SLASH){
2295 if (SP - PL_stack_base - start_sp_offset <= 0)
2296 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2297 if( next_symbol(symptr) ){
2298 if( symptr->howlen == e_number )
2299 Perl_croak(aTHX_ "Count after length/code in unpack" );
2301 /* ...end of char buffer then no decent length available */
2302 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2304 /* take top of stack (hope it's numeric) */
2307 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2310 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2312 datumtype = symptr->code;
2313 explicit_length = FALSE;
2321 return SP - PL_stack_base - start_sp_offset;
2328 I32 gimme = GIMME_V;
2331 const char *pat = SvPV_const(left, llen);
2332 const char *s = SvPV_const(right, rlen);
2333 const char *strend = s + rlen;
2334 const char *patend = pat + llen;
2338 cnt = unpackstring((char *)pat, (char *)patend, (char *)s, (char *)strend,
2339 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2340 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2343 if ( !cnt && gimme == G_SCALAR )
2344 PUSHs(&PL_sv_undef);
2349 doencodes(U8 *h, const char *s, I32 len)
2351 *h++ = PL_uuemap[len];
2353 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2354 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2355 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2356 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2361 const char r = (len > 1 ? s[1] : '\0');
2362 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2363 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2364 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2365 *h++ = PL_uuemap[0];
2372 S_is_an_int(pTHX_ const char *s, STRLEN l)
2374 SV *result = newSVpvn(s, l);
2375 char *const result_c = SvPV_nolen(result); /* convenience */
2376 char *out = result_c;
2386 SvREFCNT_dec(result);
2409 SvREFCNT_dec(result);
2415 SvCUR_set(result, out - result_c);
2419 /* pnum must be '\0' terminated */
2421 S_div128(pTHX_ SV *pnum, bool *done)
2424 char * const s = SvPV(pnum, len);
2430 const int i = m * 10 + (*t - '0');
2431 const int r = (i >> 7); /* r < 10 */
2439 SvCUR_set(pnum, (STRLEN) (t - s));
2444 =for apidoc packlist
2446 The engine implementing pack() Perl function.
2452 Perl_packlist(pTHX_ SV *cat, char *pat, char *patend, register SV **beglist,
2457 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2459 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2460 Also make sure any UTF8 flag is loaded */
2461 SvPV_force_nolen(cat);
2462 if (DO_UTF8(cat)) sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2464 (void)pack_rec( cat, &sym, beglist, endlist );
2467 /* like sv_utf8_upgrade, but also repoint the group start markers */
2469 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2472 const char *from_ptr, *from_start, *from_end, **marks, **m;
2473 char *to_start, *to_ptr;
2475 if (SvUTF8(sv)) return;
2477 from_start = SvPVX(sv);
2478 from_end = from_start + SvCUR(sv);
2479 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2480 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2481 if (from_ptr == from_end) {
2482 /* Simple case: no character needs to be changed */
2487 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2488 New('U', to_start, len, char);
2489 Copy(from_start, to_start, from_ptr-from_start, char);
2490 to_ptr = to_start + (from_ptr-from_start);
2492 New('U', marks, sym_ptr->level+2, const char *);
2493 for (group=sym_ptr; group; group = group->previous)
2494 marks[group->level] = from_start + group->strbeg;
2495 marks[sym_ptr->level+1] = from_end+1;
2496 for (m = marks; *m < from_ptr; m++)
2497 *m = to_start + (*m-from_start);
2499 for (;from_ptr < from_end; from_ptr++) {
2500 while (*m == from_ptr) *m++ = to_ptr;
2501 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2505 while (*m == from_ptr) *m++ = to_ptr;
2506 if (m != marks + sym_ptr->level+1) {
2509 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2511 for (group=sym_ptr; group; group = group->previous)
2512 group->strbeg = marks[group->level] - to_start;
2517 SvLEN(sv) += SvIVX(sv);
2518 from_start -= SvIVX(sv);
2521 SvFLAGS(sv) &= ~SVf_OOK;
2524 Safefree(from_start);
2525 SvPVX(sv) = to_start;
2526 SvCUR(sv) = to_ptr - to_start;
2531 /* Exponential string grower. Makes string extension effectively O(n)
2532 needed says how many extra bytes we need (not counting the final '\0')
2533 Only grows the string if there is an actual lack of space
2536 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2537 const STRLEN cur = SvCUR(sv);
2538 const STRLEN len = SvLEN(sv);
2540 if (len - cur > needed) return SvPVX(sv);
2541 extend = needed > len ? needed : len;
2542 return SvGROW(sv, len+extend+1);
2547 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2549 tempsym_t lookahead;
2550 I32 items = endlist - beglist;
2551 bool found = next_symbol(symptr);
2552 #ifdef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
2553 const bool utf8 = 0;
2555 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2557 bool warn_utf8 = ckWARN(WARN_UTF8);
2559 if (symptr->level == 0 && found && symptr->code == 'U') {
2560 marked_upgrade(aTHX_ cat, symptr);
2561 symptr->flags |= FLAG_DO_UTF8;
2562 #ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
2566 symptr->strbeg = SvCUR(cat);
2572 SV *lengthcode = NULL;
2573 I32 datumtype = symptr->code;
2574 howlen_t howlen = symptr->howlen;
2575 char *start = SvPVX(cat);
2576 char *cur = start + SvCUR(cat);
2578 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2582 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2586 /* e_no_len and e_number */
2587 len = symptr->length;
2592 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2594 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2595 /* We can process this letter. */
2596 STRLEN size = props & PACK_SIZE_MASK;
2597 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2601 /* Look ahead for next symbol. Do we have code/code? */
2602 lookahead = *symptr;
2603 found = next_symbol(&lookahead);
2604 if (symptr->flags & FLAG_SLASH) {
2606 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2607 if (strchr("aAZ", lookahead.code)) {
2608 if (lookahead.howlen == e_number) count = lookahead.length;
2611 if (SvGAMAGIC(*beglist)) {
2612 /* Avoid reading the active data more than once
2613 by copying it to a temporary. */
2615 const char *const pv = SvPV_const(*beglist, len);
2616 SV *const temp = sv_2mortal(newSVpvn(pv, len));
2617 if (SvUTF8(*beglist))
2621 count = DO_UTF8(*beglist) ?
2622 sv_len_utf8(*beglist) : sv_len(*beglist);
2625 if (lookahead.code == 'Z') count++;
2628 if (lookahead.howlen == e_number && lookahead.length < items)
2629 count = lookahead.length;
2632 lookahead.howlen = e_number;
2633 lookahead.length = count;
2634 lengthcode = sv_2mortal(newSViv(count));
2637 /* Code inside the switch must take care to properly update
2638 cat (CUR length and '\0' termination) if it updated *cur and
2639 doesn't simply leave using break */
2640 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2642 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2643 (int) TYPE_NO_MODIFIERS(datumtype));
2645 Perl_croak(aTHX_ "'%%' may not be used in pack");
2648 #ifdef PERL_PACK_CAN_DOT
2649 # ifdef PERL_PACK_CAN_SHRIEKSIGN
2650 case '.' | TYPE_IS_SHRIEKING:
2653 if (howlen == e_star) from = start;
2654 else if (len == 0) from = cur;
2656 tempsym_t *group = symptr;
2658 while (--len && group) group = group->previous;
2659 from = group ? start + group->strbeg : start;
2662 len = SvIV(fromstr);
2665 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2666 case '@' | TYPE_IS_SHRIEKING:
2669 from = start + symptr->strbeg;
2670 #ifdef PERL_PACK_CAN_DOT
2673 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2674 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2675 #else /* PERL_PACK_CAN_SHRIEKSIGN */
2679 while (len && from < cur) {
2680 from += UTF8SKIP(from);
2684 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2686 /* Here we know from == cur */
2688 GROWING(0, cat, start, cur, len);
2689 Zero(cur, len, char);
2691 } else if (from < cur) {
2694 } else goto no_change;
2702 if (len > 0) goto grow;
2703 if (len == 0) goto no_change;
2710 tempsym_t savsym = *symptr;
2711 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2712 symptr->flags |= group_modifiers;
2713 symptr->patend = savsym.grpend;
2715 symptr->previous = &lookahead;
2718 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2719 else symptr->flags &= ~FLAG_PARSE_UTF8;
2720 was_utf8 = SvUTF8(cat);
2721 symptr->patptr = savsym.grpbeg;
2722 beglist = pack_rec(cat, symptr, beglist, endlist);
2723 #ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
2724 if (SvUTF8(cat) != was_utf8)
2725 /* This had better be an upgrade while in utf8==0 mode */
2729 if (savsym.howlen == e_star && beglist == endlist)
2730 break; /* No way to continue */
2732 items = endlist - beglist;
2733 lookahead.flags = symptr->flags & ~group_modifiers;
2736 case 'X' | TYPE_IS_SHRIEKING:
2737 if (!len) /* Avoid division by 0 */
2744 hop += UTF8SKIP(hop);
2751 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2755 len = (cur-start) % len;
2759 if (len < 1) goto no_change;
2763 Perl_croak(aTHX_ "'%c' outside of string in pack",
2764 (int) TYPE_NO_MODIFIERS(datumtype));
2765 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2767 Perl_croak(aTHX_ "'%c' outside of string in pack",
2768 (int) TYPE_NO_MODIFIERS(datumtype));
2774 if (cur - start < len)
2775 Perl_croak(aTHX_ "'%c' outside of string in pack",
2776 (int) TYPE_NO_MODIFIERS(datumtype));
2779 if (cur < start+symptr->strbeg) {
2780 /* Make sure group starts don't point into the void */
2782 const STRLEN length = cur-start;
2783 for (group = symptr;
2784 group && length < group->strbeg;
2785 group = group->previous) group->strbeg = length;
2786 lookahead.strbeg = length;
2789 case 'x' | TYPE_IS_SHRIEKING: {
2791 if (!len) /* Avoid division by 0 */
2793 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2794 else ai32 = (cur - start) % len;
2795 if (ai32 == 0) goto no_change;
2807 aptr = (char *) SvPV_const(fromstr, fromlen);
2808 #ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
2809 if (DO_UTF8(fromstr)) {
2810 const char *end, *s;
2812 if (!utf8 && !SvUTF8(cat)) {
2813 marked_upgrade(aTHX_ cat, symptr);
2814 lookahead.flags |= FLAG_DO_UTF8;
2815 lookahead.strbeg = symptr->strbeg;
2818 cur = start + SvCUR(cat);
2820 if (howlen == e_star) {
2821 if (utf8) goto string_copy;
2825 end = aptr + fromlen;
2826 fromlen = datumtype == 'Z' ? len-1 : len;
2827 while ((I32) fromlen > 0 && s < end) {
2832 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2835 if (datumtype == 'Z') len++;
2841 fromlen = len - fromlen;
2842 if (datumtype == 'Z') fromlen--;
2843 if (howlen == e_star) {
2845 if (datumtype == 'Z') len++;
2847 GROWING(0, cat, start, cur, len);
2848 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2849 datumtype | TYPE_IS_PACK))
2850 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2854 if (howlen == e_star) {
2856 if (datumtype == 'Z') len++;
2858 if (len <= (I32) fromlen) {
2860 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2862 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2864 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2865 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2867 while (fromlen > 0) {
2868 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2875 #ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
2878 if (howlen == e_star) {
2880 if (datumtype == 'Z') len++;
2882 if (len <= (I32) fromlen) {
2884 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2886 GROWING(0, cat, start, cur, len);
2887 Copy(aptr, cur, fromlen, char);
2891 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2897 const char *str, *end;
2904 str = SvPV_const(fromstr, fromlen);
2905 end = str + fromlen;
2906 #ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
2907 if (DO_UTF8(fromstr)) {
2909 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2913 utf8_source = FALSE;
2914 utf8_flags = 0; /* Unused, but keep compilers happy */
2916 if (howlen == e_star) len = fromlen;
2917 field_len = (len+7)/8;
2918 GROWING(utf8, cat, start, cur, field_len);
2919 if (len > (I32)fromlen) len = fromlen;
2922 if (datumtype == 'B')
2926 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2928 } else bits |= *str++ & 1;
2929 if (l & 7) bits <<= 1;
2931 PUSH_BYTE(utf8, cur, bits);
2936 /* datumtype == 'b' */
2940 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2941 if (val & 1) bits |= 0x80;
2942 } else if (*str++ & 1)
2944 if (l & 7) bits >>= 1;
2946 PUSH_BYTE(utf8, cur, bits);
2952 if (datumtype == 'B')
2953 bits <<= 7 - (l & 7);
2955 bits >>= 7 - (l & 7);
2956 PUSH_BYTE(utf8, cur, bits);
2959 /* Determine how many chars are left in the requested field */
2961 if (howlen == e_star) field_len = 0;
2962 else field_len -= l;
2963 Zero(cur, field_len, char);
2976 str = SvPV(fromstr, fromlen);
2977 end = str + fromlen;
2978 #ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
2979 if (DO_UTF8(fromstr)) {
2981 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2985 utf8_source = FALSE;
2986 utf8_flags = 0; /* Unused, but keep compilers happy */
2988 if (howlen == e_star) len = fromlen;
2989 field_len = (len+1)/2;
2990 GROWING(utf8, cat, start, cur, field_len);
2991 if (!utf8 && len > (I32)fromlen) len = fromlen;
2994 if (datumtype == 'H')
2998 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2999 if (val < 256 && isALPHA(val))
3000 bits |= (val + 9) & 0xf;
3003 } else if (isALPHA(*str))
3004 bits |= (*str++ + 9) & 0xf;
3006 bits |= *str++ & 0xf;
3007 if (l & 1) bits <<= 4;
3009 PUSH_BYTE(utf8, cur, bits);
3017 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
3018 if (val < 256 && isALPHA(val))
3019 bits |= ((val + 9) & 0xf) << 4;
3021 bits |= (val & 0xf) << 4;
3022 } else if (isALPHA(*str))
3023 bits |= ((*str++ + 9) & 0xf) << 4;
3025 bits |= (*str++ & 0xf) << 4;
3026 if (l & 1) bits >>= 4;
3028 PUSH_BYTE(utf8, cur, bits);
3034 PUSH_BYTE(utf8, cur, bits);
3037 /* Determine how many chars are left in the requested field */
3039 if (howlen == e_star) field_len = 0;
3040 else field_len -= l;
3041 Zero(cur, field_len, char);
3049 aiv = SvIV(fromstr);
3050 if ((-128 > aiv || aiv > 127) &&
3052 Perl_warner(aTHX_ packWARN(WARN_PACK),
3053 "Character in 'c' format wrapped in pack");
3054 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
3059 #ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
3060 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
3064 GROWING(0, cat, start, cur, len);
3068 aiv = SvIV(fromstr);
3069 if ((0 > aiv || aiv > 0xff) &&
3071 Perl_warner(aTHX_ packWARN(WARN_PACK),
3072 "Character in 'C' format wrapped in pack");
3073 *cur++ = (char)(aiv & 0xff);
3076 #ifdef PERL_PACK_CAN_W
3079 U8 in_bytes = (U8)IN_BYTES;
3081 end = start+SvLEN(cat)-1;
3082 if (utf8) end -= UTF8_MAXLEN-1;
3086 auv = SvUV(fromstr);
3087 if (in_bytes) auv = auv % 0x100;
3092 SvCUR(cat) = cur - start;
3094 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3095 end = start+SvLEN(cat)-UTF8_MAXLEN;
3097 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3100 0 : UNICODE_ALLOW_ANY);
3105 SvCUR(cat) = cur - start;
3106 marked_upgrade(aTHX_ cat, symptr);
3107 lookahead.flags |= FLAG_DO_UTF8;
3108 lookahead.strbeg = symptr->strbeg;
3111 cur = start + SvCUR(cat);
3112 end = start+SvLEN(cat)-UTF8_MAXLEN;
3115 if (ckWARN(WARN_PACK))
3116 Perl_warner(aTHX_ packWARN(WARN_PACK),
3117 "Character in 'W' format wrapped in pack");
3122 SvCUR(cat) = cur - start;
3123 GROWING(0, cat, start, cur, len+1);
3124 end = start+SvLEN(cat)-1;
3126 *(U8 *) cur++ = auv;
3136 #ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
3137 if (!(symptr->flags & FLAG_DO_UTF8)) {
3138 marked_upgrade(aTHX_ cat, symptr);
3139 lookahead.flags |= FLAG_DO_UTF8;
3140 lookahead.strbeg = symptr->strbeg;
3147 end = start+SvLEN(cat);
3148 if (!utf8) end -= UTF8_MAXLEN;
3152 auv = SvUV(fromstr);
3154 U8 buffer[UTF8_MAXLEN], *endb;
3155 endb = uvuni_to_utf8_flags(buffer, auv,
3157 0 : UNICODE_ALLOW_ANY);
3158 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3160 SvCUR(cat) = cur - start;
3161 GROWING(0, cat, start, cur,
3162 len+(endb-buffer)*UTF8_EXPAND);
3163 end = start+SvLEN(cat);
3165 cur = bytes_to_uni(buffer, endb-buffer, cur);
3169 SvCUR(cat) = cur - start;
3170 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3171 end = start+SvLEN(cat)-UTF8_MAXLEN;
3173 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3175 0 : UNICODE_ALLOW_ANY);
3180 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3186 anv = SvNV(fromstr);
3188 /* VOS does not automatically map a floating-point overflow
3189 during conversion from double to float into infinity, so we
3190 do it by hand. This code should either be generalized for
3191 any OS that needs it, or removed if and when VOS implements
3192 posix-976 (suggestion to support mapping to infinity).
3193 Paul.Green@stratus.com 02-04-02. */
3195 afloat = _float_constants[0]; /* single prec. inf. */
3196 else if (anv < -FLT_MAX)
3197 afloat = _float_constants[0]; /* single prec. inf. */
3198 else afloat = (float) anv;
3200 # if defined(VMS) && !defined(__IEEE_FP)
3201 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3202 * on Alpha; fake it if we don't have them.
3206 else if (anv < -FLT_MAX)
3208 else afloat = (float)anv;
3210 afloat = (float)anv;
3212 #endif /* __VOS__ */
3213 DO_BO_PACK_N(afloat, float);
3214 PUSH_VAR(utf8, cur, afloat);
3222 anv = SvNV(fromstr);
3224 /* VOS does not automatically map a floating-point overflow
3225 during conversion from long double to double into infinity,
3226 so we do it by hand. This code should either be generalized
3227 for any OS that needs it, or removed if and when VOS
3228 implements posix-976 (suggestion to support mapping to
3229 infinity). Paul.Green@stratus.com 02-04-02. */
3231 adouble = _double_constants[0]; /* double prec. inf. */
3232 else if (anv < -DBL_MAX)
3233 adouble = _double_constants[0]; /* double prec. inf. */
3234 else adouble = (double) anv;
3236 # if defined(VMS) && !defined(__IEEE_FP)
3237 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3238 * on Alpha; fake it if we don't have them.
3242 else if (anv < -DBL_MAX)
3244 else adouble = (double)anv;
3246 adouble = (double)anv;
3248 #endif /* __VOS__ */
3249 DO_BO_PACK_N(adouble, double);
3250 PUSH_VAR(utf8, cur, adouble);
3255 Zero(&anv, 1, NV); /* can be long double with unused bits */
3258 anv = SvNV(fromstr);
3259 DO_BO_PACK_N(anv, NV);
3260 PUSH_VAR(utf8, cur, anv);
3264 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3266 long double aldouble;
3267 /* long doubles can have unused bits, which may be nonzero */
3268 Zero(&aldouble, 1, long double);
3271 aldouble = (long double)SvNV(fromstr);
3272 DO_BO_PACK_N(aldouble, long double);
3273 PUSH_VAR(utf8, cur, aldouble);
3278 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3279 case 'n' | TYPE_IS_SHRIEKING:
3285 ai16 = (I16)SvIV(fromstr);
3287 ai16 = PerlSock_htons(ai16);
3289 PUSH16(utf8, cur, &ai16);
3292 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3293 case 'v' | TYPE_IS_SHRIEKING:
3299 ai16 = (I16)SvIV(fromstr);
3303 PUSH16(utf8, cur, &ai16);
3306 case 'S' | TYPE_IS_SHRIEKING:
3307 #if SHORTSIZE != SIZE16
3309 unsigned short aushort;
3311 aushort = SvUV(fromstr);
3312 DO_BO_PACK(aushort, s);
3313 PUSH_VAR(utf8, cur, aushort);
3323 au16 = (U16)SvUV(fromstr);
3324 DO_BO_PACK(au16, 16);
3325 PUSH16(utf8, cur, &au16);
3328 case 's' | TYPE_IS_SHRIEKING:
3329 #if SHORTSIZE != SIZE16
3333 ashort = SvIV(fromstr);
3334 DO_BO_PACK(ashort, s);
3335 PUSH_VAR(utf8, cur, ashort);
3345 ai16 = (I16)SvIV(fromstr);
3346 DO_BO_PACK(ai16, 16);
3347 PUSH16(utf8, cur, &ai16);
3351 case 'I' | TYPE_IS_SHRIEKING:
3355 auint = SvUV(fromstr);
3356 DO_BO_PACK(auint, i);
3357 PUSH_VAR(utf8, cur, auint);
3364 aiv = SvIV(fromstr);
3365 #if IVSIZE == INTSIZE
3367 #elif IVSIZE == LONGSIZE
3369 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3370 DO_BO_PACK(aiv, 64);
3372 Perl_croak(aTHX_ "'j' not supported on this platform");
3374 PUSH_VAR(utf8, cur, aiv);
3381 auv = SvUV(fromstr);
3382 #if UVSIZE == INTSIZE
3384 #elif UVSIZE == LONGSIZE
3386 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3387 DO_BO_PACK(auv, 64);
3389 Perl_croak(aTHX_ "'J' not supported on this platform");
3391 PUSH_VAR(utf8, cur, auv);
3398 anv = SvNV(fromstr);
3402 SvCUR(cat) = cur - start;
3403 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3406 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3407 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3408 any negative IVs will have already been got by the croak()
3409 above. IOK is untrue for fractions, so we test them
3410 against UV_MAX_P1. */
3411 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3412 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3413 char *in = buf + sizeof(buf);
3414 UV auv = SvUV(fromstr);
3417 *--in = (char)((auv & 0x7f) | 0x80);
3420 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3421 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3422 in, (buf + sizeof(buf)) - in);
3423 } else if (SvPOKp(fromstr))
3425 else if (SvNOKp(fromstr)) {
3426 /* 10**NV_MAX_10_EXP is the largest power of 10
3427 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3428 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3429 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3430 And with that many bytes only Inf can overflow.
3431 Some C compilers are strict about integral constant
3432 expressions so we conservatively divide by a slightly
3433 smaller integer instead of multiplying by the exact
3434 floating-point value.
3436 #ifdef NV_MAX_10_EXP
3437 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3438 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3440 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3441 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3443 char *in = buf + sizeof(buf);
3445 anv = Perl_floor(anv);
3447 const NV next = Perl_floor(anv / 128);
3448 if (in <= buf) /* this cannot happen ;-) */
3449 Perl_croak(aTHX_ "Cannot compress integer in pack");
3450 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3453 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3454 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3455 in, (buf + sizeof(buf)) - in);
3464 /* Copy string and check for compliance */
3465 from = SvPV_const(fromstr, len);
3466 if ((norm = is_an_int(from, len)) == NULL)
3467 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3469 Newx(result, len, char);
3472 while (!done) *--in = div128(norm, &done) | 0x80;
3473 result[len - 1] &= 0x7F; /* clear continue bit */
3474 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3475 in, (result + len) - in);
3477 SvREFCNT_dec(norm); /* free norm */
3482 case 'i' | TYPE_IS_SHRIEKING:
3486 aint = SvIV(fromstr);
3487 DO_BO_PACK(aint, i);
3488 PUSH_VAR(utf8, cur, aint);
3491 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3492 case 'N' | TYPE_IS_SHRIEKING:
3498 au32 = SvUV(fromstr);
3500 au32 = PerlSock_htonl(au32);
3502 PUSH32(utf8, cur, &au32);
3505 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3506 case 'V' | TYPE_IS_SHRIEKING:
3512 au32 = SvUV(fromstr);
3516 PUSH32(utf8, cur, &au32);
3519 case 'L' | TYPE_IS_SHRIEKING:
3520 #if LONGSIZE != SIZE32
3522 unsigned long aulong;
3524 aulong = SvUV(fromstr);
3525 DO_BO_PACK(aulong, l);
3526 PUSH_VAR(utf8, cur, aulong);
3536 au32 = SvUV(fromstr);
3537 DO_BO_PACK(au32, 32);
3538 PUSH32(utf8, cur, &au32);
3541 case 'l' | TYPE_IS_SHRIEKING:
3542 #if LONGSIZE != SIZE32
3546 along = SvIV(fromstr);
3547 DO_BO_PACK(along, l);
3548 PUSH_VAR(utf8, cur, along);
3558 ai32 = SvIV(fromstr);
3559 DO_BO_PACK(ai32, 32);
3560 PUSH32(utf8, cur, &ai32);
3568 auquad = (Uquad_t) SvUV(fromstr);
3569 DO_BO_PACK(auquad, 64);
3570 PUSH_VAR(utf8, cur, auquad);
3577 aquad = (Quad_t)SvIV(fromstr);
3578 DO_BO_PACK(aquad, 64);
3579 PUSH_VAR(utf8, cur, aquad);
3582 #endif /* HAS_QUAD */
3584 len = 1; /* assume SV is correct length */
3585 GROWING(utf8, cat, start, cur, sizeof(char *));
3592 SvGETMAGIC(fromstr);
3593 if (!SvOK(fromstr)) aptr = NULL;
3596 /* XXX better yet, could spirit away the string to
3597 * a safe spot and hang on to it until the result
3598 * of pack() (and all copies of the result) are
3601 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3602 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
3603 Perl_warner(aTHX_ packWARN(WARN_PACK),
3604 "Attempt to pack pointer to temporary value");
3606 if (SvPOK(fromstr) || SvNIOK(fromstr))
3607 aptr = (char *) SvPV_nomg_const(fromstr, n_a);
3609 aptr = SvPV_force_flags(fromstr, n_a, 0);
3611 DO_BO_PACK_PC(aptr);
3612 PUSH_VAR(utf8, cur, aptr);
3616 const char *aptr, *aend;
3617 #ifdef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
3618 const bool from_utf8 = FALSE;
3624 if (len <= 2) len = 45;
3625 else len = len / 3 * 3;
3627 if (ckWARN(WARN_PACK))
3628 Perl_warner(aTHX_ packWARN(WARN_PACK),
3629 "Field too wide in 'u' format in pack");
3632 aptr = (char *) SvPV_const(fromstr, fromlen);
3633 #ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
3634 from_utf8 = DO_UTF8(fromstr);
3636 aend = aptr + fromlen;
3637 fromlen = sv_len_utf8(fromstr);
3641 aend = NULL; /* Unused, but keep compilers happy */
3643 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3645 while (fromlen > 0) {
3648 U8 hunk[1+63/3*4+1];
3650 if ((I32)fromlen > len)
3656 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3657 'u' | TYPE_IS_PACK)) {
3659 SvCUR(cat) = cur - start;
3660 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3662 end = doencodes(hunk, buffer, todo);
3664 end = doencodes(hunk, aptr, todo);
3667 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3674 SvCUR(cat) = cur - start;
3676 *symptr = lookahead;
3685 dSP; dMARK; dORIGMARK; dTARGET;
3686 register SV *cat = TARG;
3688 SV *pat_sv = *++MARK;
3689 register const char *pat = SvPV_const(pat_sv, fromlen);
3690 register const char *patend = pat + fromlen;
3693 sv_setpvn(cat, "", 0);
3696 packlist(cat, (char *) pat, (char *) patend, MARK, SP + 1);
3706 * c-indentation-style: bsd
3708 * indent-tabs-mode: t
3711 * ex: set ts=8 sts=4 sw=4 noet: