3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
19 /* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
31 #define PERL_IN_PP_PACK_C
34 /* Types used by pack/unpack */
36 e_no_len, /* no length */
37 e_number, /* number, [] */
41 typedef struct tempsym {
42 const char* patptr; /* current template char */
43 const char* patend; /* one after last char */
44 const char* grpbeg; /* 1st char of ()-group */
45 const char* grpend; /* end of ()-group */
46 I32 code; /* template code (!<>) */
47 I32 length; /* length/repeat count */
48 howlen_t howlen; /* how length is given */
49 int level; /* () nesting level */
50 U32 flags; /* /=4, comma=2, pack=1 */
51 /* and group modifiers */
52 STRLEN strbeg; /* offset of group start */
53 struct tempsym *previous; /* previous group */
56 #define TEMPSYM_INIT(symptr, p, e, f) \
58 (symptr)->patptr = (p); \
59 (symptr)->patend = (e); \
60 (symptr)->grpbeg = NULL; \
61 (symptr)->grpend = NULL; \
62 (symptr)->grpend = NULL; \
64 (symptr)->length = 0; \
65 (symptr)->howlen = 0; \
66 (symptr)->level = 0; \
67 (symptr)->flags = (f); \
68 (symptr)->strbeg = 0; \
69 (symptr)->previous = NULL; \
73 # define PERL_PACK_CAN_BYTEORDER
74 # define PERL_PACK_CAN_SHRIEKSIGN
79 #define PERL_PACK_CAN_W
80 #define PERL_PACK_CAN_DOT
82 #define PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
83 #define PERL_PACK_REVERSE_UTF8_MODE_COMPATIBILITY
84 #define PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
85 #define PERL_PACK_POSITIONS_ARE_BYTES_COMPATIBILITY
87 /* Maximum number of bytes to which a byte can grow due to upgrade */
91 * Offset for integer pack/unpack.
93 * On architectures where I16 and I32 aren't really 16 and 32 bits,
94 * which for now are all Crays, pack and unpack have to play games.
98 * These values are required for portability of pack() output.
99 * If they're not right on your machine, then pack() and unpack()
100 * wouldn't work right anyway; you'll need to apply the Cray hack.
101 * (I'd like to check them with #if, but you can't use sizeof() in
102 * the preprocessor.) --???
105 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
106 defines are now in config.h. --Andy Dougherty April 1998
111 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
114 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
115 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
116 # define OFF16(p) ((char*)(p))
117 # define OFF32(p) ((char*)(p))
119 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
120 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
121 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
123 ++++ bad cray byte order
127 # define OFF16(p) ((char *) (p))
128 # define OFF32(p) ((char *) (p))
131 /* Only to be used inside a loop (see the break) */
132 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
134 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
136 Copy(s, OFF16(p), SIZE16, char); \
141 /* Only to be used inside a loop (see the break) */
142 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
144 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
146 Copy(s, OFF32(p), SIZE32, char); \
151 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
152 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
154 /* Only to be used inside a loop (see the break) */
155 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
158 if (!uni_to_bytes(aTHX_ &s, strend, \
159 (char *) &var, sizeof(var), datumtype)) break;\
161 Copy(s, (char *) &var, sizeof(var), char); \
166 #define PUSH_VAR(utf8, aptr, var) \
167 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
169 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
170 #define MAX_SUB_TEMPLATE_LEVEL 100
172 /* flags (note that type modifiers can also be used as flags!) */
173 #define FLAG_WAS_UTF8 0x40
174 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
175 #define FLAG_UNPACK_ONLY_ONE 0x10
176 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
177 #define FLAG_SLASH 0x04
178 #define FLAG_COMMA 0x02
179 #define FLAG_PACK 0x01
182 S_mul128(pTHX_ SV *sv, U8 m)
185 char *s = SvPV(sv, len);
188 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
189 SV * const tmpNew = newSVpvn("0000000000", 10);
191 sv_catsv(tmpNew, sv);
192 SvREFCNT_dec(sv); /* free old sv */
197 while (!*t) /* trailing '\0'? */
200 const U32 i = ((*t - '0') << 7) + m;
201 *(t--) = '0' + (char)(i % 10);
207 /* Explosives and implosives. */
209 #if 'I' == 73 && 'J' == 74
210 /* On an ASCII/ISO kind of system */
211 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
214 Some other sort of character set - use memchr() so we don't match
217 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
221 #define TYPE_IS_SHRIEKING 0x100
222 #define TYPE_IS_BIG_ENDIAN 0x200
223 #define TYPE_IS_LITTLE_ENDIAN 0x400
224 #define TYPE_IS_PACK 0x800
225 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
226 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
227 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
229 #ifdef PERL_PACK_CAN_SHRIEKSIGN
230 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
232 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
235 #ifndef PERL_PACK_CAN_BYTEORDER
236 /* Put "can't" first because it is shorter */
237 # define TYPE_ENDIANNESS(t) 0
238 # define TYPE_NO_ENDIANNESS(t) (t)
240 # define ENDIANNESS_ALLOWED_TYPES ""
242 # define DO_BO_UNPACK(var, type)
243 # define DO_BO_PACK(var, type)
244 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
245 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
246 # define DO_BO_UNPACK_N(var, type)
247 # define DO_BO_PACK_N(var, type)
248 # define DO_BO_UNPACK_P(var)
249 # define DO_BO_PACK_P(var)
250 # define DO_BO_UNPACK_PC(var)
251 # define DO_BO_PACK_PC(var)
253 #else /* PERL_PACK_CAN_BYTEORDER */
255 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
256 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
258 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
260 # define DO_BO_UNPACK(var, type) \
262 switch (TYPE_ENDIANNESS(datumtype)) { \
263 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
264 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
269 # define DO_BO_PACK(var, type) \
271 switch (TYPE_ENDIANNESS(datumtype)) { \
272 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
273 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
278 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
280 switch (TYPE_ENDIANNESS(datumtype)) { \
281 case TYPE_IS_BIG_ENDIAN: \
282 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
284 case TYPE_IS_LITTLE_ENDIAN: \
285 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
292 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
294 switch (TYPE_ENDIANNESS(datumtype)) { \
295 case TYPE_IS_BIG_ENDIAN: \
296 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
298 case TYPE_IS_LITTLE_ENDIAN: \
299 var = (post_cast *) my_htole ## type ((pre_cast) var); \
306 # define BO_CANT_DOIT(action, type) \
308 switch (TYPE_ENDIANNESS(datumtype)) { \
309 case TYPE_IS_BIG_ENDIAN: \
310 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
311 "platform", #action, #type); \
313 case TYPE_IS_LITTLE_ENDIAN: \
314 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
315 "platform", #action, #type); \
322 # if PTRSIZE == INTSIZE
323 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
324 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
325 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
326 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
327 # elif PTRSIZE == LONGSIZE
328 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
329 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
330 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
331 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
332 # elif PTRSIZE == IVSIZE
333 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
334 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
335 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
336 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
338 # define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
339 # define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
340 # define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
341 # define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
344 # if defined(my_htolen) && defined(my_letohn) && \
345 defined(my_htoben) && defined(my_betohn)
346 # define DO_BO_UNPACK_N(var, type) \
348 switch (TYPE_ENDIANNESS(datumtype)) { \
349 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
350 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
355 # define DO_BO_PACK_N(var, type) \
357 switch (TYPE_ENDIANNESS(datumtype)) { \
358 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
359 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
364 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
365 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
368 #endif /* PERL_PACK_CAN_BYTEORDER */
370 #define PACK_SIZE_CANNOT_CSUM 0x80
371 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
372 #define PACK_SIZE_MASK 0x3F
374 /* These tables are regenerated by genpacksizetables.pl (and then hand pasted
375 in). You're unlikely ever to need to regenerate them. */
377 #if TYPE_IS_SHRIEKING != 0x100
378 ++++shriek offset should be 256
381 typedef U8 packprops_t;
384 STATIC const packprops_t packprops[512] = {
386 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
387 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
388 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
389 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
391 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
392 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
393 /* D */ LONG_DOUBLESIZE,
400 /* I */ sizeof(unsigned int),
407 #if defined(HAS_QUAD)
408 /* Q */ sizeof(Uquad_t),
415 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
417 #if defined(PERL_PACK_CAN_W)
418 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
422 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
423 /* c */ sizeof(char),
424 /* d */ sizeof(double),
426 /* f */ sizeof(float),
435 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
436 #if defined(HAS_QUAD)
437 /* q */ sizeof(Quad_t),
445 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
446 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
450 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
451 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
452 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
453 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
454 0, 0, 0, 0, 0, 0, 0, 0,
456 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
457 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
458 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
459 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
460 0, 0, 0, 0, 0, 0, 0, 0, 0,
461 /* I */ sizeof(unsigned int),
463 /* L */ sizeof(unsigned long),
465 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
471 /* S */ sizeof(unsigned short),
473 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
478 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
482 /* l */ sizeof(long),
484 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
490 /* s */ sizeof(short),
492 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
497 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
498 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
499 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
500 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 0, 0, 0, 0, 0, 0, 0, 0, 0
508 /* EBCDIC (or bust) */
509 STATIC const packprops_t packprops[512] = {
511 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
512 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
513 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
514 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
515 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
516 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
518 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
520 /* c */ sizeof(char),
521 /* d */ sizeof(double),
523 /* f */ sizeof(float),
533 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
534 #if defined(HAS_QUAD)
535 /* q */ sizeof(Quad_t),
539 0, 0, 0, 0, 0, 0, 0, 0, 0,
543 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
544 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
545 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
546 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
547 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
548 /* D */ LONG_DOUBLESIZE,
555 /* I */ sizeof(unsigned int),
563 #if defined(HAS_QUAD)
564 /* Q */ sizeof(Uquad_t),
568 0, 0, 0, 0, 0, 0, 0, 0, 0,
571 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
573 #if defined(PERL_PACK_CAN_W)
574 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
578 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
579 0, 0, 0, 0, 0, 0, 0, 0, 0,
581 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
582 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
583 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
584 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
585 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
586 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
587 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
588 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
589 0, 0, 0, 0, 0, 0, 0, 0, 0,
591 0, 0, 0, 0, 0, 0, 0, 0, 0,
592 /* l */ sizeof(long),
594 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
599 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
600 /* s */ sizeof(short),
602 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
607 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
608 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
610 /* I */ sizeof(unsigned int),
611 0, 0, 0, 0, 0, 0, 0, 0, 0,
612 /* L */ sizeof(unsigned long),
614 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
619 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
620 /* S */ sizeof(unsigned short),
622 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
627 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
628 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
633 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
636 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
637 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
638 /* We try to process malformed UTF-8 as much as possible (preferrably with
639 warnings), but these two mean we make no progress in the string and
640 might enter an infinite loop */
641 if (retlen == (STRLEN) -1 || retlen == 0)
642 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
643 (int) TYPE_NO_MODIFIERS(datumtype));
645 if (ckWARN(WARN_UNPACK))
646 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
647 "Character in '%c' format wrapped in unpack",
648 (int) TYPE_NO_MODIFIERS(datumtype));
655 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
656 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
660 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
664 const char *from = *s;
666 const U32 flags = ckWARN(WARN_UTF8) ?
667 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
668 for (;buf_len > 0; buf_len--) {
669 if (from >= end) return FALSE;
670 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
671 if (retlen == (STRLEN) -1 || retlen == 0) {
672 from += UTF8SKIP(from);
674 } else from += retlen;
681 /* We have enough characters for the buffer. Did we have problems ? */
684 /* Rewalk the string fragment while warning */
686 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
687 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
688 if (ptr >= end) break;
689 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
691 if (from > end) from = end;
693 if ((bad & 2) && ckWARN(WARN_UNPACK))
694 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
695 WARN_PACK : WARN_UNPACK),
696 "Character(s) in '%c' format wrapped in %s",
697 (int) TYPE_NO_MODIFIERS(datumtype),
698 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
705 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
708 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
709 if (val >= 0x100 || !ISUUCHAR(val) ||
710 retlen == (STRLEN) -1 || retlen == 0) {
714 *out = PL_uudmap[val] & 077;
720 bytes_to_uni(pTHX_ const U8 *start, STRLEN len, char **dest) {
721 U8 buffer[UTF8_MAXLEN];
722 const U8 * const end = start + len;
724 while (start < end) {
726 uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
736 Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
744 #define PUSH_BYTES(utf8, cur, buf, len) \
746 if (utf8) bytes_to_uni(aTHX_ (U8 *) buf, len, &(cur)); \
748 Copy(buf, cur, len, char); \
753 #define GROWING(utf8, cat, start, cur, in_len) \
755 STRLEN glen = (in_len); \
756 if (utf8) glen *= UTF8_EXPAND; \
757 if ((cur) + glen >= (start) + SvLEN(cat)) { \
758 (start) = sv_exp_grow(cat, glen); \
759 (cur) = (start) + SvCUR(cat); \
763 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
765 const STRLEN glen = (in_len); \
767 if (utf8) gl *= UTF8_EXPAND; \
768 if ((cur) + gl >= (start) + SvLEN(cat)) { \
770 SvCUR(cat) = (cur) - (start); \
771 (start) = sv_exp_grow(cat, gl); \
772 (cur) = (start) + SvCUR(cat); \
774 PUSH_BYTES(utf8, cur, buf, glen); \
777 #define PUSH_BYTE(utf8, s, byte) \
780 const U8 au8 = (byte); \
781 bytes_to_uni(aTHX_ &au8, 1, &(s)); \
782 } else *(U8 *)(s)++ = (byte); \
785 /* Only to be used inside a loop (see the break) */
786 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
789 if (str >= end) break; \
790 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
791 if (retlen == (STRLEN) -1 || retlen == 0) { \
793 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
798 static const char *_action( const tempsym_t* symptr )
800 return ( symptr->flags & FLAG_PACK ) ? "pack" : "unpack";
803 /* Returns the sizeof() struct described by pat */
805 S_measure_struct(pTHX_ tempsym_t* symptr)
809 while (next_symbol(symptr)) {
813 switch (symptr->howlen) {
815 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
819 /* e_no_len and e_number */
820 len = symptr->length;
824 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
827 /* endianness doesn't influence the size of a type */
828 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
830 Perl_croak(aTHX_ "Invalid type '%c' in %s",
831 (int)TYPE_NO_MODIFIERS(symptr->code),
833 #ifdef PERL_PACK_CAN_SHRIEKSIGN
834 # ifdef PERL_PACK_CAN_DOT
835 case '.' | TYPE_IS_SHRIEKING:
837 case '@' | TYPE_IS_SHRIEKING:
840 #ifdef PERL_PACK_CAN_DOT
844 case 'U': /* XXXX Is it correct? */
847 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
848 (int) TYPE_NO_MODIFIERS(symptr->code),
855 tempsym_t savsym = *symptr;
856 symptr->patptr = savsym.grpbeg;
857 symptr->patend = savsym.grpend;
858 /* XXXX Theoretically, we need to measure many times at
859 different positions, since the subexpression may contain
860 alignment commands, but be not of aligned length.
861 Need to detect this and croak(). */
862 size = measure_struct(symptr);
866 case 'X' | TYPE_IS_SHRIEKING:
867 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
869 if (!len) /* Avoid division by 0 */
871 len = total % len; /* Assumed: the start is aligned. */
876 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
878 case 'x' | TYPE_IS_SHRIEKING:
879 if (!len) /* Avoid division by 0 */
881 star = total % len; /* Assumed: the start is aligned. */
882 if (star) /* Other portable ways? */
906 size = sizeof(char*);
916 /* locate matching closing parenthesis or bracket
917 * returns char pointer to char after match, or NULL
920 S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
922 while (patptr < patend) {
923 const char c = *patptr++;
930 while (patptr < patend && *patptr != '\n')
934 patptr = group_end(patptr, patend, ')') + 1;
936 patptr = group_end(patptr, patend, ']') + 1;
938 Perl_croak(aTHX_ "No group ending character '%c' found in template",
944 /* Convert unsigned decimal number to binary.
945 * Expects a pointer to the first digit and address of length variable
946 * Advances char pointer to 1st non-digit char and returns number
949 S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
951 I32 len = *patptr++ - '0';
952 while (isDIGIT(*patptr)) {
953 if (len >= 0x7FFFFFFF/10)
954 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
955 len = (len * 10) + (*patptr++ - '0');
961 /* The marvellous template parsing routine: Using state stored in *symptr,
962 * locates next template code and count
965 S_next_symbol(pTHX_ tempsym_t* symptr )
967 const char* patptr = symptr->patptr;
968 const char* const patend = symptr->patend;
969 const char *allowed = "";
971 symptr->flags &= ~FLAG_SLASH;
973 while (patptr < patend) {
974 if (isSPACE(*patptr))
976 else if (*patptr == '#') {
978 while (patptr < patend && *patptr != '\n')
983 /* We should have found a template code */
984 I32 code = *patptr++ & 0xFF;
985 U32 inherited_modifiers = 0;
987 if (code == ','){ /* grandfather in commas but with a warning */
988 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
989 symptr->flags |= FLAG_COMMA;
990 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
991 "Invalid type ',' in %s", _action( symptr ) );
996 /* for '(', skip to ')' */
998 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
999 Perl_croak(aTHX_ "()-group starts with a count in %s",
1000 _action( symptr ) );
1001 symptr->grpbeg = (char *) patptr;
1003 = 1 + ( symptr->grpend = (char *)group_end(patptr, patend, ')') );
1004 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
1005 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
1006 _action( symptr ) );
1009 /* look for group modifiers to inherit */
1010 if (TYPE_ENDIANNESS(symptr->flags)) {
1011 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
1012 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
1015 /* look for modifiers */
1016 while (patptr < patend) {
1020 modifier = TYPE_IS_SHRIEKING;
1021 allowed = SHRIEKING_ALLOWED_TYPES;
1023 #ifdef PERL_PACK_CAN_BYTEORDER
1025 modifier = TYPE_IS_BIG_ENDIAN;
1026 allowed = ENDIANNESS_ALLOWED_TYPES;
1029 modifier = TYPE_IS_LITTLE_ENDIAN;
1030 allowed = ENDIANNESS_ALLOWED_TYPES;
1032 #endif /* PERL_PACK_CAN_BYTEORDER */
1042 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1043 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
1044 allowed, _action( symptr ) );
1046 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1047 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
1048 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
1049 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1050 TYPE_ENDIANNESS_MASK)
1051 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
1052 *patptr, _action( symptr ) );
1054 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1055 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1056 "Duplicate modifier '%c' after '%c' in %s",
1057 *patptr, (int) TYPE_NO_MODIFIERS(code),
1058 _action( symptr ) );
1065 /* inherit modifiers */
1066 code |= inherited_modifiers;
1068 /* look for count and/or / */
1069 if (patptr < patend) {
1070 if (isDIGIT(*patptr)) {
1071 patptr = get_num( patptr, &symptr->length );
1072 symptr->howlen = e_number;
1074 } else if (*patptr == '*') {
1076 symptr->howlen = e_star;
1078 } else if (*patptr == '[') {
1079 const char* lenptr = ++patptr;
1080 symptr->howlen = e_number;
1081 patptr = group_end( patptr, patend, ']' ) + 1;
1082 /* what kind of [] is it? */
1083 if (isDIGIT(*lenptr)) {
1084 lenptr = get_num( lenptr, &symptr->length );
1085 if( *lenptr != ']' )
1086 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1087 _action( symptr ) );
1089 tempsym_t savsym = *symptr;
1090 symptr->patend = (char *) patptr-1;
1091 symptr->patptr = (char *) lenptr;
1092 savsym.length = measure_struct(symptr);
1096 symptr->howlen = e_no_len;
1101 while (patptr < patend) {
1102 if (isSPACE(*patptr))
1104 else if (*patptr == '#') {
1106 while (patptr < patend && *patptr != '\n')
1108 if (patptr < patend)
1111 if (*patptr == '/') {
1112 symptr->flags |= FLAG_SLASH;
1114 if (patptr < patend &&
1115 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1116 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1117 _action( symptr ) );
1123 /* at end - no count, no / */
1124 symptr->howlen = e_no_len;
1128 symptr->code = code;
1129 symptr->patptr = (char *) patptr;
1133 symptr->patptr = (char *) patptr;
1137 #ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
1139 There is no way to cleanly handle the case where we should process the
1140 string per byte in its upgraded form while it's really in downgraded form
1141 (e.g. estimates like strend-s as an upper bound for the number of
1142 characters left wouldn't work). So if we foresee the need of this
1143 (pattern starts with U or contains U0), we want to work on the encoded
1144 version of the string. Users are advised to upgrade their pack string
1145 themselves if they need to do a lot of unpacks like this on it
1147 /* XXX These can be const */
1149 need_utf8(const char *pat, const char *patend)
1152 while (pat < patend) {
1153 if (pat[0] == '#') {
1155 pat = (const char *) memchr(pat, '\n', patend-pat);
1156 if (!pat) return FALSE;
1157 } else if (pat[0] == 'U') {
1158 if (first || pat[1] == '0') return TRUE;
1159 } else first = FALSE;
1166 first_symbol(const char *pat, const char *patend) {
1167 while (pat < patend) {
1168 if (pat[0] != '#') return pat[0];
1170 pat = (const char *) memchr(pat, '\n', patend-pat);
1179 =for apidoc unpackstring
1181 The engine implementing unpack() Perl function. C<unpackstring> puts the
1182 extracted list items on the stack and returns the number of elements.
1183 Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1188 Perl_unpackstring(pTHX_ char *pat, char *patend, char *s, char *strend,
1193 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1194 #ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
1195 else if (need_utf8(pat, patend)) {
1196 /* We probably should try to avoid this in case a scalar context call
1197 wouldn't get to the "U0" */
1198 STRLEN len = strend - s;
1199 s = (char *) bytes_to_utf8((U8 *) s, &len);
1202 flags |= FLAG_DO_UTF8;
1205 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1206 flags |= FLAG_PARSE_UTF8;
1208 if (flags & FLAG_DO_UTF8)
1209 flags |= FLAG_PARSE_UTF8;
1212 TEMPSYM_INIT(&sym, pat, patend, flags);
1214 return unpack_rec(&sym, s, s, strend, NULL );
1219 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
1223 const I32 start_sp_offset = SP - PL_stack_base;
1229 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1230 bool beyond = FALSE;
1231 bool explicit_length;
1232 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1233 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1234 symptr->strbeg = s - strbeg;
1236 while (next_symbol(symptr)) {
1239 I32 datumtype = symptr->code;
1240 /* do first one only unless in list context
1241 / is implemented by unpacking the count, then popping it from the
1242 stack, so must check that we're not in the middle of a / */
1243 if ( unpack_only_one
1244 && (SP - PL_stack_base == start_sp_offset + 1)
1245 && (datumtype != '/') ) /* XXX can this be omitted */
1248 switch (howlen = symptr->howlen) {
1250 len = strend - strbeg; /* long enough */
1253 /* e_no_len and e_number */
1254 len = symptr->length;
1258 explicit_length = TRUE;
1260 beyond = s >= strend;
1262 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1264 /* props nonzero means we can process this letter. */
1265 const long size = props & PACK_SIZE_MASK;
1266 const long howmany = (strend - s) / size;
1270 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1271 if (len && unpack_only_one) len = 1;
1277 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1279 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1282 if (howlen == e_no_len)
1283 len = 16; /* len is not specified */
1291 tempsym_t savsym = *symptr;
1292 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1293 symptr->flags |= group_modifiers;
1294 symptr->patend = savsym.grpend;
1295 symptr->previous = &savsym;
1299 symptr->patptr = savsym.grpbeg;
1300 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1301 else symptr->flags &= ~FLAG_PARSE_UTF8;
1302 unpack_rec(symptr, s, strbeg, strend, &s);
1303 if (s == strend && savsym.howlen == e_star)
1304 break; /* No way to continue */
1307 savsym.flags = symptr->flags & ~group_modifiers;
1311 #ifdef PERL_PACK_CAN_DOT
1312 # ifdef PERL_PACK_CAN_SHRIEKSIGN
1313 case '.' | TYPE_IS_SHRIEKING:
1318 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1319 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1320 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1321 const bool u8 = utf8;
1323 if (howlen == e_star) from = strbeg;
1324 else if (len <= 0) from = s;
1326 tempsym_t *group = symptr;
1328 while (--len && group) group = group->previous;
1329 from = group ? strbeg + group->strbeg : strbeg;
1332 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1333 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1334 XPUSHs(sv_2mortal(sv));
1338 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1339 case '@' | TYPE_IS_SHRIEKING:
1342 s = strbeg + symptr->strbeg;
1343 #ifndef PERL_PACK_POSITIONS_ARE_BYTES_COMPATIBILITY
1344 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1345 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1346 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1352 Perl_croak(aTHX_ "'@' outside of string in unpack");
1357 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1362 Perl_croak(aTHX_ "'@' outside of string in unpack");
1366 case 'X' | TYPE_IS_SHRIEKING:
1367 if (!len) /* Avoid division by 0 */
1369 #ifndef PERL_PACK_POSITIONS_ARE_BYTES_COMPATIBILITY
1371 const char *hop, *last;
1373 hop = last = strbeg;
1375 hop += UTF8SKIP(hop);
1382 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1387 len = (s - strbeg) % len;
1390 #ifndef PERL_PACK_POSITIONS_ARE_BYTES_COMPATIBILITY
1394 Perl_croak(aTHX_ "'X' outside of string in unpack");
1395 while (--s, UTF8_IS_CONTINUATION(*s)) {
1397 Perl_croak(aTHX_ "'X' outside of string in unpack");
1404 if (len > s - strbeg)
1405 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1409 case 'x' | TYPE_IS_SHRIEKING: {
1411 if (!len) /* Avoid division by 0 */
1413 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1414 else ai32 = (s - strbeg) % len;
1415 if (ai32 == 0) break;
1423 Perl_croak(aTHX_ "'x' outside of string in unpack");
1428 if (len > strend - s)
1429 Perl_croak(aTHX_ "'x' outside of string in unpack");
1434 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1440 /* Preliminary length estimate is assumed done in 'W' */
1441 if (len > strend - s) len = strend - s;
1444 #ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1448 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1449 if (hop >= strend) {
1451 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1456 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1460 if (len > strend - s)
1463 if (datumtype == 'Z') {
1464 /* 'Z' strips stuff after first null */
1465 const char *ptr, *end;
1467 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1468 sv = newSVpvn(s, ptr-s);
1469 if (howlen == e_star) /* exact for 'Z*' */
1470 len = ptr-s + (ptr != strend ? 1 : 0);
1471 } else if (datumtype == 'A') {
1472 /* 'A' strips both nulls and spaces */
1474 #ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1475 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1476 for (ptr = s+len-1; ptr >= s; ptr--)
1477 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1478 !is_utf8_space((U8 *) ptr)) break;
1479 if (ptr >= s) ptr += UTF8SKIP(ptr);
1482 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1486 for (ptr = s+len-1; ptr >= s; ptr--)
1487 if (*ptr != 0 && !isSPACE(*ptr)) break;
1490 sv = newSVpvn(s, ptr-s);
1491 } else sv = newSVpvn(s, len);
1493 #ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1496 /* Undo any upgrade done due to need_utf8() */
1497 if (!(symptr->flags & FLAG_WAS_UTF8))
1498 sv_utf8_downgrade(sv, 0);
1501 XPUSHs(sv_2mortal(sv));
1507 if (howlen == e_star || len > (strend - s) * 8)
1508 len = (strend - s) * 8;
1512 Newxz(PL_bitcount, 256, char);
1513 for (bits = 1; bits < 256; bits++) {
1514 if (bits & 1) PL_bitcount[bits]++;
1515 if (bits & 2) PL_bitcount[bits]++;
1516 if (bits & 4) PL_bitcount[bits]++;
1517 if (bits & 8) PL_bitcount[bits]++;
1518 if (bits & 16) PL_bitcount[bits]++;
1519 if (bits & 32) PL_bitcount[bits]++;
1520 if (bits & 64) PL_bitcount[bits]++;
1521 if (bits & 128) PL_bitcount[bits]++;
1524 #ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1526 while (len >= 8 && s < strend) {
1527 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1533 cuv += PL_bitcount[*(U8 *)s++];
1536 if (len && s < strend) {
1538 #ifdef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1539 bits = SHIFT_BYTE(0, s, strend, datumtype);
1541 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1543 if (datumtype == 'b')
1545 if (bits & 1) cuv++;
1550 if (bits & 0x80) cuv++;
1557 sv = sv_2mortal(NEWSV(35, len ? len : 1));
1560 if (datumtype == 'b') {
1562 const I32 ai32 = len;
1563 for (len = 0; len < ai32; len++) {
1564 if (len & 7) bits >>= 1;
1565 #ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1567 if (s >= strend) break;
1568 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1571 else bits = *(U8 *) s++;
1572 *str++ = bits & 1 ? '1' : '0';
1576 const I32 ai32 = len;
1577 for (len = 0; len < ai32; len++) {
1578 if (len & 7) bits <<= 1;
1579 #ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1581 if (s >= strend) break;
1582 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1585 else bits = *(U8 *) s++;
1586 *str++ = bits & 0x80 ? '1' : '0';
1590 SvCUR_set(sv, str - SvPVX(sv));
1597 /* Preliminary length estimate, acceptable for utf8 too */
1598 if (howlen == e_star || len > (strend - s) * 2)
1599 len = (strend - s) * 2;
1600 sv = sv_2mortal(NEWSV(35, len ? len : 1));
1603 if (datumtype == 'h') {
1606 for (len = 0; len < ai32; len++) {
1607 if (len & 1) bits >>= 4;
1608 #ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1610 if (s >= strend) break;
1611 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1614 else bits = * (U8 *) s++;
1615 *str++ = PL_hexdigit[bits & 15];
1619 const I32 ai32 = len;
1620 for (len = 0; len < ai32; len++) {
1621 if (len & 1) bits <<= 4;
1622 #ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1624 if (s >= strend) break;
1625 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1628 else bits = *(U8 *) s++;
1629 *str++ = PL_hexdigit[(bits >> 4) & 15];
1633 SvCUR_set(sv, str - SvPVX(sv));
1639 #ifdef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1640 int aint = SHIFT_BYTE(0, s, strend, datumtype);
1642 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
1644 if (aint >= 128) /* fake up signed chars */
1647 PUSHs(sv_2mortal(newSViv((IV)aint)));
1648 else if (checksum > bits_in_uv)
1649 cdouble += (NV)aint;
1655 #ifdef PERL_PACK_CAN_W
1660 if (explicit_length && datumtype == 'C')
1661 #ifdef PERL_PACK_REVERSE_UTF8_MODE_COMPATIBILITY
1662 /* In maint, continue to switch from "character" mode */
1665 /* Switch to "character" mode */
1666 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1670 #ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1671 if (datumtype == 'C' ?
1672 (symptr->flags & FLAG_DO_UTF8) &&
1673 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
1674 while (len-- > 0 && s < strend) {
1676 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1677 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1678 if (retlen == (STRLEN) -1 || retlen == 0)
1679 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1682 PUSHs(sv_2mortal(newSVuv((UV) val)));
1683 else if (checksum > bits_in_uv)
1684 cdouble += (NV) val;
1692 const U8 ch = *(U8 *) s++;
1693 PUSHs(sv_2mortal(newSVuv((UV) ch)));
1695 else if (checksum > bits_in_uv)
1696 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1698 while (len-- > 0) cuv += *(U8 *) s++;
1702 if (explicit_length) {
1703 #ifdef PERL_PACK_REVERSE_UTF8_MODE_COMPATIBILITY
1704 /* In maint, switch to "character" mode */
1707 /* Switch to "bytes in UTF-8" mode */
1708 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1710 /* Should be impossible due to the need_utf8() test */
1711 Perl_croak(aTHX_ "U0 mode on a byte string");
1716 #ifdef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1720 if (len > strend - s) len = strend - s;
1722 if (len && unpack_only_one) len = 1;
1726 while (len-- > 0 && s < strend) {
1729 #ifndef PERL_PACK_CHAR_TEMPLATE_BUG_COMPATIBILITY
1731 U8 result[UTF8_MAXLEN];
1732 const char *ptr = s;
1734 /* Bug: warns about bad utf8 even if we are short on bytes
1735 and will break out of the loop */
1736 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1739 len = UTF8SKIP(result);
1740 if (!uni_to_bytes(aTHX_ &ptr, strend,
1741 (char *) &result[1], len-1, 'U')) break;
1742 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1747 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1748 if (retlen == (STRLEN) -1 || retlen == 0)
1749 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1753 PUSHs(sv_2mortal(newSVuv((UV) auv)));
1754 else if (checksum > bits_in_uv)
1755 cdouble += (NV) auv;
1760 case 's' | TYPE_IS_SHRIEKING:
1761 #if SHORTSIZE != SIZE16
1764 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1765 DO_BO_UNPACK(ashort, s);
1767 PUSHs(sv_2mortal(newSViv((IV)ashort)));
1768 else if (checksum > bits_in_uv)
1769 cdouble += (NV)ashort;
1781 #if U16SIZE > SIZE16
1784 SHIFT16(utf8, s, strend, &ai16, datumtype);
1785 DO_BO_UNPACK(ai16, 16);
1786 #if U16SIZE > SIZE16
1791 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1792 else if (checksum > bits_in_uv)
1793 cdouble += (NV)ai16;
1798 case 'S' | TYPE_IS_SHRIEKING:
1799 #if SHORTSIZE != SIZE16
1801 unsigned short aushort;
1802 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1803 DO_BO_UNPACK(aushort, s);
1805 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
1806 else if (checksum > bits_in_uv)
1807 cdouble += (NV)aushort;
1820 #if U16SIZE > SIZE16
1823 SHIFT16(utf8, s, strend, &au16, datumtype);
1824 DO_BO_UNPACK(au16, 16);
1826 if (datumtype == 'n')
1827 au16 = PerlSock_ntohs(au16);
1830 if (datumtype == 'v')
1834 PUSHs(sv_2mortal(newSVuv((UV)au16)));
1835 else if (checksum > bits_in_uv)
1836 cdouble += (NV) au16;
1841 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1842 case 'v' | TYPE_IS_SHRIEKING:
1843 case 'n' | TYPE_IS_SHRIEKING:
1846 # if U16SIZE > SIZE16
1849 SHIFT16(utf8, s, strend, &ai16, datumtype);
1851 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1852 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1853 # endif /* HAS_NTOHS */
1855 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1856 ai16 = (I16) vtohs((U16) ai16);
1857 # endif /* HAS_VTOHS */
1859 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1860 else if (checksum > bits_in_uv)
1861 cdouble += (NV) ai16;
1866 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1868 case 'i' | TYPE_IS_SHRIEKING:
1871 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1872 DO_BO_UNPACK(aint, i);
1874 PUSHs(sv_2mortal(newSViv((IV)aint)));
1875 else if (checksum > bits_in_uv)
1876 cdouble += (NV)aint;
1882 case 'I' | TYPE_IS_SHRIEKING:
1885 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1886 DO_BO_UNPACK(auint, i);
1888 PUSHs(sv_2mortal(newSVuv((UV)auint)));
1889 else if (checksum > bits_in_uv)
1890 cdouble += (NV)auint;
1898 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1899 #if IVSIZE == INTSIZE
1900 DO_BO_UNPACK(aiv, i);
1901 #elif IVSIZE == LONGSIZE
1902 DO_BO_UNPACK(aiv, l);
1903 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1904 DO_BO_UNPACK(aiv, 64);
1906 Perl_croak(aTHX_ "'j' not supported on this platform");
1909 PUSHs(sv_2mortal(newSViv(aiv)));
1910 else if (checksum > bits_in_uv)
1919 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1920 #if IVSIZE == INTSIZE
1921 DO_BO_UNPACK(auv, i);
1922 #elif IVSIZE == LONGSIZE
1923 DO_BO_UNPACK(auv, l);
1924 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1925 DO_BO_UNPACK(auv, 64);
1927 Perl_croak(aTHX_ "'J' not supported on this platform");
1930 PUSHs(sv_2mortal(newSVuv(auv)));
1931 else if (checksum > bits_in_uv)
1937 case 'l' | TYPE_IS_SHRIEKING:
1938 #if LONGSIZE != SIZE32
1941 SHIFT_VAR(utf8, s, strend, along, datumtype);
1942 DO_BO_UNPACK(along, l);
1944 PUSHs(sv_2mortal(newSViv((IV)along)));
1945 else if (checksum > bits_in_uv)
1946 cdouble += (NV)along;
1957 #if U32SIZE > SIZE32
1960 SHIFT32(utf8, s, strend, &ai32, datumtype);
1961 DO_BO_UNPACK(ai32, 32);
1962 #if U32SIZE > SIZE32
1963 if (ai32 > 2147483647) ai32 -= 4294967296;
1966 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1967 else if (checksum > bits_in_uv)
1968 cdouble += (NV)ai32;
1973 case 'L' | TYPE_IS_SHRIEKING:
1974 #if LONGSIZE != SIZE32
1976 unsigned long aulong;
1977 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1978 DO_BO_UNPACK(aulong, l);
1980 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
1981 else if (checksum > bits_in_uv)
1982 cdouble += (NV)aulong;
1995 #if U32SIZE > SIZE32
1998 SHIFT32(utf8, s, strend, &au32, datumtype);
1999 DO_BO_UNPACK(au32, 32);
2001 if (datumtype == 'N')
2002 au32 = PerlSock_ntohl(au32);
2005 if (datumtype == 'V')
2009 PUSHs(sv_2mortal(newSVuv((UV)au32)));
2010 else if (checksum > bits_in_uv)
2011 cdouble += (NV)au32;
2016 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2017 case 'V' | TYPE_IS_SHRIEKING:
2018 case 'N' | TYPE_IS_SHRIEKING:
2021 # if U32SIZE > SIZE32
2024 SHIFT32(utf8, s, strend, &ai32, datumtype);
2026 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
2027 ai32 = (I32)PerlSock_ntohl((U32)ai32);
2030 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
2031 ai32 = (I32)vtohl((U32)ai32);
2034 PUSHs(sv_2mortal(newSViv((IV)ai32)));
2035 else if (checksum > bits_in_uv)
2036 cdouble += (NV)ai32;
2041 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
2045 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2046 DO_BO_UNPACK_PC(aptr);
2047 /* newSVpv generates undef if aptr is NULL */
2048 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
2056 while (len > 0 && s < strend) {
2058 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
2059 auv = (auv << 7) | (ch & 0x7f);
2060 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
2063 PUSHs(sv_2mortal(newSVuv(auv)));
2068 if (++bytes >= sizeof(UV)) { /* promote to string */
2071 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
2072 while (s < strend) {
2073 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
2074 sv = mul128(sv, (U8)(ch & 0x7f));
2080 t = SvPV_nolen_const(sv);
2083 sv_chop(sv, (char *)t);
2084 PUSHs(sv_2mortal(sv));
2089 if ((s >= strend) && bytes)
2090 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
2094 if (symptr->howlen == e_star)
2095 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
2097 if (s + sizeof(char*) <= strend) {
2099 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2100 DO_BO_UNPACK_PC(aptr);
2101 /* newSVpvn generates undef if aptr is NULL */
2102 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
2109 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2110 DO_BO_UNPACK(aquad, 64);
2112 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
2113 newSViv((IV)aquad) : newSVnv((NV)aquad)));
2114 else if (checksum > bits_in_uv)
2115 cdouble += (NV)aquad;
2123 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2124 DO_BO_UNPACK(auquad, 64);
2126 PUSHs(sv_2mortal(auquad <= UV_MAX ?
2127 newSVuv((UV)auquad):newSVnv((NV)auquad)));
2128 else if (checksum > bits_in_uv)
2129 cdouble += (NV)auquad;
2134 #endif /* HAS_QUAD */
2135 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2139 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
2140 DO_BO_UNPACK_N(afloat, float);
2142 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2150 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
2151 DO_BO_UNPACK_N(adouble, double);
2153 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2161 SHIFT_VAR(utf8, s, strend, anv, datumtype);
2162 DO_BO_UNPACK_N(anv, NV);
2164 PUSHs(sv_2mortal(newSVnv(anv)));
2169 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2172 long double aldouble;
2173 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
2174 DO_BO_UNPACK_N(aldouble, long double);
2176 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2178 cdouble += aldouble;
2184 * Initialise the decode mapping. By using a table driven
2185 * algorithm, the code will be character-set independent
2186 * (and just as fast as doing character arithmetic)
2188 if (PL_uudmap['M'] == 0) {
2191 for (i = 0; i < sizeof(PL_uuemap); ++i)
2192 PL_uudmap[(U8)PL_uuemap[i]] = i;
2194 * Because ' ' and '`' map to the same value,
2195 * we need to decode them both the same.
2200 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2201 sv = sv_2mortal(NEWSV(42, l));
2202 if (l) SvPOK_on(sv);
2205 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2211 next_uni_uu(aTHX_ &s, strend, &a);
2212 next_uni_uu(aTHX_ &s, strend, &b);
2213 next_uni_uu(aTHX_ &s, strend, &c);
2214 next_uni_uu(aTHX_ &s, strend, &d);
2215 hunk[0] = (char)((a << 2) | (b >> 4));
2216 hunk[1] = (char)((b << 4) | (c >> 2));
2217 hunk[2] = (char)((c << 6) | d);
2218 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2226 /* possible checksum byte */
2227 const char *skip = s+UTF8SKIP(s);
2228 if (skip < strend && *skip == '\n')
2234 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2239 len = PL_uudmap[*(U8*)s++] & 077;
2241 if (s < strend && ISUUCHAR(*s))
2242 a = PL_uudmap[*(U8*)s++] & 077;
2245 if (s < strend && ISUUCHAR(*s))
2246 b = PL_uudmap[*(U8*)s++] & 077;
2249 if (s < strend && ISUUCHAR(*s))
2250 c = PL_uudmap[*(U8*)s++] & 077;
2253 if (s < strend && ISUUCHAR(*s))
2254 d = PL_uudmap[*(U8*)s++] & 077;
2257 hunk[0] = (char)((a << 2) | (b >> 4));
2258 hunk[1] = (char)((b << 4) | (c >> 2));
2259 hunk[2] = (char)((c << 6) | d);
2260 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2265 else /* possible checksum byte */
2266 if (s + 1 < strend && s[1] == '\n')
2275 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2276 (checksum > bits_in_uv &&
2277 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2280 anv = (NV) (1 << (checksum & 15));
2281 while (checksum >= 16) {
2285 while (cdouble < 0.0)
2287 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2288 sv = newSVnv(cdouble);
2291 if (checksum < bits_in_uv) {
2292 UV mask = ((UV)1 << checksum) - 1;
2297 XPUSHs(sv_2mortal(sv));
2301 if (symptr->flags & FLAG_SLASH){
2302 if (SP - PL_stack_base - start_sp_offset <= 0)
2303 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2304 if( next_symbol(symptr) ){
2305 if( symptr->howlen == e_number )
2306 Perl_croak(aTHX_ "Count after length/code in unpack" );
2308 /* ...end of char buffer then no decent length available */
2309 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2311 /* take top of stack (hope it's numeric) */
2314 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2317 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2319 datumtype = symptr->code;
2320 explicit_length = FALSE;
2328 return SP - PL_stack_base - start_sp_offset;
2335 I32 gimme = GIMME_V;
2338 const char *pat = SvPV_const(left, llen);
2339 const char *s = SvPV_const(right, rlen);
2340 const char *strend = s + rlen;
2341 const char *patend = pat + llen;
2345 cnt = unpackstring((char *)pat, (char *)patend, (char *)s, (char *)strend,
2346 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2347 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2350 if ( !cnt && gimme == G_SCALAR )
2351 PUSHs(&PL_sv_undef);
2356 doencodes(U8 *h, const char *s, I32 len)
2358 *h++ = PL_uuemap[len];
2360 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2361 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2362 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2363 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2368 const char r = (len > 1 ? s[1] : '\0');
2369 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2370 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2371 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2372 *h++ = PL_uuemap[0];
2379 S_is_an_int(pTHX_ const char *s, STRLEN l)
2381 SV *result = newSVpvn(s, l);
2382 char *const result_c = SvPV_nolen(result); /* convenience */
2383 char *out = result_c;
2393 SvREFCNT_dec(result);
2416 SvREFCNT_dec(result);
2422 SvCUR_set(result, out - result_c);
2426 /* pnum must be '\0' terminated */
2428 S_div128(pTHX_ SV *pnum, bool *done)
2431 char * const s = SvPV(pnum, len);
2437 const int i = m * 10 + (*t - '0');
2438 const int r = (i >> 7); /* r < 10 */
2446 SvCUR_set(pnum, (STRLEN) (t - s));
2451 =for apidoc pack_cat
2453 The engine implementing pack() Perl function. Note: parameters next_in_list and
2454 flags are not used. This call should not be used; use packlist instead.
2460 Perl_pack_cat(pTHX_ SV *cat, char *pat, char *patend, register SV **beglist,
2461 SV **endlist, SV ***next_in_list, U32 flags)
2464 PERL_UNUSED_ARG(next_in_list);
2465 PERL_UNUSED_ARG(flags);
2467 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2469 (void)pack_rec( cat, &sym, beglist, endlist );
2474 =for apidoc packlist
2476 The engine implementing pack() Perl function.
2482 Perl_packlist(pTHX_ SV *cat, char *pat, char *patend, register SV **beglist,
2488 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2490 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2491 Also make sure any UTF8 flag is loaded */
2492 SvPV_force(cat, no_len);
2493 if (DO_UTF8(cat)) sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2495 (void)pack_rec( cat, &sym, beglist, endlist );
2498 /* like sv_utf8_upgrade, but also repoint the group start markers */
2500 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2503 const char *from_ptr, *from_start, *from_end, **marks, **m;
2504 char *to_start, *to_ptr;
2506 if (SvUTF8(sv)) return;
2508 from_start = SvPVX(sv);
2509 from_end = from_start + SvCUR(sv);
2510 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2511 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2512 if (from_ptr == from_end) {
2513 /* Simple case: no character needs to be changed */
2518 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2519 New('U', to_start, len, char);
2520 Copy(from_start, to_start, from_ptr-from_start, char);
2521 to_ptr = to_start + (from_ptr-from_start);
2523 New('U', marks, sym_ptr->level+2, const char *);
2524 for (group=sym_ptr; group; group = group->previous)
2525 marks[group->level] = from_start + group->strbeg;
2526 marks[sym_ptr->level+1] = from_end+1;
2527 for (m = marks; *m < from_ptr; m++)
2528 *m = to_start + (*m-from_start);
2530 for (;from_ptr < from_end; from_ptr++) {
2531 while (*m == from_ptr) *m++ = to_ptr;
2532 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2536 while (*m == from_ptr) *m++ = to_ptr;
2537 if (m != marks + sym_ptr->level+1) {
2540 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2542 for (group=sym_ptr; group; group = group->previous)
2543 group->strbeg = marks[group->level] - to_start;
2548 SvLEN(sv) += SvIVX(sv);
2549 from_start -= SvIVX(sv);
2552 SvFLAGS(sv) &= ~SVf_OOK;
2555 Safefree(from_start);
2556 SvPVX(sv) = to_start;
2557 SvCUR(sv) = to_ptr - to_start;
2562 /* Exponential string grower. Makes string extension effectively O(n)
2563 needed says how many extra bytes we need (not counting the final '\0')
2564 Only grows the string if there is an actual lack of space
2567 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2568 const STRLEN cur = SvCUR(sv);
2569 const STRLEN len = SvLEN(sv);
2571 if (len - cur > needed) return SvPVX(sv);
2572 extend = needed > len ? needed : len;
2573 return SvGROW(sv, len+extend+1);
2578 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2580 tempsym_t lookahead;
2581 I32 items = endlist - beglist;
2582 bool found = next_symbol(symptr);
2583 #ifdef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
2584 const bool utf8 = 0;
2586 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2588 bool warn_utf8 = ckWARN(WARN_UTF8);
2590 if (symptr->level == 0 && found && symptr->code == 'U') {
2591 marked_upgrade(aTHX_ cat, symptr);
2592 symptr->flags |= FLAG_DO_UTF8;
2593 #ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
2597 symptr->strbeg = SvCUR(cat);
2603 SV *lengthcode = NULL;
2604 I32 datumtype = symptr->code;
2605 howlen_t howlen = symptr->howlen;
2606 char *start = SvPVX(cat);
2607 char *cur = start + SvCUR(cat);
2609 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2613 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2617 /* e_no_len and e_number */
2618 len = symptr->length;
2623 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2625 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2626 /* We can process this letter. */
2627 STRLEN size = props & PACK_SIZE_MASK;
2628 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2632 /* Look ahead for next symbol. Do we have code/code? */
2633 lookahead = *symptr;
2634 found = next_symbol(&lookahead);
2635 if (symptr->flags & FLAG_SLASH) {
2637 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2638 if (strchr("aAZ", lookahead.code)) {
2639 if (lookahead.howlen == e_number) count = lookahead.length;
2642 if (SvGAMAGIC(*beglist)) {
2643 /* Avoid reading the active data more than once
2644 by copying it to a temporary. */
2646 const char *const pv = SvPV_const(*beglist, len);
2647 SV *const temp = sv_2mortal(newSVpvn(pv, len));
2648 if (SvUTF8(*beglist))
2652 count = DO_UTF8(*beglist) ?
2653 sv_len_utf8(*beglist) : sv_len(*beglist);
2656 if (lookahead.code == 'Z') count++;
2659 if (lookahead.howlen == e_number && lookahead.length < items)
2660 count = lookahead.length;
2663 lookahead.howlen = e_number;
2664 lookahead.length = count;
2665 lengthcode = sv_2mortal(newSViv(count));
2668 /* Code inside the switch must take care to properly update
2669 cat (CUR length and '\0' termination) if it updated *cur and
2670 doesn't simply leave using break */
2671 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2673 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2674 (int) TYPE_NO_MODIFIERS(datumtype));
2676 Perl_croak(aTHX_ "'%%' may not be used in pack");
2679 #ifdef PERL_PACK_CAN_DOT
2680 # ifdef PERL_PACK_CAN_SHRIEKSIGN
2681 case '.' | TYPE_IS_SHRIEKING:
2684 if (howlen == e_star) from = start;
2685 else if (len == 0) from = cur;
2687 tempsym_t *group = symptr;
2689 while (--len && group) group = group->previous;
2690 from = group ? start + group->strbeg : start;
2693 len = SvIV(fromstr);
2696 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2697 case '@' | TYPE_IS_SHRIEKING:
2700 from = start + symptr->strbeg;
2701 #ifdef PERL_PACK_CAN_DOT
2704 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2705 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2706 #else /* PERL_PACK_CAN_SHRIEKSIGN */
2710 while (len && from < cur) {
2711 from += UTF8SKIP(from);
2715 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2717 /* Here we know from == cur */
2719 GROWING(0, cat, start, cur, len);
2720 Zero(cur, len, char);
2722 } else if (from < cur) {
2725 } else goto no_change;
2733 if (len > 0) goto grow;
2734 if (len == 0) goto no_change;
2741 tempsym_t savsym = *symptr;
2742 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2743 symptr->flags |= group_modifiers;
2744 symptr->patend = savsym.grpend;
2746 symptr->previous = &lookahead;
2749 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2750 else symptr->flags &= ~FLAG_PARSE_UTF8;
2751 was_utf8 = SvUTF8(cat);
2752 symptr->patptr = savsym.grpbeg;
2753 beglist = pack_rec(cat, symptr, beglist, endlist);
2754 #ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
2755 if (SvUTF8(cat) != was_utf8)
2756 /* This had better be an upgrade while in utf8==0 mode */
2760 if (savsym.howlen == e_star && beglist == endlist)
2761 break; /* No way to continue */
2763 lookahead.flags = symptr->flags & ~group_modifiers;
2766 case 'X' | TYPE_IS_SHRIEKING:
2767 if (!len) /* Avoid division by 0 */
2774 hop += UTF8SKIP(hop);
2781 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2785 len = (cur-start) % len;
2789 if (len < 1) goto no_change;
2793 Perl_croak(aTHX_ "'%c' outside of string in pack",
2794 (int) TYPE_NO_MODIFIERS(datumtype));
2795 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2797 Perl_croak(aTHX_ "'%c' outside of string in pack",
2798 (int) TYPE_NO_MODIFIERS(datumtype));
2804 if (cur - start < len)
2805 Perl_croak(aTHX_ "'%c' outside of string in pack",
2806 (int) TYPE_NO_MODIFIERS(datumtype));
2809 if (cur < start+symptr->strbeg) {
2810 /* Make sure group starts don't point into the void */
2812 const STRLEN length = cur-start;
2813 for (group = symptr;
2814 group && length < group->strbeg;
2815 group = group->previous) group->strbeg = length;
2816 lookahead.strbeg = length;
2819 case 'x' | TYPE_IS_SHRIEKING: {
2821 if (!len) /* Avoid division by 0 */
2823 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2824 else ai32 = (cur - start) % len;
2825 if (ai32 == 0) goto no_change;
2837 aptr = (char *) SvPV_const(fromstr, fromlen);
2838 #ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
2839 if (DO_UTF8(fromstr)) {
2840 const char *end, *s;
2842 if (!utf8 && !SvUTF8(cat)) {
2843 marked_upgrade(aTHX_ cat, symptr);
2844 lookahead.flags |= FLAG_DO_UTF8;
2845 lookahead.strbeg = symptr->strbeg;
2848 cur = start + SvCUR(cat);
2850 if (howlen == e_star) {
2851 if (utf8) goto string_copy;
2855 end = aptr + fromlen;
2856 fromlen = datumtype == 'Z' ? len-1 : len;
2857 while ((I32) fromlen > 0 && s < end) {
2862 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2865 if (datumtype == 'Z') len++;
2871 fromlen = len - fromlen;
2872 if (datumtype == 'Z') fromlen--;
2873 if (howlen == e_star) {
2875 if (datumtype == 'Z') len++;
2877 GROWING(0, cat, start, cur, len);
2878 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2879 datumtype | TYPE_IS_PACK))
2880 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2884 if (howlen == e_star) {
2886 if (datumtype == 'Z') len++;
2888 if (len <= (I32) fromlen) {
2890 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2892 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2894 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2895 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2897 while (fromlen > 0) {
2898 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2905 #ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
2908 if (howlen == e_star) {
2910 if (datumtype == 'Z') len++;
2912 if (len <= (I32) fromlen) {
2914 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2916 GROWING(0, cat, start, cur, len);
2917 Copy(aptr, cur, fromlen, char);
2921 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2927 const char *str, *end;
2934 str = SvPV_const(fromstr, fromlen);
2935 end = str + fromlen;
2936 #ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
2937 if (DO_UTF8(fromstr)) {
2939 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2943 utf8_source = FALSE;
2944 utf8_flags = 0; /* Unused, but keep compilers happy */
2946 if (howlen == e_star) len = fromlen;
2947 field_len = (len+7)/8;
2948 GROWING(utf8, cat, start, cur, field_len);
2949 if (len > (I32)fromlen) len = fromlen;
2952 if (datumtype == 'B')
2956 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2958 } else bits |= *str++ & 1;
2959 if (l & 7) bits <<= 1;
2961 PUSH_BYTE(utf8, cur, bits);
2966 /* datumtype == 'b' */
2970 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2971 if (val & 1) bits |= 0x80;
2972 } else if (*str++ & 1)
2974 if (l & 7) bits >>= 1;
2976 PUSH_BYTE(utf8, cur, bits);
2982 if (datumtype == 'B')
2983 bits <<= 7 - (l & 7);
2985 bits >>= 7 - (l & 7);
2986 PUSH_BYTE(utf8, cur, bits);
2989 /* Determine how many chars are left in the requested field */
2991 if (howlen == e_star) field_len = 0;
2992 else field_len -= l;
2993 Zero(cur, field_len, char);
3006 str = SvPV(fromstr, fromlen);
3007 end = str + fromlen;
3008 #ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
3009 if (DO_UTF8(fromstr)) {
3011 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
3015 utf8_source = FALSE;
3016 utf8_flags = 0; /* Unused, but keep compilers happy */
3018 if (howlen == e_star) len = fromlen;
3019 field_len = (len+1)/2;
3020 GROWING(utf8, cat, start, cur, field_len);
3021 if (!utf8 && len > (I32)fromlen) len = fromlen;
3024 if (datumtype == 'H')
3028 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
3029 if (val < 256 && isALPHA(val))
3030 bits |= (val + 9) & 0xf;
3033 } else if (isALPHA(*str))
3034 bits |= (*str++ + 9) & 0xf;
3036 bits |= *str++ & 0xf;
3037 if (l & 1) bits <<= 4;
3039 PUSH_BYTE(utf8, cur, bits);
3047 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
3048 if (val < 256 && isALPHA(val))
3049 bits |= ((val + 9) & 0xf) << 4;
3051 bits |= (val & 0xf) << 4;
3052 } else if (isALPHA(*str))
3053 bits |= ((*str++ + 9) & 0xf) << 4;
3055 bits |= (*str++ & 0xf) << 4;
3056 if (l & 1) bits >>= 4;
3058 PUSH_BYTE(utf8, cur, bits);
3064 PUSH_BYTE(utf8, cur, bits);
3067 /* Determine how many chars are left in the requested field */
3069 if (howlen == e_star) field_len = 0;
3070 else field_len -= l;
3071 Zero(cur, field_len, char);
3079 aiv = SvIV(fromstr);
3080 if ((-128 > aiv || aiv > 127) &&
3082 Perl_warner(aTHX_ packWARN(WARN_PACK),
3083 "Character in 'c' format wrapped in pack");
3084 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
3089 #ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
3090 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
3094 GROWING(0, cat, start, cur, len);
3098 aiv = SvIV(fromstr);
3099 if ((0 > aiv || aiv > 0xff) &&
3101 Perl_warner(aTHX_ packWARN(WARN_PACK),
3102 "Character in 'C' format wrapped in pack");
3103 *cur++ = (char)(aiv & 0xff);
3106 #ifdef PERL_PACK_CAN_W
3109 U8 in_bytes = IN_BYTES;
3111 end = start+SvLEN(cat)-1;
3112 if (utf8) end -= UTF8_MAXLEN-1;
3116 auv = SvUV(fromstr);
3117 if (in_bytes) auv = auv % 0x100;
3122 SvCUR(cat) = cur - start;
3124 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3125 end = start+SvLEN(cat)-UTF8_MAXLEN;
3127 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3130 0 : UNICODE_ALLOW_ANY);
3135 SvCUR(cat) = cur - start;
3136 marked_upgrade(aTHX_ cat, symptr);
3137 lookahead.flags |= FLAG_DO_UTF8;
3138 lookahead.strbeg = symptr->strbeg;
3141 cur = start + SvCUR(cat);
3142 end = start+SvLEN(cat)-UTF8_MAXLEN;
3145 if (ckWARN(WARN_PACK))
3146 Perl_warner(aTHX_ packWARN(WARN_PACK),
3147 "Character in 'W' format wrapped in pack");
3152 SvCUR(cat) = cur - start;
3153 GROWING(0, cat, start, cur, len+1);
3154 end = start+SvLEN(cat)-1;
3156 *(U8 *) cur++ = auv;
3166 #ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
3167 if (!(symptr->flags & FLAG_DO_UTF8)) {
3168 marked_upgrade(aTHX_ cat, symptr);
3169 lookahead.flags |= FLAG_DO_UTF8;
3170 lookahead.strbeg = symptr->strbeg;
3177 end = start+SvLEN(cat);
3178 if (!utf8) end -= UTF8_MAXLEN;
3182 auv = SvUV(fromstr);
3184 U8 buffer[UTF8_MAXLEN], *endb;
3185 endb = uvuni_to_utf8_flags(buffer, auv,
3187 0 : UNICODE_ALLOW_ANY);
3188 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3190 SvCUR(cat) = cur - start;
3191 GROWING(0, cat, start, cur,
3192 len+(endb-buffer)*UTF8_EXPAND);
3193 end = start+SvLEN(cat);
3195 bytes_to_uni(aTHX_ buffer, endb-buffer, &cur);
3199 SvCUR(cat) = cur - start;
3200 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3201 end = start+SvLEN(cat)-UTF8_MAXLEN;
3203 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3205 0 : UNICODE_ALLOW_ANY);
3210 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3216 anv = SvNV(fromstr);
3218 /* VOS does not automatically map a floating-point overflow
3219 during conversion from double to float into infinity, so we
3220 do it by hand. This code should either be generalized for
3221 any OS that needs it, or removed if and when VOS implements
3222 posix-976 (suggestion to support mapping to infinity).
3223 Paul.Green@stratus.com 02-04-02. */
3225 afloat = _float_constants[0]; /* single prec. inf. */
3226 else if (anv < -FLT_MAX)
3227 afloat = _float_constants[0]; /* single prec. inf. */
3228 else afloat = (float) anv;
3230 # if defined(VMS) && !defined(__IEEE_FP)
3231 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3232 * on Alpha; fake it if we don't have them.
3236 else if (anv < -FLT_MAX)
3238 else afloat = (float)anv;
3240 afloat = (float)anv;
3242 #endif /* __VOS__ */
3243 DO_BO_PACK_N(afloat, float);
3244 PUSH_VAR(utf8, cur, afloat);
3252 anv = SvNV(fromstr);
3254 /* VOS does not automatically map a floating-point overflow
3255 during conversion from long double to double into infinity,
3256 so we do it by hand. This code should either be generalized
3257 for any OS that needs it, or removed if and when VOS
3258 implements posix-976 (suggestion to support mapping to
3259 infinity). Paul.Green@stratus.com 02-04-02. */
3261 adouble = _double_constants[0]; /* double prec. inf. */
3262 else if (anv < -DBL_MAX)
3263 adouble = _double_constants[0]; /* double prec. inf. */
3264 else adouble = (double) anv;
3266 # if defined(VMS) && !defined(__IEEE_FP)
3267 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3268 * on Alpha; fake it if we don't have them.
3272 else if (anv < -DBL_MAX)
3274 else adouble = (double)anv;
3276 adouble = (double)anv;
3278 #endif /* __VOS__ */
3279 DO_BO_PACK_N(adouble, double);
3280 PUSH_VAR(utf8, cur, adouble);
3285 Zero(&anv, 1, NV); /* can be long double with unused bits */
3288 anv = SvNV(fromstr);
3289 DO_BO_PACK_N(anv, NV);
3290 PUSH_VAR(utf8, cur, anv);
3294 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3296 long double aldouble;
3297 /* long doubles can have unused bits, which may be nonzero */
3298 Zero(&aldouble, 1, long double);
3301 aldouble = (long double)SvNV(fromstr);
3302 DO_BO_PACK_N(aldouble, long double);
3303 PUSH_VAR(utf8, cur, aldouble);
3308 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3309 case 'n' | TYPE_IS_SHRIEKING:
3315 ai16 = (I16)SvIV(fromstr);
3317 ai16 = PerlSock_htons(ai16);
3319 PUSH16(utf8, cur, &ai16);
3322 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3323 case 'v' | TYPE_IS_SHRIEKING:
3329 ai16 = (I16)SvIV(fromstr);
3333 PUSH16(utf8, cur, &ai16);
3336 case 'S' | TYPE_IS_SHRIEKING:
3337 #if SHORTSIZE != SIZE16
3339 unsigned short aushort;
3341 aushort = SvUV(fromstr);
3342 DO_BO_PACK(aushort, s);
3343 PUSH_VAR(utf8, cur, aushort);
3353 au16 = (U16)SvUV(fromstr);
3354 DO_BO_PACK(au16, 16);
3355 PUSH16(utf8, cur, &au16);
3358 case 's' | TYPE_IS_SHRIEKING:
3359 #if SHORTSIZE != SIZE16
3363 ashort = SvIV(fromstr);
3364 DO_BO_PACK(ashort, s);
3365 PUSH_VAR(utf8, cur, ashort);
3375 ai16 = (I16)SvIV(fromstr);
3376 DO_BO_PACK(ai16, 16);
3377 PUSH16(utf8, cur, &ai16);
3381 case 'I' | TYPE_IS_SHRIEKING:
3385 auint = SvUV(fromstr);
3386 DO_BO_PACK(auint, i);
3387 PUSH_VAR(utf8, cur, auint);
3394 aiv = SvIV(fromstr);
3395 #if IVSIZE == INTSIZE
3397 #elif IVSIZE == LONGSIZE
3399 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3400 DO_BO_PACK(aiv, 64);
3402 Perl_croak(aTHX_ "'j' not supported on this platform");
3404 PUSH_VAR(utf8, cur, aiv);
3411 auv = SvUV(fromstr);
3412 #if UVSIZE == INTSIZE
3414 #elif UVSIZE == LONGSIZE
3416 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3417 DO_BO_PACK(auv, 64);
3419 Perl_croak(aTHX_ "'J' not supported on this platform");
3421 PUSH_VAR(utf8, cur, auv);
3428 anv = SvNV(fromstr);
3432 SvCUR(cat) = cur - start;
3433 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3436 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3437 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3438 any negative IVs will have already been got by the croak()
3439 above. IOK is untrue for fractions, so we test them
3440 against UV_MAX_P1. */
3441 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3442 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3443 char *in = buf + sizeof(buf);
3444 UV auv = SvUV(fromstr);
3447 *--in = (char)((auv & 0x7f) | 0x80);
3450 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3451 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3452 in, (buf + sizeof(buf)) - in);
3453 } else if (SvPOKp(fromstr))
3455 else if (SvNOKp(fromstr)) {
3456 /* 10**NV_MAX_10_EXP is the largest power of 10
3457 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3458 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3459 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3460 And with that many bytes only Inf can overflow.
3461 Some C compilers are strict about integral constant
3462 expressions so we conservatively divide by a slightly
3463 smaller integer instead of multiplying by the exact
3464 floating-point value.
3466 #ifdef NV_MAX_10_EXP
3467 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3468 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3470 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3471 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3473 char *in = buf + sizeof(buf);
3475 anv = Perl_floor(anv);
3477 const NV next = Perl_floor(anv / 128);
3478 if (in <= buf) /* this cannot happen ;-) */
3479 Perl_croak(aTHX_ "Cannot compress integer in pack");
3480 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3483 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3484 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3485 in, (buf + sizeof(buf)) - in);
3494 /* Copy string and check for compliance */
3495 from = SvPV_const(fromstr, len);
3496 if ((norm = is_an_int(from, len)) == NULL)
3497 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3499 Newx(result, len, char);
3502 while (!done) *--in = div128(norm, &done) | 0x80;
3503 result[len - 1] &= 0x7F; /* clear continue bit */
3504 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3505 in, (result + len) - in);
3507 SvREFCNT_dec(norm); /* free norm */
3512 case 'i' | TYPE_IS_SHRIEKING:
3516 aint = SvIV(fromstr);
3517 DO_BO_PACK(aint, i);
3518 PUSH_VAR(utf8, cur, aint);
3521 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3522 case 'N' | TYPE_IS_SHRIEKING:
3528 au32 = SvUV(fromstr);
3530 au32 = PerlSock_htonl(au32);
3532 PUSH32(utf8, cur, &au32);
3535 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3536 case 'V' | TYPE_IS_SHRIEKING:
3542 au32 = SvUV(fromstr);
3546 PUSH32(utf8, cur, &au32);
3549 case 'L' | TYPE_IS_SHRIEKING:
3550 #if LONGSIZE != SIZE32
3552 unsigned long aulong;
3554 aulong = SvUV(fromstr);
3555 DO_BO_PACK(aulong, l);
3556 PUSH_VAR(utf8, cur, aulong);
3566 au32 = SvUV(fromstr);
3567 DO_BO_PACK(au32, 32);
3568 PUSH32(utf8, cur, &au32);
3571 case 'l' | TYPE_IS_SHRIEKING:
3572 #if LONGSIZE != SIZE32
3576 along = SvIV(fromstr);
3577 DO_BO_PACK(along, l);
3578 PUSH_VAR(utf8, cur, along);
3588 ai32 = SvIV(fromstr);
3589 DO_BO_PACK(ai32, 32);
3590 PUSH32(utf8, cur, &ai32);
3598 auquad = (Uquad_t) SvUV(fromstr);
3599 DO_BO_PACK(auquad, 64);
3600 PUSH_VAR(utf8, cur, auquad);
3607 aquad = (Quad_t)SvIV(fromstr);
3608 DO_BO_PACK(aquad, 64);
3609 PUSH_VAR(utf8, cur, aquad);
3612 #endif /* HAS_QUAD */
3614 len = 1; /* assume SV is correct length */
3615 GROWING(utf8, cat, start, cur, sizeof(char *));
3622 SvGETMAGIC(fromstr);
3623 if (!SvOK(fromstr)) aptr = NULL;
3626 /* XXX better yet, could spirit away the string to
3627 * a safe spot and hang on to it until the result
3628 * of pack() (and all copies of the result) are
3631 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3632 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
3633 Perl_warner(aTHX_ packWARN(WARN_PACK),
3634 "Attempt to pack pointer to temporary value");
3636 if (SvPOK(fromstr) || SvNIOK(fromstr))
3637 aptr = (char *) SvPV_nomg_const(fromstr, n_a);
3639 aptr = SvPV_force_flags(fromstr, n_a, 0);
3641 DO_BO_PACK_PC(aptr);
3642 PUSH_VAR(utf8, cur, aptr);
3646 const char *aptr, *aend;
3647 #ifdef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
3648 const bool from_utf8 = FALSE;
3654 if (len <= 2) len = 45;
3655 else len = len / 3 * 3;
3657 if (ckWARN(WARN_PACK))
3658 Perl_warner(aTHX_ packWARN(WARN_PACK),
3659 "Field too wide in 'u' format in pack");
3662 aptr = (char *) SvPV_const(fromstr, fromlen);
3663 #ifndef PERL_PACK_NEVER_UPGRADE_COMPATIBILITY
3664 from_utf8 = DO_UTF8(fromstr);
3666 aend = aptr + fromlen;
3667 fromlen = sv_len_utf8(fromstr);
3671 aend = NULL; /* Unused, but keep compilers happy */
3673 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3675 while (fromlen > 0) {
3678 U8 hunk[1+63/3*4+1];
3680 if ((I32)fromlen > len)
3686 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3687 'u' | TYPE_IS_PACK)) {
3689 SvCUR(cat) = cur - start;
3690 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3692 end = doencodes(hunk, buffer, todo);
3694 end = doencodes(hunk, aptr, todo);
3697 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3704 SvCUR(cat) = cur - start;
3706 *symptr = lookahead;
3715 dSP; dMARK; dORIGMARK; dTARGET;
3716 register SV *cat = TARG;
3718 SV *pat_sv = *++MARK;
3719 register const char *pat = SvPV_const(pat_sv, fromlen);
3720 register const char *patend = pat + fromlen;
3723 sv_setpvn(cat, "", 0);
3726 packlist(cat, (char *) pat, (char *) patend, MARK, SP + 1);
3736 * c-indentation-style: bsd
3738 * indent-tabs-mode: t
3741 * ex: set ts=8 sts=4 sw=4 noet: