3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
19 /* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
31 #define PERL_IN_PP_PACK_C
34 /* Types used by pack/unpack */
36 e_no_len, /* no length */
37 e_number, /* number, [] */
41 typedef struct tempsym {
42 const char* patptr; /* current template char */
43 const char* patend; /* one after last char */
44 const char* grpbeg; /* 1st char of ()-group */
45 const char* grpend; /* end of ()-group */
46 I32 code; /* template code (!<>) */
47 I32 length; /* length/repeat count */
48 howlen_t howlen; /* how length is given */
49 int level; /* () nesting level */
50 U32 flags; /* /=4, comma=2, pack=1 */
51 /* and group modifiers */
52 STRLEN strbeg; /* offset of group start */
53 struct tempsym *previous; /* previous group */
56 #define TEMPSYM_INIT(symptr, p, e, f) \
58 (symptr)->patptr = (p); \
59 (symptr)->patend = (e); \
60 (symptr)->grpbeg = NULL; \
61 (symptr)->grpend = NULL; \
62 (symptr)->grpend = NULL; \
64 (symptr)->length = 0; \
65 (symptr)->howlen = 0; \
66 (symptr)->level = 0; \
67 (symptr)->flags = (f); \
68 (symptr)->strbeg = 0; \
69 (symptr)->previous = NULL; \
73 # define PERL_PACK_CAN_BYTEORDER
74 # define PERL_PACK_CAN_SHRIEKSIGN
80 /* Maximum number of bytes to which a byte can grow due to upgrade */
84 * Offset for integer pack/unpack.
86 * On architectures where I16 and I32 aren't really 16 and 32 bits,
87 * which for now are all Crays, pack and unpack have to play games.
91 * These values are required for portability of pack() output.
92 * If they're not right on your machine, then pack() and unpack()
93 * wouldn't work right anyway; you'll need to apply the Cray hack.
94 * (I'd like to check them with #if, but you can't use sizeof() in
95 * the preprocessor.) --???
98 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
99 defines are now in config.h. --Andy Dougherty April 1998
104 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
107 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
108 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
109 # define OFF16(p) ((char*)(p))
110 # define OFF32(p) ((char*)(p))
112 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
113 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
114 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
116 ++++ bad cray byte order
120 # define OFF16(p) ((char *) (p))
121 # define OFF32(p) ((char *) (p))
124 /* Only to be used inside a loop (see the break) */
125 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
127 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
129 Copy(s, OFF16(p), SIZE16, char); \
134 /* Only to be used inside a loop (see the break) */
135 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
137 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
139 Copy(s, OFF32(p), SIZE32, char); \
144 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
145 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
147 /* Only to be used inside a loop (see the break) */
148 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \
151 if (!uni_to_bytes(aTHX_ &s, strend, \
152 (char *) &var, sizeof(var), datumtype)) break;\
154 Copy(s, (char *) &var, sizeof(var), char); \
159 #define PUSH_VAR(utf8, aptr, var) \
160 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
162 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
163 #define MAX_SUB_TEMPLATE_LEVEL 100
165 /* flags (note that type modifiers can also be used as flags!) */
166 #define FLAG_WAS_UTF8 0x40
167 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
168 #define FLAG_UNPACK_ONLY_ONE 0x10
169 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
170 #define FLAG_SLASH 0x04
171 #define FLAG_COMMA 0x02
172 #define FLAG_PACK 0x01
175 S_mul128(pTHX_ SV *sv, U8 m)
178 char *s = SvPV(sv, len);
181 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
182 SV *tmpNew = newSVpvn("0000000000", 10);
184 sv_catsv(tmpNew, sv);
185 SvREFCNT_dec(sv); /* free old sv */
190 while (!*t) /* trailing '\0'? */
193 const U32 i = ((*t - '0') << 7) + m;
194 *(t--) = '0' + (char)(i % 10);
200 /* Explosives and implosives. */
202 #if 'I' == 73 && 'J' == 74
203 /* On an ASCII/ISO kind of system */
204 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
207 Some other sort of character set - use memchr() so we don't match
210 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
214 #define TYPE_IS_SHRIEKING 0x100
215 #define TYPE_IS_BIG_ENDIAN 0x200
216 #define TYPE_IS_LITTLE_ENDIAN 0x400
217 #define TYPE_IS_PACK 0x800
218 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
219 #define TYPE_MODIFIERS(t) ((t) & ~0xFF)
220 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
222 #ifdef PERL_PACK_CAN_SHRIEKSIGN
223 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
225 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
228 #ifndef PERL_PACK_CAN_BYTEORDER
229 /* Put "can't" first because it is shorter */
230 # define TYPE_ENDIANNESS(t) 0
231 # define TYPE_NO_ENDIANNESS(t) (t)
233 # define ENDIANNESS_ALLOWED_TYPES ""
235 # define DO_BO_UNPACK(var, type)
236 # define DO_BO_PACK(var, type)
237 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
238 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
239 # define DO_BO_UNPACK_N(var, type)
240 # define DO_BO_PACK_N(var, type)
241 # define DO_BO_UNPACK_P(var)
242 # define DO_BO_PACK_P(var)
244 #else /* PERL_PACK_CAN_BYTEORDER */
246 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
247 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
249 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
251 # define DO_BO_UNPACK(var, type) \
253 switch (TYPE_ENDIANNESS(datumtype)) { \
254 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
255 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
260 # define DO_BO_PACK(var, type) \
262 switch (TYPE_ENDIANNESS(datumtype)) { \
263 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
264 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
269 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
271 switch (TYPE_ENDIANNESS(datumtype)) { \
272 case TYPE_IS_BIG_ENDIAN: \
273 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
275 case TYPE_IS_LITTLE_ENDIAN: \
276 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
283 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
285 switch (TYPE_ENDIANNESS(datumtype)) { \
286 case TYPE_IS_BIG_ENDIAN: \
287 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
289 case TYPE_IS_LITTLE_ENDIAN: \
290 var = (post_cast *) my_htole ## type ((pre_cast) var); \
297 # define BO_CANT_DOIT(action, type) \
299 switch (TYPE_ENDIANNESS(datumtype)) { \
300 case TYPE_IS_BIG_ENDIAN: \
301 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
302 "platform", #action, #type); \
304 case TYPE_IS_LITTLE_ENDIAN: \
305 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
306 "platform", #action, #type); \
313 # if PTRSIZE == INTSIZE
314 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
315 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
316 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
317 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
318 # elif PTRSIZE == LONGSIZE
319 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
320 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
321 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
322 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
324 # define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
325 # define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
328 # if defined(my_htolen) && defined(my_letohn) && \
329 defined(my_htoben) && defined(my_betohn)
330 # define DO_BO_UNPACK_N(var, type) \
332 switch (TYPE_ENDIANNESS(datumtype)) { \
333 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
334 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
339 # define DO_BO_PACK_N(var, type) \
341 switch (TYPE_ENDIANNESS(datumtype)) { \
342 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
343 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
348 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
349 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
352 #endif /* PERL_PACK_CAN_BYTEORDER */
354 #define PACK_SIZE_CANNOT_CSUM 0x80
355 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
356 #define PACK_SIZE_MASK 0x3F
358 /* These tables are regenerated by genpacksizetables.pl (and then hand pasted
359 in). You're unlikely ever to need to regenerate them. */
361 #if TYPE_IS_SHRIEKING != 0x100
362 ++++shriek offset should be 256
365 typedef U8 packprops_t;
368 const packprops_t packprops[512] = {
370 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
371 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
372 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
373 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
375 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
376 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
377 /* D */ LONG_DOUBLESIZE,
384 /* I */ sizeof(unsigned int),
391 #if defined(HAS_QUAD)
392 /* Q */ sizeof(Uquad_t),
399 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
401 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
402 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
403 /* c */ sizeof(char),
404 /* d */ sizeof(double),
406 /* f */ sizeof(float),
415 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
416 #if defined(HAS_QUAD)
417 /* q */ sizeof(Quad_t),
425 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
426 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
427 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
428 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
429 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
430 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
431 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
432 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
433 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
434 0, 0, 0, 0, 0, 0, 0, 0,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 /* I */ sizeof(unsigned int),
443 /* L */ sizeof(unsigned long),
445 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
451 /* S */ sizeof(unsigned short),
453 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
458 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
462 /* l */ sizeof(long),
464 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
470 /* s */ sizeof(short),
472 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
477 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
478 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
479 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
480 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
481 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
482 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
483 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
484 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
485 0, 0, 0, 0, 0, 0, 0, 0, 0
488 /* EBCDIC (or bust) */
489 const packprops_t packprops[512] = {
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
495 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
496 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
497 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
498 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
500 /* c */ sizeof(char),
501 /* d */ sizeof(double),
503 /* f */ sizeof(float),
513 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
514 #if defined(HAS_QUAD)
515 /* q */ sizeof(Quad_t),
519 0, 0, 0, 0, 0, 0, 0, 0, 0,
523 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
524 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
525 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
526 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
527 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
528 /* D */ LONG_DOUBLESIZE,
535 /* I */ sizeof(unsigned int),
543 #if defined(HAS_QUAD)
544 /* Q */ sizeof(Uquad_t),
548 0, 0, 0, 0, 0, 0, 0, 0, 0,
551 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
553 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
554 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
555 0, 0, 0, 0, 0, 0, 0, 0, 0,
557 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
558 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
559 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
560 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
561 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
562 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
564 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
565 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 /* l */ sizeof(long),
570 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
575 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
576 /* s */ sizeof(short),
578 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
583 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
584 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
586 /* I */ sizeof(unsigned int),
587 0, 0, 0, 0, 0, 0, 0, 0, 0,
588 /* L */ sizeof(unsigned long),
590 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
595 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
596 /* S */ sizeof(unsigned short),
598 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
603 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
604 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
609 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
613 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
614 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
615 /* We try to process malformed UTF-8 as much as possible (preferrably with
616 warnings), but these two mean we make no progress in the string and
617 might enter an infinite loop */
618 if (retlen == (STRLEN) -1 || retlen == 0)
619 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
620 (int) TYPE_NO_MODIFIERS(datumtype));
622 if (ckWARN(WARN_UNPACK))
623 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
624 "Character in '%c' format wrapped in unpack",
625 (int) TYPE_NO_MODIFIERS(datumtype));
632 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
633 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
637 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
641 const char *from = *s;
643 const U32 flags = ckWARN(WARN_UTF8) ?
644 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
645 for (;buf_len > 0; buf_len--) {
646 if (from >= end) return FALSE;
647 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
648 if (retlen == (STRLEN) -1 || retlen == 0) {
649 from += UTF8SKIP(from);
651 } else from += retlen;
656 *(U8 *)buf++ = (U8)val;
658 /* We have enough characters for the buffer. Did we have problems ? */
661 /* Rewalk the string fragment while warning */
663 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
664 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
665 if (ptr >= end) break;
666 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
668 if (from > end) from = end;
670 if ((bad & 2) && ckWARN(WARN_UNPACK))
671 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
672 WARN_PACK : WARN_UNPACK),
673 "Character(s) in '%c' format wrapped in %s",
674 (int) TYPE_NO_MODIFIERS(datumtype),
675 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
682 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
686 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
687 if (val >= 0x100 || !ISUUCHAR(val) ||
688 retlen == (STRLEN) -1 || retlen == 0) {
692 *out = PL_uudmap[val] & 077;
698 bytes_to_uni(pTHX_ const U8 *start, STRLEN len, char **dest) {
699 U8 buffer[UTF8_MAXLEN];
700 const U8 *end = start + len;
702 while (start < end) {
704 uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
714 Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
722 #define PUSH_BYTES(utf8, cur, buf, len) \
724 if (utf8) bytes_to_uni(aTHX_ (U8 *) buf, len, &(cur)); \
726 Copy(buf, cur, len, char); \
731 #define GROWING(utf8, cat, start, cur, in_len) \
733 STRLEN glen = (in_len); \
734 if (utf8) glen *= UTF8_EXPAND; \
735 if ((cur) + glen >= (start) + SvLEN(cat)) { \
736 (start) = sv_exp_grow(aTHX_ cat, glen); \
737 (cur) = (start) + SvCUR(cat); \
741 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
743 const STRLEN glen = (in_len); \
745 if (utf8) gl *= UTF8_EXPAND; \
746 if ((cur) + gl >= (start) + SvLEN(cat)) { \
748 SvCUR_set((cat), (cur) - (start)); \
749 (start) = sv_exp_grow(aTHX_ cat, gl); \
750 (cur) = (start) + SvCUR(cat); \
752 PUSH_BYTES(utf8, cur, buf, glen); \
755 #define PUSH_BYTE(utf8, s, byte) \
758 const U8 au8 = (byte); \
759 bytes_to_uni(aTHX_ &au8, 1, &(s)); \
760 } else *(U8 *)(s)++ = (byte); \
763 /* Only to be used inside a loop (see the break) */
764 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
767 if (str >= end) break; \
768 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
769 if (retlen == (STRLEN) -1 || retlen == 0) { \
771 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
776 static const char *_action( const tempsym_t* symptr )
778 return ( symptr->flags & FLAG_PACK ) ? "pack" : "unpack";
781 /* Returns the sizeof() struct described by pat */
783 S_measure_struct(pTHX_ tempsym_t* symptr)
787 while (next_symbol(symptr)) {
791 switch (symptr->howlen) {
793 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
797 /* e_no_len and e_number */
798 len = symptr->length;
802 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
805 /* endianness doesn't influence the size of a type */
806 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
808 Perl_croak(aTHX_ "Invalid type '%c' in %s",
809 (int)TYPE_NO_MODIFIERS(symptr->code),
811 #ifdef PERL_PACK_CAN_SHRIEKSIGN
812 case '.' | TYPE_IS_SHRIEKING:
813 case '@' | TYPE_IS_SHRIEKING:
818 case 'U': /* XXXX Is it correct? */
821 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
822 (int) TYPE_NO_MODIFIERS(symptr->code),
829 tempsym_t savsym = *symptr;
830 symptr->patptr = savsym.grpbeg;
831 symptr->patend = savsym.grpend;
832 /* XXXX Theoretically, we need to measure many times at
833 different positions, since the subexpression may contain
834 alignment commands, but be not of aligned length.
835 Need to detect this and croak(). */
836 size = measure_struct(symptr);
840 case 'X' | TYPE_IS_SHRIEKING:
841 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
843 if (!len) /* Avoid division by 0 */
845 len = total % len; /* Assumed: the start is aligned. */
850 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
852 case 'x' | TYPE_IS_SHRIEKING:
853 if (!len) /* Avoid division by 0 */
855 star = total % len; /* Assumed: the start is aligned. */
856 if (star) /* Other portable ways? */
880 size = sizeof(char*);
890 /* locate matching closing parenthesis or bracket
891 * returns char pointer to char after match, or NULL
894 S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
896 while (patptr < patend) {
897 const char c = *patptr++;
904 while (patptr < patend && *patptr != '\n')
908 patptr = group_end(patptr, patend, ')') + 1;
910 patptr = group_end(patptr, patend, ']') + 1;
912 Perl_croak(aTHX_ "No group ending character '%c' found in template",
918 /* Convert unsigned decimal number to binary.
919 * Expects a pointer to the first digit and address of length variable
920 * Advances char pointer to 1st non-digit char and returns number
923 S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
925 I32 len = *patptr++ - '0';
926 while (isDIGIT(*patptr)) {
927 if (len >= 0x7FFFFFFF/10)
928 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
929 len = (len * 10) + (*patptr++ - '0');
935 /* The marvellous template parsing routine: Using state stored in *symptr,
936 * locates next template code and count
939 S_next_symbol(pTHX_ tempsym_t* symptr )
941 const char* patptr = symptr->patptr;
942 const char* patend = symptr->patend;
944 symptr->flags &= ~FLAG_SLASH;
946 while (patptr < patend) {
947 if (isSPACE(*patptr))
949 else if (*patptr == '#') {
951 while (patptr < patend && *patptr != '\n')
956 /* We should have found a template code */
957 I32 code = *patptr++ & 0xFF;
958 U32 inherited_modifiers = 0;
960 if (code == ','){ /* grandfather in commas but with a warning */
961 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
962 symptr->flags |= FLAG_COMMA;
963 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
964 "Invalid type ',' in %s", _action( symptr ) );
969 /* for '(', skip to ')' */
971 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
972 Perl_croak(aTHX_ "()-group starts with a count in %s",
974 symptr->grpbeg = patptr;
975 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
976 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
977 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
981 /* look for group modifiers to inherit */
982 if (TYPE_ENDIANNESS(symptr->flags)) {
983 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
984 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
987 /* look for modifiers */
988 while (patptr < patend) {
993 modifier = TYPE_IS_SHRIEKING;
994 allowed = SHRIEKING_ALLOWED_TYPES;
996 #ifdef PERL_PACK_CAN_BYTEORDER
998 modifier = TYPE_IS_BIG_ENDIAN;
999 allowed = ENDIANNESS_ALLOWED_TYPES;
1002 modifier = TYPE_IS_LITTLE_ENDIAN;
1003 allowed = ENDIANNESS_ALLOWED_TYPES;
1005 #endif /* PERL_PACK_CAN_BYTEORDER */
1015 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1016 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
1017 allowed, _action( symptr ) );
1019 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1020 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
1021 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
1022 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1023 TYPE_ENDIANNESS_MASK)
1024 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
1025 *patptr, _action( symptr ) );
1027 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1028 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1029 "Duplicate modifier '%c' after '%c' in %s",
1030 *patptr, (int) TYPE_NO_MODIFIERS(code),
1031 _action( symptr ) );
1038 /* inherit modifiers */
1039 code |= inherited_modifiers;
1041 /* look for count and/or / */
1042 if (patptr < patend) {
1043 if (isDIGIT(*patptr)) {
1044 patptr = get_num( patptr, &symptr->length );
1045 symptr->howlen = e_number;
1047 } else if (*patptr == '*') {
1049 symptr->howlen = e_star;
1051 } else if (*patptr == '[') {
1052 const char* lenptr = ++patptr;
1053 symptr->howlen = e_number;
1054 patptr = group_end( patptr, patend, ']' ) + 1;
1055 /* what kind of [] is it? */
1056 if (isDIGIT(*lenptr)) {
1057 lenptr = get_num( lenptr, &symptr->length );
1058 if( *lenptr != ']' )
1059 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1060 _action( symptr ) );
1062 tempsym_t savsym = *symptr;
1063 symptr->patend = patptr-1;
1064 symptr->patptr = lenptr;
1065 savsym.length = measure_struct(symptr);
1069 symptr->howlen = e_no_len;
1074 while (patptr < patend) {
1075 if (isSPACE(*patptr))
1077 else if (*patptr == '#') {
1079 while (patptr < patend && *patptr != '\n')
1081 if (patptr < patend)
1084 if (*patptr == '/') {
1085 symptr->flags |= FLAG_SLASH;
1087 if (patptr < patend &&
1088 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1089 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1090 _action( symptr ) );
1096 /* at end - no count, no / */
1097 symptr->howlen = e_no_len;
1101 symptr->code = code;
1102 symptr->patptr = patptr;
1106 symptr->patptr = patptr;
1111 There is no way to cleanly handle the case where we should process the
1112 string per byte in its upgraded form while it's really in downgraded form
1113 (e.g. estimates like strend-s as an upper bound for the number of
1114 characters left wouldn't work). So if we foresee the need of this
1115 (pattern starts with U or contains U0), we want to work on the encoded
1116 version of the string. Users are advised to upgrade their pack string
1117 themselves if they need to do a lot of unpacks like this on it
1120 need_utf8(const char *pat, const char *patend)
1123 while (pat < patend) {
1124 if (pat[0] == '#') {
1126 pat = (const char *) memchr(pat, '\n', patend-pat);
1127 if (!pat) return FALSE;
1128 } else if (pat[0] == 'U') {
1129 if (first || pat[1] == '0') return TRUE;
1130 } else first = FALSE;
1137 first_symbol(const char *pat, const char *patend) {
1138 while (pat < patend) {
1139 if (pat[0] != '#') return pat[0];
1141 pat = (const char *) memchr(pat, '\n', patend-pat);
1149 =for apidoc unpack_str
1151 The engine implementing unpack() Perl function. Note: parameters strbeg, new_s
1152 and ocnt are not used. This call should not be used, use unpackstring instead.
1157 Perl_unpack_str(pTHX_ const char *pat, const char *patend, const char *s, const char *strbeg, const char *strend, char **new_s, I32 ocnt, U32 flags)
1160 PERL_UNUSED_ARG(strbeg);
1161 PERL_UNUSED_ARG(new_s);
1162 PERL_UNUSED_ARG(ocnt);
1164 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1165 else if (need_utf8(pat, patend)) {
1166 /* We probably should try to avoid this in case a scalar context call
1167 wouldn't get to the "U0" */
1168 STRLEN len = strend - s;
1169 s = (char *) bytes_to_utf8((U8 *) s, &len);
1172 flags |= FLAG_DO_UTF8;
1175 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1176 flags |= FLAG_PARSE_UTF8;
1178 TEMPSYM_INIT(&sym, pat, patend, flags);
1180 return unpack_rec(&sym, s, s, strend, NULL );
1184 =for apidoc unpackstring
1186 The engine implementing unpack() Perl function. C<unpackstring> puts the
1187 extracted list items on the stack and returns the number of elements.
1188 Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1193 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
1197 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1198 else if (need_utf8(pat, patend)) {
1199 /* We probably should try to avoid this in case a scalar context call
1200 wouldn't get to the "U0" */
1201 STRLEN len = strend - s;
1202 s = (char *) bytes_to_utf8((U8 *) s, &len);
1205 flags |= FLAG_DO_UTF8;
1208 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1209 flags |= FLAG_PARSE_UTF8;
1211 TEMPSYM_INIT(&sym, pat, patend, flags);
1213 return unpack_rec(&sym, s, s, strend, NULL );
1218 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
1222 const I32 start_sp_offset = SP - PL_stack_base;
1228 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1229 bool beyond = FALSE;
1230 bool explicit_length;
1231 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1232 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1233 symptr->strbeg = s - strbeg;
1235 while (next_symbol(symptr)) {
1238 I32 datumtype = symptr->code;
1239 /* do first one only unless in list context
1240 / is implemented by unpacking the count, then popping it from the
1241 stack, so must check that we're not in the middle of a / */
1242 if ( unpack_only_one
1243 && (SP - PL_stack_base == start_sp_offset + 1)
1244 && (datumtype != '/') ) /* XXX can this be omitted */
1247 switch (howlen = symptr->howlen) {
1249 len = strend - strbeg; /* long enough */
1252 /* e_no_len and e_number */
1253 len = symptr->length;
1257 explicit_length = TRUE;
1259 beyond = s >= strend;
1261 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1263 /* props nonzero means we can process this letter. */
1264 const long size = props & PACK_SIZE_MASK;
1265 const long howmany = (strend - s) / size;
1269 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1270 if (len && unpack_only_one) len = 1;
1276 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1278 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1281 if (howlen == e_no_len)
1282 len = 16; /* len is not specified */
1290 tempsym_t savsym = *symptr;
1291 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1292 symptr->flags |= group_modifiers;
1293 symptr->patend = savsym.grpend;
1294 symptr->previous = &savsym;
1298 symptr->patptr = savsym.grpbeg;
1299 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1300 else symptr->flags &= ~FLAG_PARSE_UTF8;
1301 unpack_rec(symptr, s, strbeg, strend, &s);
1302 if (s == strend && savsym.howlen == e_star)
1303 break; /* No way to continue */
1306 savsym.flags = symptr->flags & ~group_modifiers;
1310 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1311 case '.' | TYPE_IS_SHRIEKING:
1316 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1317 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1318 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1319 const bool u8 = utf8;
1321 if (howlen == e_star) from = strbeg;
1322 else if (len <= 0) from = s;
1324 tempsym_t *group = symptr;
1326 while (--len && group) group = group->previous;
1327 from = group ? strbeg + group->strbeg : strbeg;
1330 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1331 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1332 XPUSHs(sv_2mortal(sv));
1335 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1336 case '@' | TYPE_IS_SHRIEKING:
1339 s = strbeg + symptr->strbeg;
1340 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1341 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1342 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1348 Perl_croak(aTHX_ "'@' outside of string in unpack");
1353 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1356 Perl_croak(aTHX_ "'@' outside of string in unpack");
1360 case 'X' | TYPE_IS_SHRIEKING:
1361 if (!len) /* Avoid division by 0 */
1364 const char *hop, *last;
1366 hop = last = strbeg;
1368 hop += UTF8SKIP(hop);
1375 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1379 len = (s - strbeg) % len;
1385 Perl_croak(aTHX_ "'X' outside of string in unpack");
1386 while (--s, UTF8_IS_CONTINUATION(*s)) {
1388 Perl_croak(aTHX_ "'X' outside of string in unpack");
1393 if (len > s - strbeg)
1394 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1398 case 'x' | TYPE_IS_SHRIEKING: {
1400 if (!len) /* Avoid division by 0 */
1402 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1403 else ai32 = (s - strbeg) % len;
1404 if (ai32 == 0) break;
1412 Perl_croak(aTHX_ "'x' outside of string in unpack");
1417 if (len > strend - s)
1418 Perl_croak(aTHX_ "'x' outside of string in unpack");
1423 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1429 /* Preliminary length estimate is assumed done in 'W' */
1430 if (len > strend - s) len = strend - s;
1436 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1437 if (hop >= strend) {
1439 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1444 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1446 } else if (len > strend - s)
1449 if (datumtype == 'Z') {
1450 /* 'Z' strips stuff after first null */
1451 const char *ptr, *end;
1453 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1454 sv = newSVpvn(s, ptr-s);
1455 if (howlen == e_star) /* exact for 'Z*' */
1456 len = ptr-s + (ptr != strend ? 1 : 0);
1457 } else if (datumtype == 'A') {
1458 /* 'A' strips both nulls and spaces */
1460 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1461 for (ptr = s+len-1; ptr >= s; ptr--)
1462 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1463 !is_utf8_space((U8 *) ptr)) break;
1464 if (ptr >= s) ptr += UTF8SKIP(ptr);
1467 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1469 for (ptr = s+len-1; ptr >= s; ptr--)
1470 if (*ptr != 0 && !isSPACE(*ptr)) break;
1473 sv = newSVpvn(s, ptr-s);
1474 } else sv = newSVpvn(s, len);
1478 /* Undo any upgrade done due to need_utf8() */
1479 if (!(symptr->flags & FLAG_WAS_UTF8))
1480 sv_utf8_downgrade(sv, 0);
1482 XPUSHs(sv_2mortal(sv));
1488 if (howlen == e_star || len > (strend - s) * 8)
1489 len = (strend - s) * 8;
1493 Newxz(PL_bitcount, 256, char);
1494 for (bits = 1; bits < 256; bits++) {
1495 if (bits & 1) PL_bitcount[bits]++;
1496 if (bits & 2) PL_bitcount[bits]++;
1497 if (bits & 4) PL_bitcount[bits]++;
1498 if (bits & 8) PL_bitcount[bits]++;
1499 if (bits & 16) PL_bitcount[bits]++;
1500 if (bits & 32) PL_bitcount[bits]++;
1501 if (bits & 64) PL_bitcount[bits]++;
1502 if (bits & 128) PL_bitcount[bits]++;
1506 while (len >= 8 && s < strend) {
1507 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1512 cuv += PL_bitcount[*(U8 *)s++];
1515 if (len && s < strend) {
1517 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1518 if (datumtype == 'b')
1520 if (bits & 1) cuv++;
1525 if (bits & 0x80) cuv++;
1532 sv = sv_2mortal(NEWSV(35, len ? len : 1));
1535 if (datumtype == 'b') {
1537 const I32 ai32 = len;
1538 for (len = 0; len < ai32; len++) {
1539 if (len & 7) bits >>= 1;
1541 if (s >= strend) break;
1542 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1543 } else bits = *(U8 *) s++;
1544 *str++ = bits & 1 ? '1' : '0';
1548 const I32 ai32 = len;
1549 for (len = 0; len < ai32; len++) {
1550 if (len & 7) bits <<= 1;
1552 if (s >= strend) break;
1553 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1554 } else bits = *(U8 *) s++;
1555 *str++ = bits & 0x80 ? '1' : '0';
1559 SvCUR_set(sv, str - SvPVX_const(sv));
1566 /* Preliminary length estimate, acceptable for utf8 too */
1567 if (howlen == e_star || len > (strend - s) * 2)
1568 len = (strend - s) * 2;
1569 sv = sv_2mortal(NEWSV(35, len ? len : 1));
1572 if (datumtype == 'h') {
1575 for (len = 0; len < ai32; len++) {
1576 if (len & 1) bits >>= 4;
1578 if (s >= strend) break;
1579 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1580 } else bits = * (U8 *) s++;
1581 *str++ = PL_hexdigit[bits & 15];
1585 const I32 ai32 = len;
1586 for (len = 0; len < ai32; len++) {
1587 if (len & 1) bits <<= 4;
1589 if (s >= strend) break;
1590 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1591 } else bits = *(U8 *) s++;
1592 *str++ = PL_hexdigit[(bits >> 4) & 15];
1596 SvCUR_set(sv, str - SvPVX_const(sv));
1602 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
1603 if (aint >= 128) /* fake up signed chars */
1606 PUSHs(sv_2mortal(newSViv((IV)aint)));
1607 else if (checksum > bits_in_uv)
1608 cdouble += (NV)aint;
1617 if (explicit_length && datumtype == 'C')
1618 /* Switch to "character" mode */
1619 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1622 if (datumtype == 'C' ?
1623 (symptr->flags & FLAG_DO_UTF8) &&
1624 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
1625 while (len-- > 0 && s < strend) {
1627 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1628 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1629 if (retlen == (STRLEN) -1 || retlen == 0)
1630 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1633 PUSHs(sv_2mortal(newSVuv((UV) val)));
1634 else if (checksum > bits_in_uv)
1635 cdouble += (NV) val;
1639 } else if (!checksum)
1641 const U8 ch = *(U8 *) s++;
1642 PUSHs(sv_2mortal(newSVuv((UV) ch)));
1644 else if (checksum > bits_in_uv)
1645 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1647 while (len-- > 0) cuv += *(U8 *) s++;
1651 if (explicit_length) {
1652 /* Switch to "bytes in UTF-8" mode */
1653 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1655 /* Should be impossible due to the need_utf8() test */
1656 Perl_croak(aTHX_ "U0 mode on a byte string");
1660 if (len > strend - s) len = strend - s;
1662 if (len && unpack_only_one) len = 1;
1666 while (len-- > 0 && s < strend) {
1670 U8 result[UTF8_MAXLEN];
1671 const char *ptr = s;
1673 /* Bug: warns about bad utf8 even if we are short on bytes
1674 and will break out of the loop */
1675 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1678 len = UTF8SKIP(result);
1679 if (!uni_to_bytes(aTHX_ &ptr, strend,
1680 (char *) &result[1], len-1, 'U')) break;
1681 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1684 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1685 if (retlen == (STRLEN) -1 || retlen == 0)
1686 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1690 PUSHs(sv_2mortal(newSVuv((UV) auv)));
1691 else if (checksum > bits_in_uv)
1692 cdouble += (NV) auv;
1697 case 's' | TYPE_IS_SHRIEKING:
1698 #if SHORTSIZE != SIZE16
1701 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1702 DO_BO_UNPACK(ashort, s);
1704 PUSHs(sv_2mortal(newSViv((IV)ashort)));
1705 else if (checksum > bits_in_uv)
1706 cdouble += (NV)ashort;
1718 #if U16SIZE > SIZE16
1721 SHIFT16(utf8, s, strend, &ai16, datumtype);
1722 DO_BO_UNPACK(ai16, 16);
1723 #if U16SIZE > SIZE16
1728 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1729 else if (checksum > bits_in_uv)
1730 cdouble += (NV)ai16;
1735 case 'S' | TYPE_IS_SHRIEKING:
1736 #if SHORTSIZE != SIZE16
1738 unsigned short aushort;
1739 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1740 DO_BO_UNPACK(aushort, s);
1742 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
1743 else if (checksum > bits_in_uv)
1744 cdouble += (NV)aushort;
1757 #if U16SIZE > SIZE16
1760 SHIFT16(utf8, s, strend, &au16, datumtype);
1761 DO_BO_UNPACK(au16, 16);
1763 if (datumtype == 'n')
1764 au16 = PerlSock_ntohs(au16);
1767 if (datumtype == 'v')
1771 PUSHs(sv_2mortal(newSVuv((UV)au16)));
1772 else if (checksum > bits_in_uv)
1773 cdouble += (NV) au16;
1778 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1779 case 'v' | TYPE_IS_SHRIEKING:
1780 case 'n' | TYPE_IS_SHRIEKING:
1783 # if U16SIZE > SIZE16
1786 SHIFT16(utf8, s, strend, &ai16, datumtype);
1788 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1789 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1790 # endif /* HAS_NTOHS */
1792 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1793 ai16 = (I16) vtohs((U16) ai16);
1794 # endif /* HAS_VTOHS */
1796 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1797 else if (checksum > bits_in_uv)
1798 cdouble += (NV) ai16;
1803 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1805 case 'i' | TYPE_IS_SHRIEKING:
1808 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1809 DO_BO_UNPACK(aint, i);
1811 PUSHs(sv_2mortal(newSViv((IV)aint)));
1812 else if (checksum > bits_in_uv)
1813 cdouble += (NV)aint;
1819 case 'I' | TYPE_IS_SHRIEKING:
1822 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1823 DO_BO_UNPACK(auint, i);
1825 PUSHs(sv_2mortal(newSVuv((UV)auint)));
1826 else if (checksum > bits_in_uv)
1827 cdouble += (NV)auint;
1835 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1836 #if IVSIZE == INTSIZE
1837 DO_BO_UNPACK(aiv, i);
1838 #elif IVSIZE == LONGSIZE
1839 DO_BO_UNPACK(aiv, l);
1840 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1841 DO_BO_UNPACK(aiv, 64);
1843 Perl_croak(aTHX_ "'j' not supported on this platform");
1846 PUSHs(sv_2mortal(newSViv(aiv)));
1847 else if (checksum > bits_in_uv)
1856 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1857 #if IVSIZE == INTSIZE
1858 DO_BO_UNPACK(auv, i);
1859 #elif IVSIZE == LONGSIZE
1860 DO_BO_UNPACK(auv, l);
1861 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1862 DO_BO_UNPACK(auv, 64);
1864 Perl_croak(aTHX_ "'J' not supported on this platform");
1867 PUSHs(sv_2mortal(newSVuv(auv)));
1868 else if (checksum > bits_in_uv)
1874 case 'l' | TYPE_IS_SHRIEKING:
1875 #if LONGSIZE != SIZE32
1878 SHIFT_VAR(utf8, s, strend, along, datumtype);
1879 DO_BO_UNPACK(along, l);
1881 PUSHs(sv_2mortal(newSViv((IV)along)));
1882 else if (checksum > bits_in_uv)
1883 cdouble += (NV)along;
1894 #if U32SIZE > SIZE32
1897 SHIFT32(utf8, s, strend, &ai32, datumtype);
1898 DO_BO_UNPACK(ai32, 32);
1899 #if U32SIZE > SIZE32
1900 if (ai32 > 2147483647) ai32 -= 4294967296;
1903 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1904 else if (checksum > bits_in_uv)
1905 cdouble += (NV)ai32;
1910 case 'L' | TYPE_IS_SHRIEKING:
1911 #if LONGSIZE != SIZE32
1913 unsigned long aulong;
1914 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1915 DO_BO_UNPACK(aulong, l);
1917 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
1918 else if (checksum > bits_in_uv)
1919 cdouble += (NV)aulong;
1932 #if U32SIZE > SIZE32
1935 SHIFT32(utf8, s, strend, &au32, datumtype);
1936 DO_BO_UNPACK(au32, 32);
1938 if (datumtype == 'N')
1939 au32 = PerlSock_ntohl(au32);
1942 if (datumtype == 'V')
1946 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1947 else if (checksum > bits_in_uv)
1948 cdouble += (NV)au32;
1953 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1954 case 'V' | TYPE_IS_SHRIEKING:
1955 case 'N' | TYPE_IS_SHRIEKING:
1958 # if U32SIZE > SIZE32
1961 SHIFT32(utf8, s, strend, &ai32, datumtype);
1963 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1964 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1967 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1968 ai32 = (I32)vtohl((U32)ai32);
1971 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1972 else if (checksum > bits_in_uv)
1973 cdouble += (NV)ai32;
1978 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1982 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1983 DO_BO_UNPACK_PC(aptr);
1984 /* newSVpv generates undef if aptr is NULL */
1985 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
1993 while (len > 0 && s < strend) {
1995 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1996 auv = (auv << 7) | (ch & 0x7f);
1997 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
2000 PUSHs(sv_2mortal(newSVuv(auv)));
2005 if (++bytes >= sizeof(UV)) { /* promote to string */
2008 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
2009 while (s < strend) {
2010 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
2011 sv = mul128(sv, (U8)(ch & 0x7f));
2017 t = SvPV_nolen_const(sv);
2021 PUSHs(sv_2mortal(sv));
2026 if ((s >= strend) && bytes)
2027 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
2031 if (symptr->howlen == e_star)
2032 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
2034 if (sizeof(char*) <= strend - s) {
2036 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2037 DO_BO_UNPACK_PC(aptr);
2038 /* newSVpvn generates undef if aptr is NULL */
2039 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
2046 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2047 DO_BO_UNPACK(aquad, 64);
2049 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
2050 newSViv((IV)aquad) : newSVnv((NV)aquad)));
2051 else if (checksum > bits_in_uv)
2052 cdouble += (NV)aquad;
2060 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2061 DO_BO_UNPACK(auquad, 64);
2063 PUSHs(sv_2mortal(auquad <= UV_MAX ?
2064 newSVuv((UV)auquad):newSVnv((NV)auquad)));
2065 else if (checksum > bits_in_uv)
2066 cdouble += (NV)auquad;
2071 #endif /* HAS_QUAD */
2072 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2076 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
2077 DO_BO_UNPACK_N(afloat, float);
2079 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2087 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
2088 DO_BO_UNPACK_N(adouble, double);
2090 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2098 SHIFT_VAR(utf8, s, strend, anv, datumtype);
2099 DO_BO_UNPACK_N(anv, NV);
2101 PUSHs(sv_2mortal(newSVnv(anv)));
2106 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2109 long double aldouble;
2110 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
2111 DO_BO_UNPACK_N(aldouble, long double);
2113 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2115 cdouble += aldouble;
2121 * Initialise the decode mapping. By using a table driven
2122 * algorithm, the code will be character-set independent
2123 * (and just as fast as doing character arithmetic)
2125 if (PL_uudmap['M'] == 0) {
2128 for (i = 0; i < sizeof(PL_uuemap); i += 1)
2129 PL_uudmap[(U8)PL_uuemap[i]] = i;
2131 * Because ' ' and '`' map to the same value,
2132 * we need to decode them both the same.
2137 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2138 sv = sv_2mortal(NEWSV(42, l));
2139 if (l) SvPOK_on(sv);
2142 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2148 next_uni_uu(aTHX_ &s, strend, &a);
2149 next_uni_uu(aTHX_ &s, strend, &b);
2150 next_uni_uu(aTHX_ &s, strend, &c);
2151 next_uni_uu(aTHX_ &s, strend, &d);
2152 hunk[0] = (char)((a << 2) | (b >> 4));
2153 hunk[1] = (char)((b << 4) | (c >> 2));
2154 hunk[2] = (char)((c << 6) | d);
2155 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2163 /* possible checksum byte */
2164 const char *skip = s+UTF8SKIP(s);
2165 if (skip < strend && *skip == '\n')
2171 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2176 len = PL_uudmap[*(U8*)s++] & 077;
2178 if (s < strend && ISUUCHAR(*s))
2179 a = PL_uudmap[*(U8*)s++] & 077;
2182 if (s < strend && ISUUCHAR(*s))
2183 b = PL_uudmap[*(U8*)s++] & 077;
2186 if (s < strend && ISUUCHAR(*s))
2187 c = PL_uudmap[*(U8*)s++] & 077;
2190 if (s < strend && ISUUCHAR(*s))
2191 d = PL_uudmap[*(U8*)s++] & 077;
2194 hunk[0] = (char)((a << 2) | (b >> 4));
2195 hunk[1] = (char)((b << 4) | (c >> 2));
2196 hunk[2] = (char)((c << 6) | d);
2197 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2202 else /* possible checksum byte */
2203 if (s + 1 < strend && s[1] == '\n')
2212 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2213 (checksum > bits_in_uv &&
2214 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2217 anv = (NV) (1 << (checksum & 15));
2218 while (checksum >= 16) {
2222 while (cdouble < 0.0)
2224 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2225 sv = newSVnv(cdouble);
2228 if (checksum < bits_in_uv) {
2229 UV mask = ((UV)1 << checksum) - 1;
2234 XPUSHs(sv_2mortal(sv));
2238 if (symptr->flags & FLAG_SLASH){
2239 if (SP - PL_stack_base - start_sp_offset <= 0)
2240 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2241 if( next_symbol(symptr) ){
2242 if( symptr->howlen == e_number )
2243 Perl_croak(aTHX_ "Count after length/code in unpack" );
2245 /* ...end of char buffer then no decent length available */
2246 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2248 /* take top of stack (hope it's numeric) */
2251 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2254 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2256 datumtype = symptr->code;
2257 explicit_length = FALSE;
2265 return SP - PL_stack_base - start_sp_offset;
2272 I32 gimme = GIMME_V;
2275 const char *pat = SvPV_const(left, llen);
2276 const char *s = SvPV_const(right, rlen);
2277 const char *strend = s + rlen;
2278 const char *patend = pat + llen;
2282 cnt = unpackstring(pat, patend, s, strend,
2283 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2284 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2287 if ( !cnt && gimme == G_SCALAR )
2288 PUSHs(&PL_sv_undef);
2293 doencodes(U8 *h, const char *s, I32 len)
2295 *h++ = PL_uuemap[len];
2297 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2298 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2299 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2300 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2305 const char r = (len > 1 ? s[1] : '\0');
2306 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2307 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2308 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2309 *h++ = PL_uuemap[0];
2316 S_is_an_int(pTHX_ const char *s, STRLEN l)
2318 SV *result = newSVpvn(s, l);
2319 char *const result_c = SvPV_nolen(result); /* convenience */
2320 char *out = result_c;
2330 SvREFCNT_dec(result);
2353 SvREFCNT_dec(result);
2359 SvCUR_set(result, out - result_c);
2363 /* pnum must be '\0' terminated */
2365 S_div128(pTHX_ SV *pnum, bool *done)
2368 char * const s = SvPV(pnum, len);
2374 const int i = m * 10 + (*t - '0');
2375 const int r = (i >> 7); /* r < 10 */
2383 SvCUR_set(pnum, (STRLEN) (t - s));
2388 =for apidoc pack_cat
2390 The engine implementing pack() Perl function. Note: parameters next_in_list and
2391 flags are not used. This call should not be used; use packlist instead.
2397 Perl_pack_cat(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags)
2400 PERL_UNUSED_ARG(next_in_list);
2401 PERL_UNUSED_ARG(flags);
2403 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2405 (void)pack_rec( cat, &sym, beglist, endlist );
2410 =for apidoc packlist
2412 The engine implementing pack() Perl function.
2418 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
2423 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2425 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2426 Also make sure any UTF8 flag is loaded */
2427 SvPV_force(cat, no_len);
2428 if (DO_UTF8(cat)) sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2430 (void)pack_rec( cat, &sym, beglist, endlist );
2433 /* like sv_utf8_upgrade, but also repoint the group start markers */
2435 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2438 const char *from_ptr, *from_start, *from_end, **marks, **m;
2439 char *to_start, *to_ptr;
2441 if (SvUTF8(sv)) return;
2443 from_start = SvPVX_const(sv);
2444 from_end = from_start + SvCUR(sv);
2445 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2446 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2447 if (from_ptr == from_end) {
2448 /* Simple case: no character needs to be changed */
2453 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2454 Newx(to_start, len, char);
2455 Copy(from_start, to_start, from_ptr-from_start, char);
2456 to_ptr = to_start + (from_ptr-from_start);
2458 Newx(marks, sym_ptr->level+2, const char *);
2459 for (group=sym_ptr; group; group = group->previous)
2460 marks[group->level] = from_start + group->strbeg;
2461 marks[sym_ptr->level+1] = from_end+1;
2462 for (m = marks; *m < from_ptr; m++)
2463 *m = to_start + (*m-from_start);
2465 for (;from_ptr < from_end; from_ptr++) {
2466 while (*m == from_ptr) *m++ = to_ptr;
2467 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2471 while (*m == from_ptr) *m++ = to_ptr;
2472 if (m != marks + sym_ptr->level+1) {
2475 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2477 for (group=sym_ptr; group; group = group->previous)
2478 group->strbeg = marks[group->level] - to_start;
2483 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2484 from_start -= SvIVX(sv);
2487 SvFLAGS(sv) &= ~SVf_OOK;
2490 Safefree(from_start);
2491 SvPV_set(sv, to_start);
2492 SvCUR_set(sv, to_ptr - to_start);
2497 /* Exponential string grower. Makes string extension effectively O(n)
2498 needed says how many extra bytes we need (not counting the final '\0')
2499 Only grows the string if there is an actual lack of space
2502 sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2503 const STRLEN cur = SvCUR(sv);
2504 const STRLEN len = SvLEN(sv);
2506 if (len - cur > needed) return SvPVX(sv);
2507 extend = needed > len ? needed : len;
2508 return SvGROW(sv, len+extend+1);
2513 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2515 tempsym_t lookahead;
2516 I32 items = endlist - beglist;
2517 bool found = next_symbol(symptr);
2518 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2519 bool warn_utf8 = ckWARN(WARN_UTF8);
2521 if (symptr->level == 0 && found && symptr->code == 'U') {
2522 marked_upgrade(aTHX_ cat, symptr);
2523 symptr->flags |= FLAG_DO_UTF8;
2526 symptr->strbeg = SvCUR(cat);
2532 SV *lengthcode = Nullsv;
2533 I32 datumtype = symptr->code;
2534 howlen_t howlen = symptr->howlen;
2535 char *start = SvPVX(cat);
2536 char *cur = start + SvCUR(cat);
2538 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2542 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2546 /* e_no_len and e_number */
2547 len = symptr->length;
2552 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2554 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2555 /* We can process this letter. */
2556 STRLEN size = props & PACK_SIZE_MASK;
2557 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2561 /* Look ahead for next symbol. Do we have code/code? */
2562 lookahead = *symptr;
2563 found = next_symbol(&lookahead);
2564 if (symptr->flags & FLAG_SLASH) {
2566 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2567 if (strchr("aAZ", lookahead.code)) {
2568 if (lookahead.howlen == e_number) count = lookahead.length;
2571 count = DO_UTF8(*beglist) ?
2572 sv_len_utf8(*beglist) : sv_len(*beglist);
2574 if (lookahead.code == 'Z') count++;
2577 if (lookahead.howlen == e_number && lookahead.length < items)
2578 count = lookahead.length;
2581 lookahead.howlen = e_number;
2582 lookahead.length = count;
2583 lengthcode = sv_2mortal(newSViv(count));
2586 /* Code inside the switch must take care to properly update
2587 cat (CUR length and '\0' termination) if it updated *cur and
2588 doesn't simply leave using break */
2589 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2591 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2592 (int) TYPE_NO_MODIFIERS(datumtype));
2594 Perl_croak(aTHX_ "'%%' may not be used in pack");
2597 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2598 case '.' | TYPE_IS_SHRIEKING:
2601 if (howlen == e_star) from = start;
2602 else if (len == 0) from = cur;
2604 tempsym_t *group = symptr;
2606 while (--len && group) group = group->previous;
2607 from = group ? start + group->strbeg : start;
2610 len = SvIV(fromstr);
2612 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2613 case '@' | TYPE_IS_SHRIEKING:
2616 from = start + symptr->strbeg;
2618 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2619 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2620 #else /* PERL_PACK_CAN_SHRIEKSIGN */
2624 while (len && from < cur) {
2625 from += UTF8SKIP(from);
2629 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2631 /* Here we know from == cur */
2633 GROWING(0, cat, start, cur, len);
2634 Zero(cur, len, char);
2636 } else if (from < cur) {
2639 } else goto no_change;
2647 if (len > 0) goto grow;
2648 if (len == 0) goto no_change;
2655 tempsym_t savsym = *symptr;
2656 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2657 symptr->flags |= group_modifiers;
2658 symptr->patend = savsym.grpend;
2660 symptr->previous = &lookahead;
2663 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2664 else symptr->flags &= ~FLAG_PARSE_UTF8;
2665 was_utf8 = SvUTF8(cat);
2666 symptr->patptr = savsym.grpbeg;
2667 beglist = pack_rec(cat, symptr, beglist, endlist);
2668 if (SvUTF8(cat) != was_utf8)
2669 /* This had better be an upgrade while in utf8==0 mode */
2672 if (savsym.howlen == e_star && beglist == endlist)
2673 break; /* No way to continue */
2675 lookahead.flags = symptr->flags & ~group_modifiers;
2678 case 'X' | TYPE_IS_SHRIEKING:
2679 if (!len) /* Avoid division by 0 */
2686 hop += UTF8SKIP(hop);
2693 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2697 len = (cur-start) % len;
2701 if (len < 1) goto no_change;
2705 Perl_croak(aTHX_ "'%c' outside of string in pack",
2706 (int) TYPE_NO_MODIFIERS(datumtype));
2707 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2709 Perl_croak(aTHX_ "'%c' outside of string in pack",
2710 (int) TYPE_NO_MODIFIERS(datumtype));
2716 if (cur - start < len)
2717 Perl_croak(aTHX_ "'%c' outside of string in pack",
2718 (int) TYPE_NO_MODIFIERS(datumtype));
2721 if (cur < start+symptr->strbeg) {
2722 /* Make sure group starts don't point into the void */
2724 const STRLEN length = cur-start;
2725 for (group = symptr;
2726 group && length < group->strbeg;
2727 group = group->previous) group->strbeg = length;
2728 lookahead.strbeg = length;
2731 case 'x' | TYPE_IS_SHRIEKING: {
2733 if (!len) /* Avoid division by 0 */
2735 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2736 else ai32 = (cur - start) % len;
2737 if (ai32 == 0) goto no_change;
2749 aptr = SvPV_const(fromstr, fromlen);
2750 if (DO_UTF8(fromstr)) {
2751 const char *end, *s;
2753 if (!utf8 && !SvUTF8(cat)) {
2754 marked_upgrade(aTHX_ cat, symptr);
2755 lookahead.flags |= FLAG_DO_UTF8;
2756 lookahead.strbeg = symptr->strbeg;
2759 cur = start + SvCUR(cat);
2761 if (howlen == e_star) {
2762 if (utf8) goto string_copy;
2766 end = aptr + fromlen;
2767 fromlen = datumtype == 'Z' ? len-1 : len;
2768 while ((I32) fromlen > 0 && s < end) {
2773 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2776 if (datumtype == 'Z') len++;
2782 fromlen = len - fromlen;
2783 if (datumtype == 'Z') fromlen--;
2784 if (howlen == e_star) {
2786 if (datumtype == 'Z') len++;
2788 GROWING(0, cat, start, cur, len);
2789 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2790 datumtype | TYPE_IS_PACK))
2791 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2795 if (howlen == e_star) {
2797 if (datumtype == 'Z') len++;
2799 if (len <= (I32) fromlen) {
2801 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2803 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2805 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2806 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2808 while (fromlen > 0) {
2809 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2815 if (howlen == e_star) {
2817 if (datumtype == 'Z') len++;
2819 if (len <= (I32) fromlen) {
2821 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2823 GROWING(0, cat, start, cur, len);
2824 Copy(aptr, cur, fromlen, char);
2828 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2834 const char *str, *end;
2841 str = SvPV_const(fromstr, fromlen);
2842 end = str + fromlen;
2843 if (DO_UTF8(fromstr)) {
2845 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2847 utf8_source = FALSE;
2848 utf8_flags = 0; /* Unused, but keep compilers happy */
2850 if (howlen == e_star) len = fromlen;
2851 field_len = (len+7)/8;
2852 GROWING(utf8, cat, start, cur, field_len);
2853 if (len > (I32)fromlen) len = fromlen;
2856 if (datumtype == 'B')
2860 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2862 } else bits |= *str++ & 1;
2863 if (l & 7) bits <<= 1;
2865 PUSH_BYTE(utf8, cur, bits);
2870 /* datumtype == 'b' */
2874 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2875 if (val & 1) bits |= 0x80;
2876 } else if (*str++ & 1)
2878 if (l & 7) bits >>= 1;
2880 PUSH_BYTE(utf8, cur, bits);
2886 if (datumtype == 'B')
2887 bits <<= 7 - (l & 7);
2889 bits >>= 7 - (l & 7);
2890 PUSH_BYTE(utf8, cur, bits);
2893 /* Determine how many chars are left in the requested field */
2895 if (howlen == e_star) field_len = 0;
2896 else field_len -= l;
2897 Zero(cur, field_len, char);
2903 const char *str, *end;
2910 str = SvPV_const(fromstr, fromlen);
2911 end = str + fromlen;
2912 if (DO_UTF8(fromstr)) {
2914 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2916 utf8_source = FALSE;
2917 utf8_flags = 0; /* Unused, but keep compilers happy */
2919 if (howlen == e_star) len = fromlen;
2920 field_len = (len+1)/2;
2921 GROWING(utf8, cat, start, cur, field_len);
2922 if (!utf8 && len > (I32)fromlen) len = fromlen;
2925 if (datumtype == 'H')
2929 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2930 if (val < 256 && isALPHA(val))
2931 bits |= (val + 9) & 0xf;
2934 } else if (isALPHA(*str))
2935 bits |= (*str++ + 9) & 0xf;
2937 bits |= *str++ & 0xf;
2938 if (l & 1) bits <<= 4;
2940 PUSH_BYTE(utf8, cur, bits);
2948 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2949 if (val < 256 && isALPHA(val))
2950 bits |= ((val + 9) & 0xf) << 4;
2952 bits |= (val & 0xf) << 4;
2953 } else if (isALPHA(*str))
2954 bits |= ((*str++ + 9) & 0xf) << 4;
2956 bits |= (*str++ & 0xf) << 4;
2957 if (l & 1) bits >>= 4;
2959 PUSH_BYTE(utf8, cur, bits);
2965 PUSH_BYTE(utf8, cur, bits);
2968 /* Determine how many chars are left in the requested field */
2970 if (howlen == e_star) field_len = 0;
2971 else field_len -= l;
2972 Zero(cur, field_len, char);
2980 aiv = SvIV(fromstr);
2981 if ((-128 > aiv || aiv > 127) &&
2983 Perl_warner(aTHX_ packWARN(WARN_PACK),
2984 "Character in 'c' format wrapped in pack");
2985 PUSH_BYTE(utf8, cur, aiv & 0xff);
2990 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2993 GROWING(0, cat, start, cur, len);
2997 aiv = SvIV(fromstr);
2998 if ((0 > aiv || aiv > 0xff) &&
3000 Perl_warner(aTHX_ packWARN(WARN_PACK),
3001 "Character in 'C' format wrapped in pack");
3002 *cur++ = aiv & 0xff;
3007 U8 in_bytes = IN_BYTES;
3009 end = start+SvLEN(cat)-1;
3010 if (utf8) end -= UTF8_MAXLEN-1;
3014 auv = SvUV(fromstr);
3015 if (in_bytes) auv = auv % 0x100;
3020 SvCUR_set(cat, cur - start);
3022 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3023 end = start+SvLEN(cat)-UTF8_MAXLEN;
3025 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3028 0 : UNICODE_ALLOW_ANY);
3033 SvCUR_set(cat, cur - start);
3034 marked_upgrade(aTHX_ cat, symptr);
3035 lookahead.flags |= FLAG_DO_UTF8;
3036 lookahead.strbeg = symptr->strbeg;
3039 cur = start + SvCUR(cat);
3040 end = start+SvLEN(cat)-UTF8_MAXLEN;
3043 if (ckWARN(WARN_PACK))
3044 Perl_warner(aTHX_ packWARN(WARN_PACK),
3045 "Character in 'W' format wrapped in pack");
3050 SvCUR_set(cat, cur - start);
3051 GROWING(0, cat, start, cur, len+1);
3052 end = start+SvLEN(cat)-1;
3054 *(U8 *) cur++ = (U8)auv;
3063 if (!(symptr->flags & FLAG_DO_UTF8)) {
3064 marked_upgrade(aTHX_ cat, symptr);
3065 lookahead.flags |= FLAG_DO_UTF8;
3066 lookahead.strbeg = symptr->strbeg;
3072 end = start+SvLEN(cat);
3073 if (!utf8) end -= UTF8_MAXLEN;
3077 auv = SvUV(fromstr);
3079 U8 buffer[UTF8_MAXLEN], *endb;
3080 endb = uvuni_to_utf8_flags(buffer, auv,
3082 0 : UNICODE_ALLOW_ANY);
3083 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3085 SvCUR_set(cat, cur - start);
3086 GROWING(0, cat, start, cur,
3087 len+(endb-buffer)*UTF8_EXPAND);
3088 end = start+SvLEN(cat);
3090 bytes_to_uni(aTHX_ buffer, endb-buffer, &cur);
3094 SvCUR_set(cat, cur - start);
3095 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3096 end = start+SvLEN(cat)-UTF8_MAXLEN;
3098 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3100 0 : UNICODE_ALLOW_ANY);
3105 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3111 anv = SvNV(fromstr);
3113 /* VOS does not automatically map a floating-point overflow
3114 during conversion from double to float into infinity, so we
3115 do it by hand. This code should either be generalized for
3116 any OS that needs it, or removed if and when VOS implements
3117 posix-976 (suggestion to support mapping to infinity).
3118 Paul.Green@stratus.com 02-04-02. */
3120 afloat = _float_constants[0]; /* single prec. inf. */
3121 else if (anv < -FLT_MAX)
3122 afloat = _float_constants[0]; /* single prec. inf. */
3123 else afloat = (float) anv;
3125 # if defined(VMS) && !defined(__IEEE_FP)
3126 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3127 * on Alpha; fake it if we don't have them.
3131 else if (anv < -FLT_MAX)
3133 else afloat = (float)anv;
3135 afloat = (float)anv;
3137 #endif /* __VOS__ */
3138 DO_BO_PACK_N(afloat, float);
3139 PUSH_VAR(utf8, cur, afloat);
3147 anv = SvNV(fromstr);
3149 /* VOS does not automatically map a floating-point overflow
3150 during conversion from long double to double into infinity,
3151 so we do it by hand. This code should either be generalized
3152 for any OS that needs it, or removed if and when VOS
3153 implements posix-976 (suggestion to support mapping to
3154 infinity). Paul.Green@stratus.com 02-04-02. */
3156 adouble = _double_constants[0]; /* double prec. inf. */
3157 else if (anv < -DBL_MAX)
3158 adouble = _double_constants[0]; /* double prec. inf. */
3159 else adouble = (double) anv;
3161 # if defined(VMS) && !defined(__IEEE_FP)
3162 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3163 * on Alpha; fake it if we don't have them.
3167 else if (anv < -DBL_MAX)
3169 else adouble = (double)anv;
3171 adouble = (double)anv;
3173 #endif /* __VOS__ */
3174 DO_BO_PACK_N(adouble, double);
3175 PUSH_VAR(utf8, cur, adouble);
3180 Zero(&anv, 1, NV); /* can be long double with unused bits */
3183 anv = SvNV(fromstr);
3184 DO_BO_PACK_N(anv, NV);
3185 PUSH_VAR(utf8, cur, anv);
3189 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3191 long double aldouble;
3192 /* long doubles can have unused bits, which may be nonzero */
3193 Zero(&aldouble, 1, long double);
3196 aldouble = (long double)SvNV(fromstr);
3197 DO_BO_PACK_N(aldouble, long double);
3198 PUSH_VAR(utf8, cur, aldouble);
3203 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3204 case 'n' | TYPE_IS_SHRIEKING:
3210 ai16 = (I16)SvIV(fromstr);
3212 ai16 = PerlSock_htons(ai16);
3214 PUSH16(utf8, cur, &ai16);
3217 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3218 case 'v' | TYPE_IS_SHRIEKING:
3224 ai16 = (I16)SvIV(fromstr);
3228 PUSH16(utf8, cur, &ai16);
3231 case 'S' | TYPE_IS_SHRIEKING:
3232 #if SHORTSIZE != SIZE16
3234 unsigned short aushort;
3236 aushort = SvUV(fromstr);
3237 DO_BO_PACK(aushort, s);
3238 PUSH_VAR(utf8, cur, aushort);
3248 au16 = (U16)SvUV(fromstr);
3249 DO_BO_PACK(au16, 16);
3250 PUSH16(utf8, cur, &au16);
3253 case 's' | TYPE_IS_SHRIEKING:
3254 #if SHORTSIZE != SIZE16
3258 ashort = SvIV(fromstr);
3259 DO_BO_PACK(ashort, s);
3260 PUSH_VAR(utf8, cur, ashort);
3270 ai16 = (I16)SvIV(fromstr);
3271 DO_BO_PACK(ai16, 16);
3272 PUSH16(utf8, cur, &ai16);
3276 case 'I' | TYPE_IS_SHRIEKING:
3280 auint = SvUV(fromstr);
3281 DO_BO_PACK(auint, i);
3282 PUSH_VAR(utf8, cur, auint);
3289 aiv = SvIV(fromstr);
3290 #if IVSIZE == INTSIZE
3292 #elif IVSIZE == LONGSIZE
3294 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3295 DO_BO_PACK(aiv, 64);
3297 Perl_croak(aTHX_ "'j' not supported on this platform");
3299 PUSH_VAR(utf8, cur, aiv);
3306 auv = SvUV(fromstr);
3307 #if UVSIZE == INTSIZE
3309 #elif UVSIZE == LONGSIZE
3311 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3312 DO_BO_PACK(auv, 64);
3314 Perl_croak(aTHX_ "'J' not supported on this platform");
3316 PUSH_VAR(utf8, cur, auv);
3323 anv = SvNV(fromstr);
3327 SvCUR_set(cat, cur - start);
3328 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3331 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3332 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3333 any negative IVs will have already been got by the croak()
3334 above. IOK is untrue for fractions, so we test them
3335 against UV_MAX_P1. */
3336 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3337 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3338 char *in = buf + sizeof(buf);
3339 UV auv = SvUV(fromstr);
3342 *--in = (char)((auv & 0x7f) | 0x80);
3345 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3346 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3347 in, (buf + sizeof(buf)) - in);
3348 } else if (SvPOKp(fromstr))
3350 else if (SvNOKp(fromstr)) {
3351 /* 10**NV_MAX_10_EXP is the largest power of 10
3352 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3353 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3354 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3355 And with that many bytes only Inf can overflow.
3356 Some C compilers are strict about integral constant
3357 expressions so we conservatively divide by a slightly
3358 smaller integer instead of multiplying by the exact
3359 floating-point value.
3361 #ifdef NV_MAX_10_EXP
3362 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3363 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3365 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3366 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3368 char *in = buf + sizeof(buf);
3370 anv = Perl_floor(anv);
3372 const NV next = Perl_floor(anv / 128);
3373 if (in <= buf) /* this cannot happen ;-) */
3374 Perl_croak(aTHX_ "Cannot compress integer in pack");
3375 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3378 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3379 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3380 in, (buf + sizeof(buf)) - in);
3389 /* Copy string and check for compliance */
3390 from = SvPV_const(fromstr, len);
3391 if ((norm = is_an_int(from, len)) == NULL)
3392 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3394 Newx(result, len, char);
3397 while (!done) *--in = div128(norm, &done) | 0x80;
3398 result[len - 1] &= 0x7F; /* clear continue bit */
3399 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3400 in, (result + len) - in);
3402 SvREFCNT_dec(norm); /* free norm */
3407 case 'i' | TYPE_IS_SHRIEKING:
3411 aint = SvIV(fromstr);
3412 DO_BO_PACK(aint, i);
3413 PUSH_VAR(utf8, cur, aint);
3416 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3417 case 'N' | TYPE_IS_SHRIEKING:
3423 au32 = SvUV(fromstr);
3425 au32 = PerlSock_htonl(au32);
3427 PUSH32(utf8, cur, &au32);
3430 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3431 case 'V' | TYPE_IS_SHRIEKING:
3437 au32 = SvUV(fromstr);
3441 PUSH32(utf8, cur, &au32);
3444 case 'L' | TYPE_IS_SHRIEKING:
3445 #if LONGSIZE != SIZE32
3447 unsigned long aulong;
3449 aulong = SvUV(fromstr);
3450 DO_BO_PACK(aulong, l);
3451 PUSH_VAR(utf8, cur, aulong);
3461 au32 = SvUV(fromstr);
3462 DO_BO_PACK(au32, 32);
3463 PUSH32(utf8, cur, &au32);
3466 case 'l' | TYPE_IS_SHRIEKING:
3467 #if LONGSIZE != SIZE32
3471 along = SvIV(fromstr);
3472 DO_BO_PACK(along, l);
3473 PUSH_VAR(utf8, cur, along);
3483 ai32 = SvIV(fromstr);
3484 DO_BO_PACK(ai32, 32);
3485 PUSH32(utf8, cur, &ai32);
3493 auquad = (Uquad_t) SvUV(fromstr);
3494 DO_BO_PACK(auquad, 64);
3495 PUSH_VAR(utf8, cur, auquad);
3502 aquad = (Quad_t)SvIV(fromstr);
3503 DO_BO_PACK(aquad, 64);
3504 PUSH_VAR(utf8, cur, aquad);
3507 #endif /* HAS_QUAD */
3509 len = 1; /* assume SV is correct length */
3510 GROWING(utf8, cat, start, cur, sizeof(char *));
3517 SvGETMAGIC(fromstr);
3518 if (!SvOK(fromstr)) aptr = NULL;
3521 /* XXX better yet, could spirit away the string to
3522 * a safe spot and hang on to it until the result
3523 * of pack() (and all copies of the result) are
3526 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3527 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
3528 Perl_warner(aTHX_ packWARN(WARN_PACK),
3529 "Attempt to pack pointer to temporary value");
3531 if (SvPOK(fromstr) || SvNIOK(fromstr))
3532 aptr = SvPV_nomg_const(fromstr, n_a);
3534 aptr = SvPV_force_flags(fromstr, n_a, 0);
3536 DO_BO_PACK_PC(aptr);
3537 PUSH_VAR(utf8, cur, aptr);
3541 const char *aptr, *aend;
3545 if (len <= 2) len = 45;
3546 else len = len / 3 * 3;
3548 Perl_warner(aTHX_ packWARN(WARN_PACK),
3549 "Field too wide in 'u' format in pack");
3552 aptr = SvPV_const(fromstr, fromlen);
3553 from_utf8 = DO_UTF8(fromstr);
3555 aend = aptr + fromlen;
3556 fromlen = sv_len_utf8(fromstr);
3557 } else aend = NULL; /* Unused, but keep compilers happy */
3558 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3559 while (fromlen > 0) {
3562 U8 hunk[1+63/3*4+1];
3564 if ((I32)fromlen > len)
3570 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3571 'u' | TYPE_IS_PACK)) {
3573 SvCUR_set(cat, cur - start);
3574 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3576 end = doencodes(hunk, buffer, todo);
3578 end = doencodes(hunk, aptr, todo);
3581 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3588 SvCUR_set(cat, cur - start);
3590 *symptr = lookahead;
3599 dSP; dMARK; dORIGMARK; dTARGET;
3600 register SV *cat = TARG;
3602 SV *pat_sv = *++MARK;
3603 register const char *pat = SvPV_const(pat_sv, fromlen);
3604 register const char *patend = pat + fromlen;
3607 sv_setpvn(cat, "", 0);
3610 packlist(cat, pat, patend, MARK, SP + 1);
3620 * c-indentation-style: bsd
3622 * indent-tabs-mode: t
3625 * ex: set ts=8 sts=4 sw=4 noet: