This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
misc MAD coredump fixes and parser leak fixes
[perl5.git] / pp_pack.c
... / ...
CommitLineData
1/* pp_pack.c
2 *
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 */
18
19/* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
24 *
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
27 */
28
29
30#include "EXTERN.h"
31#define PERL_IN_PP_PACK_C
32#include "perl.h"
33
34/* Types used by pack/unpack */
35typedef enum {
36 e_no_len, /* no length */
37 e_number, /* number, [] */
38 e_star /* asterisk */
39} howlen_t;
40
41typedef struct tempsym {
42 const char* patptr; /* current template char */
43 const char* patend; /* one after last char */
44 const char* grpbeg; /* 1st char of ()-group */
45 const char* grpend; /* end of ()-group */
46 I32 code; /* template code (!<>) */
47 I32 length; /* length/repeat count */
48 howlen_t howlen; /* how length is given */
49 int level; /* () nesting level */
50 U32 flags; /* /=4, comma=2, pack=1 */
51 /* and group modifiers */
52 STRLEN strbeg; /* offset of group start */
53 struct tempsym *previous; /* previous group */
54} tempsym_t;
55
56#define TEMPSYM_INIT(symptr, p, e, f) \
57 STMT_START { \
58 (symptr)->patptr = (p); \
59 (symptr)->patend = (e); \
60 (symptr)->grpbeg = NULL; \
61 (symptr)->grpend = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->code = 0; \
64 (symptr)->length = 0; \
65 (symptr)->howlen = e_no_len; \
66 (symptr)->level = 0; \
67 (symptr)->flags = (f); \
68 (symptr)->strbeg = 0; \
69 (symptr)->previous = NULL; \
70 } STMT_END
71
72#if PERL_VERSION >= 9
73# define PERL_PACK_CAN_BYTEORDER
74# define PERL_PACK_CAN_SHRIEKSIGN
75#endif
76
77#ifndef CHAR_BIT
78# define CHAR_BIT 8
79#endif
80/* Maximum number of bytes to which a byte can grow due to upgrade */
81#define UTF8_EXPAND 2
82
83/*
84 * Offset for integer pack/unpack.
85 *
86 * On architectures where I16 and I32 aren't really 16 and 32 bits,
87 * which for now are all Crays, pack and unpack have to play games.
88 */
89
90/*
91 * These values are required for portability of pack() output.
92 * If they're not right on your machine, then pack() and unpack()
93 * wouldn't work right anyway; you'll need to apply the Cray hack.
94 * (I'd like to check them with #if, but you can't use sizeof() in
95 * the preprocessor.) --???
96 */
97/*
98 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
99 defines are now in config.h. --Andy Dougherty April 1998
100 */
101#define SIZE16 2
102#define SIZE32 4
103
104/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
105 --jhi Feb 1999 */
106
107#if U16SIZE > SIZE16 || U32SIZE > SIZE32
108# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
109# define OFF16(p) ((char*)(p))
110# define OFF32(p) ((char*)(p))
111# else
112# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
113# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
114# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
115# else
116 ++++ bad cray byte order
117# endif
118# endif
119#else
120# define OFF16(p) ((char *) (p))
121# define OFF32(p) ((char *) (p))
122#endif
123
124/* Only to be used inside a loop (see the break) */
125#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
126 if (utf8) { \
127 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
128 } else { \
129 Copy(s, OFF16(p), SIZE16, char); \
130 (s) += SIZE16; \
131 } \
132} STMT_END
133
134/* Only to be used inside a loop (see the break) */
135#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
136 if (utf8) { \
137 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
138 } else { \
139 Copy(s, OFF32(p), SIZE32, char); \
140 (s) += SIZE32; \
141 } \
142} STMT_END
143
144#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
145#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
146
147/* Only to be used inside a loop (see the break) */
148#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
149STMT_START { \
150 if (utf8) { \
151 if (!uni_to_bytes(aTHX_ &s, strend, \
152 (char *) &var, sizeof(var), datumtype)) break;\
153 } else { \
154 Copy(s, (char *) &var, sizeof(var), char); \
155 s += sizeof(var); \
156 } \
157} STMT_END
158
159#define PUSH_VAR(utf8, aptr, var) \
160 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
161
162/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
163#define MAX_SUB_TEMPLATE_LEVEL 100
164
165/* flags (note that type modifiers can also be used as flags!) */
166#define FLAG_WAS_UTF8 0x40
167#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
168#define FLAG_UNPACK_ONLY_ONE 0x10
169#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
170#define FLAG_SLASH 0x04
171#define FLAG_COMMA 0x02
172#define FLAG_PACK 0x01
173
174STATIC SV *
175S_mul128(pTHX_ SV *sv, U8 m)
176{
177 STRLEN len;
178 char *s = SvPV(sv, len);
179 char *t;
180
181 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
182 SV * const tmpNew = newSVpvs("0000000000");
183
184 sv_catsv(tmpNew, sv);
185 SvREFCNT_dec(sv); /* free old sv */
186 sv = tmpNew;
187 s = SvPV(sv, len);
188 }
189 t = s + len - 1;
190 while (!*t) /* trailing '\0'? */
191 t--;
192 while (t > s) {
193 const U32 i = ((*t - '0') << 7) + m;
194 *(t--) = '0' + (char)(i % 10);
195 m = (char)(i / 10);
196 }
197 return (sv);
198}
199
200/* Explosives and implosives. */
201
202#if 'I' == 73 && 'J' == 74
203/* On an ASCII/ISO kind of system */
204#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
205#else
206/*
207 Some other sort of character set - use memchr() so we don't match
208 the null byte.
209 */
210#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
211#endif
212
213/* type modifiers */
214#define TYPE_IS_SHRIEKING 0x100
215#define TYPE_IS_BIG_ENDIAN 0x200
216#define TYPE_IS_LITTLE_ENDIAN 0x400
217#define TYPE_IS_PACK 0x800
218#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
219#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
220#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
221
222#ifdef PERL_PACK_CAN_SHRIEKSIGN
223# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
224#else
225# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
226#endif
227
228#ifndef PERL_PACK_CAN_BYTEORDER
229/* Put "can't" first because it is shorter */
230# define TYPE_ENDIANNESS(t) 0
231# define TYPE_NO_ENDIANNESS(t) (t)
232
233# define ENDIANNESS_ALLOWED_TYPES ""
234
235# define DO_BO_UNPACK(var, type)
236# define DO_BO_PACK(var, type)
237# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
238# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
239# define DO_BO_UNPACK_N(var, type)
240# define DO_BO_PACK_N(var, type)
241# define DO_BO_UNPACK_P(var)
242# define DO_BO_PACK_P(var)
243# define DO_BO_UNPACK_PC(var)
244# define DO_BO_PACK_PC(var)
245
246#else /* PERL_PACK_CAN_BYTEORDER */
247
248# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
249# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
250
251# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
252
253# define DO_BO_UNPACK(var, type) \
254 STMT_START { \
255 switch (TYPE_ENDIANNESS(datumtype)) { \
256 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
257 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
258 default: break; \
259 } \
260 } STMT_END
261
262# define DO_BO_PACK(var, type) \
263 STMT_START { \
264 switch (TYPE_ENDIANNESS(datumtype)) { \
265 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
266 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
267 default: break; \
268 } \
269 } STMT_END
270
271# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
272 STMT_START { \
273 switch (TYPE_ENDIANNESS(datumtype)) { \
274 case TYPE_IS_BIG_ENDIAN: \
275 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
276 break; \
277 case TYPE_IS_LITTLE_ENDIAN: \
278 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
279 break; \
280 default: \
281 break; \
282 } \
283 } STMT_END
284
285# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
286 STMT_START { \
287 switch (TYPE_ENDIANNESS(datumtype)) { \
288 case TYPE_IS_BIG_ENDIAN: \
289 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
290 break; \
291 case TYPE_IS_LITTLE_ENDIAN: \
292 var = (post_cast *) my_htole ## type ((pre_cast) var); \
293 break; \
294 default: \
295 break; \
296 } \
297 } STMT_END
298
299# define BO_CANT_DOIT(action, type) \
300 STMT_START { \
301 switch (TYPE_ENDIANNESS(datumtype)) { \
302 case TYPE_IS_BIG_ENDIAN: \
303 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
304 "platform", #action, #type); \
305 break; \
306 case TYPE_IS_LITTLE_ENDIAN: \
307 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
308 "platform", #action, #type); \
309 break; \
310 default: \
311 break; \
312 } \
313 } STMT_END
314
315# if PTRSIZE == INTSIZE
316# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
317# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
318# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
319# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
320# elif PTRSIZE == LONGSIZE
321# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
322# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
323# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
324# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
325# elif PTRSIZE == IVSIZE
326# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
327# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
328# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
329# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
330# else
331# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
332# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
333# define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
334# define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
335# endif
336
337# if defined(my_htolen) && defined(my_letohn) && \
338 defined(my_htoben) && defined(my_betohn)
339# define DO_BO_UNPACK_N(var, type) \
340 STMT_START { \
341 switch (TYPE_ENDIANNESS(datumtype)) { \
342 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
343 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
344 default: break; \
345 } \
346 } STMT_END
347
348# define DO_BO_PACK_N(var, type) \
349 STMT_START { \
350 switch (TYPE_ENDIANNESS(datumtype)) { \
351 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
352 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
353 default: break; \
354 } \
355 } STMT_END
356# else
357# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
358# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
359# endif
360
361#endif /* PERL_PACK_CAN_BYTEORDER */
362
363#define PACK_SIZE_CANNOT_CSUM 0x80
364#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
365#define PACK_SIZE_MASK 0x3F
366
367/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
368 in). You're unlikely ever to need to regenerate them. */
369
370#if TYPE_IS_SHRIEKING != 0x100
371 ++++shriek offset should be 256
372#endif
373
374typedef U8 packprops_t;
375#if 'J'-'I' == 1
376/* ASCII */
377STATIC const packprops_t packprops[512] = {
378 /* normal */
379 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
380 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
381 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
383 0, 0, 0,
384 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
385#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
386 /* D */ LONG_DOUBLESIZE,
387#else
388 0,
389#endif
390 0,
391 /* F */ NVSIZE,
392 0, 0,
393 /* I */ sizeof(unsigned int),
394 /* J */ UVSIZE,
395 0,
396 /* L */ SIZE32,
397 0,
398 /* N */ SIZE32,
399 0, 0,
400#if defined(HAS_QUAD)
401 /* Q */ sizeof(Uquad_t),
402#else
403 0,
404#endif
405 0,
406 /* S */ SIZE16,
407 0,
408 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
409 /* V */ SIZE32,
410 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
411 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
412 /* c */ sizeof(char),
413 /* d */ sizeof(double),
414 0,
415 /* f */ sizeof(float),
416 0, 0,
417 /* i */ sizeof(int),
418 /* j */ IVSIZE,
419 0,
420 /* l */ SIZE32,
421 0,
422 /* n */ SIZE16,
423 0,
424 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
425#if defined(HAS_QUAD)
426 /* q */ sizeof(Quad_t),
427#else
428 0,
429#endif
430 0,
431 /* s */ SIZE16,
432 0, 0,
433 /* v */ SIZE16,
434 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
435 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0,
444 /* shrieking */
445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 0, 0, 0, 0, 0, 0, 0, 0, 0,
450 /* I */ sizeof(unsigned int),
451 0, 0,
452 /* L */ sizeof(unsigned long),
453 0,
454#if defined(PERL_PACK_CAN_SHRIEKSIGN)
455 /* N */ SIZE32,
456#else
457 0,
458#endif
459 0, 0, 0, 0,
460 /* S */ sizeof(unsigned short),
461 0, 0,
462#if defined(PERL_PACK_CAN_SHRIEKSIGN)
463 /* V */ SIZE32,
464#else
465 0,
466#endif
467 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
468 0, 0,
469 /* i */ sizeof(int),
470 0, 0,
471 /* l */ sizeof(long),
472 0,
473#if defined(PERL_PACK_CAN_SHRIEKSIGN)
474 /* n */ SIZE16,
475#else
476 0,
477#endif
478 0, 0, 0, 0,
479 /* s */ sizeof(short),
480 0, 0,
481#if defined(PERL_PACK_CAN_SHRIEKSIGN)
482 /* v */ SIZE16,
483#else
484 0,
485#endif
486 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
487 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
488 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
489 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
490 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494 0, 0, 0, 0, 0, 0, 0, 0, 0
495};
496#else
497/* EBCDIC (or bust) */
498STATIC const packprops_t packprops[512] = {
499 /* normal */
500 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
506 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
507 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
508 0, 0, 0,
509 /* c */ sizeof(char),
510 /* d */ sizeof(double),
511 0,
512 /* f */ sizeof(float),
513 0, 0,
514 /* i */ sizeof(int),
515 0, 0, 0, 0, 0, 0, 0,
516 /* j */ IVSIZE,
517 0,
518 /* l */ SIZE32,
519 0,
520 /* n */ SIZE16,
521 0,
522 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
523#if defined(HAS_QUAD)
524 /* q */ sizeof(Quad_t),
525#else
526 0,
527#endif
528 0, 0, 0, 0, 0, 0, 0, 0, 0,
529 /* s */ SIZE16,
530 0, 0,
531 /* v */ SIZE16,
532 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
533 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
534 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
535 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
536#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
537 /* D */ LONG_DOUBLESIZE,
538#else
539 0,
540#endif
541 0,
542 /* F */ NVSIZE,
543 0, 0,
544 /* I */ sizeof(unsigned int),
545 0, 0, 0, 0, 0, 0, 0,
546 /* J */ UVSIZE,
547 0,
548 /* L */ SIZE32,
549 0,
550 /* N */ SIZE32,
551 0, 0,
552#if defined(HAS_QUAD)
553 /* Q */ sizeof(Uquad_t),
554#else
555 0,
556#endif
557 0, 0, 0, 0, 0, 0, 0, 0, 0,
558 /* S */ SIZE16,
559 0,
560 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
561 /* V */ SIZE32,
562 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
564 0, 0, 0, 0, 0, 0, 0, 0, 0,
565 /* shrieking */
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
572 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
573 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
574 0, 0, 0, 0, 0, 0, 0, 0, 0,
575 /* i */ sizeof(int),
576 0, 0, 0, 0, 0, 0, 0, 0, 0,
577 /* l */ sizeof(long),
578 0,
579#if defined(PERL_PACK_CAN_SHRIEKSIGN)
580 /* n */ SIZE16,
581#else
582 0,
583#endif
584 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
585 /* s */ sizeof(short),
586 0, 0,
587#if defined(PERL_PACK_CAN_SHRIEKSIGN)
588 /* v */ SIZE16,
589#else
590 0,
591#endif
592 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
593 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
594 0, 0, 0,
595 /* I */ sizeof(unsigned int),
596 0, 0, 0, 0, 0, 0, 0, 0, 0,
597 /* L */ sizeof(unsigned long),
598 0,
599#if defined(PERL_PACK_CAN_SHRIEKSIGN)
600 /* N */ SIZE32,
601#else
602 0,
603#endif
604 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
605 /* S */ sizeof(unsigned short),
606 0, 0,
607#if defined(PERL_PACK_CAN_SHRIEKSIGN)
608 /* V */ SIZE32,
609#else
610 0,
611#endif
612 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
613 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
614};
615#endif
616
617STATIC U8
618uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
619{
620 STRLEN retlen;
621 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
622 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
623 /* We try to process malformed UTF-8 as much as possible (preferrably with
624 warnings), but these two mean we make no progress in the string and
625 might enter an infinite loop */
626 if (retlen == (STRLEN) -1 || retlen == 0)
627 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
628 (int) TYPE_NO_MODIFIERS(datumtype));
629 if (val >= 0x100) {
630 if (ckWARN(WARN_UNPACK))
631 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
632 "Character in '%c' format wrapped in unpack",
633 (int) TYPE_NO_MODIFIERS(datumtype));
634 val &= 0xff;
635 }
636 *s += retlen;
637 return (U8)val;
638}
639
640#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
641 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
642 *(U8 *)(s)++)
643
644STATIC bool
645uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
646{
647 UV val;
648 STRLEN retlen;
649 const char *from = *s;
650 int bad = 0;
651 const U32 flags = ckWARN(WARN_UTF8) ?
652 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
653 for (;buf_len > 0; buf_len--) {
654 if (from >= end) return FALSE;
655 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
656 if (retlen == (STRLEN) -1 || retlen == 0) {
657 from += UTF8SKIP(from);
658 bad |= 1;
659 } else from += retlen;
660 if (val >= 0x100) {
661 bad |= 2;
662 val &= 0xff;
663 }
664 *(U8 *)buf++ = (U8)val;
665 }
666 /* We have enough characters for the buffer. Did we have problems ? */
667 if (bad) {
668 if (bad & 1) {
669 /* Rewalk the string fragment while warning */
670 const char *ptr;
671 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
672 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
673 if (ptr >= end) break;
674 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
675 }
676 if (from > end) from = end;
677 }
678 if ((bad & 2) && ckWARN(WARN_UNPACK))
679 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
680 WARN_PACK : WARN_UNPACK),
681 "Character(s) in '%c' format wrapped in %s",
682 (int) TYPE_NO_MODIFIERS(datumtype),
683 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
684 }
685 *s = from;
686 return TRUE;
687}
688
689STATIC bool
690next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
691{
692 dVAR;
693 STRLEN retlen;
694 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
695 if (val >= 0x100 || !ISUUCHAR(val) ||
696 retlen == (STRLEN) -1 || retlen == 0) {
697 *out = 0;
698 return FALSE;
699 }
700 *out = PL_uudmap[val] & 077;
701 *s += retlen;
702 return TRUE;
703}
704
705STATIC char *
706S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
707 const U8 * const end = start + len;
708
709 while (start < end) {
710 const UV uv = NATIVE_TO_ASCII(*start);
711 if (UNI_IS_INVARIANT(uv))
712 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
713 else {
714 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
715 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
716 }
717 start++;
718 }
719 return dest;
720}
721
722#define PUSH_BYTES(utf8, cur, buf, len) \
723STMT_START { \
724 if (utf8) \
725 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
726 else { \
727 Copy(buf, cur, len, char); \
728 (cur) += (len); \
729 } \
730} STMT_END
731
732#define GROWING(utf8, cat, start, cur, in_len) \
733STMT_START { \
734 STRLEN glen = (in_len); \
735 if (utf8) glen *= UTF8_EXPAND; \
736 if ((cur) + glen >= (start) + SvLEN(cat)) { \
737 (start) = sv_exp_grow(cat, glen); \
738 (cur) = (start) + SvCUR(cat); \
739 } \
740} STMT_END
741
742#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
743STMT_START { \
744 const STRLEN glen = (in_len); \
745 STRLEN gl = glen; \
746 if (utf8) gl *= UTF8_EXPAND; \
747 if ((cur) + gl >= (start) + SvLEN(cat)) { \
748 *cur = '\0'; \
749 SvCUR_set((cat), (cur) - (start)); \
750 (start) = sv_exp_grow(cat, gl); \
751 (cur) = (start) + SvCUR(cat); \
752 } \
753 PUSH_BYTES(utf8, cur, buf, glen); \
754} STMT_END
755
756#define PUSH_BYTE(utf8, s, byte) \
757STMT_START { \
758 if (utf8) { \
759 const U8 au8 = (byte); \
760 (s) = bytes_to_uni(&au8, 1, (s)); \
761 } else *(U8 *)(s)++ = (byte); \
762} STMT_END
763
764/* Only to be used inside a loop (see the break) */
765#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
766STMT_START { \
767 STRLEN retlen; \
768 if (str >= end) break; \
769 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
770 if (retlen == (STRLEN) -1 || retlen == 0) { \
771 *cur = '\0'; \
772 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
773 } \
774 str += retlen; \
775} STMT_END
776
777static const char *_action( const tempsym_t* symptr )
778{
779 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
780}
781
782/* Returns the sizeof() struct described by pat */
783STATIC I32
784S_measure_struct(pTHX_ tempsym_t* symptr)
785{
786 I32 total = 0;
787
788 while (next_symbol(symptr)) {
789 I32 len;
790 int size;
791
792 switch (symptr->howlen) {
793 case e_star:
794 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
795 _action( symptr ) );
796 break;
797 default:
798 /* e_no_len and e_number */
799 len = symptr->length;
800 break;
801 }
802
803 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
804 if (!size) {
805 int star;
806 /* endianness doesn't influence the size of a type */
807 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
808 default:
809 Perl_croak(aTHX_ "Invalid type '%c' in %s",
810 (int)TYPE_NO_MODIFIERS(symptr->code),
811 _action( symptr ) );
812#ifdef PERL_PACK_CAN_SHRIEKSIGN
813 case '.' | TYPE_IS_SHRIEKING:
814 case '@' | TYPE_IS_SHRIEKING:
815#endif
816 case '@':
817 case '.':
818 case '/':
819 case 'U': /* XXXX Is it correct? */
820 case 'w':
821 case 'u':
822 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
823 (int) TYPE_NO_MODIFIERS(symptr->code),
824 _action( symptr ) );
825 case '%':
826 size = 0;
827 break;
828 case '(':
829 {
830 tempsym_t savsym = *symptr;
831 symptr->patptr = savsym.grpbeg;
832 symptr->patend = savsym.grpend;
833 /* XXXX Theoretically, we need to measure many times at
834 different positions, since the subexpression may contain
835 alignment commands, but be not of aligned length.
836 Need to detect this and croak(). */
837 size = measure_struct(symptr);
838 *symptr = savsym;
839 break;
840 }
841 case 'X' | TYPE_IS_SHRIEKING:
842 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
843 */
844 if (!len) /* Avoid division by 0 */
845 len = 1;
846 len = total % len; /* Assumed: the start is aligned. */
847 /* FALL THROUGH */
848 case 'X':
849 size = -1;
850 if (total < len)
851 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
852 break;
853 case 'x' | TYPE_IS_SHRIEKING:
854 if (!len) /* Avoid division by 0 */
855 len = 1;
856 star = total % len; /* Assumed: the start is aligned. */
857 if (star) /* Other portable ways? */
858 len = len - star;
859 else
860 len = 0;
861 /* FALL THROUGH */
862 case 'x':
863 case 'A':
864 case 'Z':
865 case 'a':
866 size = 1;
867 break;
868 case 'B':
869 case 'b':
870 len = (len + 7)/8;
871 size = 1;
872 break;
873 case 'H':
874 case 'h':
875 len = (len + 1)/2;
876 size = 1;
877 break;
878
879 case 'P':
880 len = 1;
881 size = sizeof(char*);
882 break;
883 }
884 }
885 total += len * size;
886 }
887 return total;
888}
889
890
891/* locate matching closing parenthesis or bracket
892 * returns char pointer to char after match, or NULL
893 */
894STATIC const char *
895S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
896{
897 while (patptr < patend) {
898 const char c = *patptr++;
899
900 if (isSPACE(c))
901 continue;
902 else if (c == ender)
903 return patptr-1;
904 else if (c == '#') {
905 while (patptr < patend && *patptr != '\n')
906 patptr++;
907 continue;
908 } else if (c == '(')
909 patptr = group_end(patptr, patend, ')') + 1;
910 else if (c == '[')
911 patptr = group_end(patptr, patend, ']') + 1;
912 }
913 Perl_croak(aTHX_ "No group ending character '%c' found in template",
914 ender);
915 return 0;
916}
917
918
919/* Convert unsigned decimal number to binary.
920 * Expects a pointer to the first digit and address of length variable
921 * Advances char pointer to 1st non-digit char and returns number
922 */
923STATIC const char *
924S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
925{
926 I32 len = *patptr++ - '0';
927 while (isDIGIT(*patptr)) {
928 if (len >= 0x7FFFFFFF/10)
929 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
930 len = (len * 10) + (*patptr++ - '0');
931 }
932 *lenptr = len;
933 return patptr;
934}
935
936/* The marvellous template parsing routine: Using state stored in *symptr,
937 * locates next template code and count
938 */
939STATIC bool
940S_next_symbol(pTHX_ tempsym_t* symptr )
941{
942 const char* patptr = symptr->patptr;
943 const char* const patend = symptr->patend;
944
945 symptr->flags &= ~FLAG_SLASH;
946
947 while (patptr < patend) {
948 if (isSPACE(*patptr))
949 patptr++;
950 else if (*patptr == '#') {
951 patptr++;
952 while (patptr < patend && *patptr != '\n')
953 patptr++;
954 if (patptr < patend)
955 patptr++;
956 } else {
957 /* We should have found a template code */
958 I32 code = *patptr++ & 0xFF;
959 U32 inherited_modifiers = 0;
960
961 if (code == ','){ /* grandfather in commas but with a warning */
962 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
963 symptr->flags |= FLAG_COMMA;
964 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
965 "Invalid type ',' in %s", _action( symptr ) );
966 }
967 continue;
968 }
969
970 /* for '(', skip to ')' */
971 if (code == '(') {
972 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
973 Perl_croak(aTHX_ "()-group starts with a count in %s",
974 _action( symptr ) );
975 symptr->grpbeg = patptr;
976 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
977 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
978 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
979 _action( symptr ) );
980 }
981
982 /* look for group modifiers to inherit */
983 if (TYPE_ENDIANNESS(symptr->flags)) {
984 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
985 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
986 }
987
988 /* look for modifiers */
989 while (patptr < patend) {
990 const char *allowed;
991 I32 modifier;
992 switch (*patptr) {
993 case '!':
994 modifier = TYPE_IS_SHRIEKING;
995 allowed = SHRIEKING_ALLOWED_TYPES;
996 break;
997#ifdef PERL_PACK_CAN_BYTEORDER
998 case '>':
999 modifier = TYPE_IS_BIG_ENDIAN;
1000 allowed = ENDIANNESS_ALLOWED_TYPES;
1001 break;
1002 case '<':
1003 modifier = TYPE_IS_LITTLE_ENDIAN;
1004 allowed = ENDIANNESS_ALLOWED_TYPES;
1005 break;
1006#endif /* PERL_PACK_CAN_BYTEORDER */
1007 default:
1008 allowed = "";
1009 modifier = 0;
1010 break;
1011 }
1012
1013 if (modifier == 0)
1014 break;
1015
1016 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1017 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
1018 allowed, _action( symptr ) );
1019
1020 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1021 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
1022 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
1023 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1024 TYPE_ENDIANNESS_MASK)
1025 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
1026 *patptr, _action( symptr ) );
1027
1028 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1029 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1030 "Duplicate modifier '%c' after '%c' in %s",
1031 *patptr, (int) TYPE_NO_MODIFIERS(code),
1032 _action( symptr ) );
1033 }
1034
1035 code |= modifier;
1036 patptr++;
1037 }
1038
1039 /* inherit modifiers */
1040 code |= inherited_modifiers;
1041
1042 /* look for count and/or / */
1043 if (patptr < patend) {
1044 if (isDIGIT(*patptr)) {
1045 patptr = get_num( patptr, &symptr->length );
1046 symptr->howlen = e_number;
1047
1048 } else if (*patptr == '*') {
1049 patptr++;
1050 symptr->howlen = e_star;
1051
1052 } else if (*patptr == '[') {
1053 const char* lenptr = ++patptr;
1054 symptr->howlen = e_number;
1055 patptr = group_end( patptr, patend, ']' ) + 1;
1056 /* what kind of [] is it? */
1057 if (isDIGIT(*lenptr)) {
1058 lenptr = get_num( lenptr, &symptr->length );
1059 if( *lenptr != ']' )
1060 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1061 _action( symptr ) );
1062 } else {
1063 tempsym_t savsym = *symptr;
1064 symptr->patend = patptr-1;
1065 symptr->patptr = lenptr;
1066 savsym.length = measure_struct(symptr);
1067 *symptr = savsym;
1068 }
1069 } else {
1070 symptr->howlen = e_no_len;
1071 symptr->length = 1;
1072 }
1073
1074 /* try to find / */
1075 while (patptr < patend) {
1076 if (isSPACE(*patptr))
1077 patptr++;
1078 else if (*patptr == '#') {
1079 patptr++;
1080 while (patptr < patend && *patptr != '\n')
1081 patptr++;
1082 if (patptr < patend)
1083 patptr++;
1084 } else {
1085 if (*patptr == '/') {
1086 symptr->flags |= FLAG_SLASH;
1087 patptr++;
1088 if (patptr < patend &&
1089 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1090 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1091 _action( symptr ) );
1092 }
1093 break;
1094 }
1095 }
1096 } else {
1097 /* at end - no count, no / */
1098 symptr->howlen = e_no_len;
1099 symptr->length = 1;
1100 }
1101
1102 symptr->code = code;
1103 symptr->patptr = patptr;
1104 return TRUE;
1105 }
1106 }
1107 symptr->patptr = patptr;
1108 return FALSE;
1109}
1110
1111/*
1112 There is no way to cleanly handle the case where we should process the
1113 string per byte in its upgraded form while it's really in downgraded form
1114 (e.g. estimates like strend-s as an upper bound for the number of
1115 characters left wouldn't work). So if we foresee the need of this
1116 (pattern starts with U or contains U0), we want to work on the encoded
1117 version of the string. Users are advised to upgrade their pack string
1118 themselves if they need to do a lot of unpacks like this on it
1119*/
1120STATIC bool
1121need_utf8(const char *pat, const char *patend)
1122{
1123 bool first = TRUE;
1124 while (pat < patend) {
1125 if (pat[0] == '#') {
1126 pat++;
1127 pat = (const char *) memchr(pat, '\n', patend-pat);
1128 if (!pat) return FALSE;
1129 } else if (pat[0] == 'U') {
1130 if (first || pat[1] == '0') return TRUE;
1131 } else first = FALSE;
1132 pat++;
1133 }
1134 return FALSE;
1135}
1136
1137STATIC char
1138first_symbol(const char *pat, const char *patend) {
1139 while (pat < patend) {
1140 if (pat[0] != '#') return pat[0];
1141 pat++;
1142 pat = (const char *) memchr(pat, '\n', patend-pat);
1143 if (!pat) return 0;
1144 pat++;
1145 }
1146 return 0;
1147}
1148
1149/*
1150=for apidoc unpackstring
1151
1152The engine implementing unpack() Perl function. C<unpackstring> puts the
1153extracted list items on the stack and returns the number of elements.
1154Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1155
1156=cut */
1157
1158I32
1159Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
1160{
1161 tempsym_t sym;
1162
1163 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1164 else if (need_utf8(pat, patend)) {
1165 /* We probably should try to avoid this in case a scalar context call
1166 wouldn't get to the "U0" */
1167 STRLEN len = strend - s;
1168 s = (char *) bytes_to_utf8((U8 *) s, &len);
1169 SAVEFREEPV(s);
1170 strend = s + len;
1171 flags |= FLAG_DO_UTF8;
1172 }
1173
1174 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1175 flags |= FLAG_PARSE_UTF8;
1176
1177 TEMPSYM_INIT(&sym, pat, patend, flags);
1178
1179 return unpack_rec(&sym, s, s, strend, NULL );
1180}
1181
1182STATIC
1183I32
1184S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
1185{
1186 dVAR; dSP;
1187 SV *sv;
1188 const I32 start_sp_offset = SP - PL_stack_base;
1189 howlen_t howlen;
1190
1191 I32 checksum = 0;
1192 UV cuv = 0;
1193 NV cdouble = 0.0;
1194 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1195 bool beyond = FALSE;
1196 bool explicit_length;
1197 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1198 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1199 symptr->strbeg = s - strbeg;
1200
1201 while (next_symbol(symptr)) {
1202 packprops_t props;
1203 I32 len;
1204 I32 datumtype = symptr->code;
1205 /* do first one only unless in list context
1206 / is implemented by unpacking the count, then popping it from the
1207 stack, so must check that we're not in the middle of a / */
1208 if ( unpack_only_one
1209 && (SP - PL_stack_base == start_sp_offset + 1)
1210 && (datumtype != '/') ) /* XXX can this be omitted */
1211 break;
1212
1213 switch (howlen = symptr->howlen) {
1214 case e_star:
1215 len = strend - strbeg; /* long enough */
1216 break;
1217 default:
1218 /* e_no_len and e_number */
1219 len = symptr->length;
1220 break;
1221 }
1222
1223 explicit_length = TRUE;
1224 redo_switch:
1225 beyond = s >= strend;
1226
1227 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1228 if (props) {
1229 /* props nonzero means we can process this letter. */
1230 const long size = props & PACK_SIZE_MASK;
1231 const long howmany = (strend - s) / size;
1232 if (len > howmany)
1233 len = howmany;
1234
1235 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1236 if (len && unpack_only_one) len = 1;
1237 EXTEND(SP, len);
1238 EXTEND_MORTAL(len);
1239 }
1240 }
1241
1242 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1243 default:
1244 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1245
1246 case '%':
1247 if (howlen == e_no_len)
1248 len = 16; /* len is not specified */
1249 checksum = len;
1250 cuv = 0;
1251 cdouble = 0;
1252 continue;
1253 break;
1254 case '(':
1255 {
1256 tempsym_t savsym = *symptr;
1257 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1258 symptr->flags |= group_modifiers;
1259 symptr->patend = savsym.grpend;
1260 symptr->previous = &savsym;
1261 symptr->level++;
1262 PUTBACK;
1263 while (len--) {
1264 symptr->patptr = savsym.grpbeg;
1265 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1266 else symptr->flags &= ~FLAG_PARSE_UTF8;
1267 unpack_rec(symptr, s, strbeg, strend, &s);
1268 if (s == strend && savsym.howlen == e_star)
1269 break; /* No way to continue */
1270 }
1271 SPAGAIN;
1272 savsym.flags = symptr->flags & ~group_modifiers;
1273 *symptr = savsym;
1274 break;
1275 }
1276#ifdef PERL_PACK_CAN_SHRIEKSIGN
1277 case '.' | TYPE_IS_SHRIEKING:
1278#endif
1279 case '.': {
1280 const char *from;
1281 SV *sv;
1282#ifdef PERL_PACK_CAN_SHRIEKSIGN
1283 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1284#else /* PERL_PACK_CAN_SHRIEKSIGN */
1285 const bool u8 = utf8;
1286#endif
1287 if (howlen == e_star) from = strbeg;
1288 else if (len <= 0) from = s;
1289 else {
1290 tempsym_t *group = symptr;
1291
1292 while (--len && group) group = group->previous;
1293 from = group ? strbeg + group->strbeg : strbeg;
1294 }
1295 sv = from <= s ?
1296 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1297 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1298 XPUSHs(sv_2mortal(sv));
1299 break;
1300 }
1301#ifdef PERL_PACK_CAN_SHRIEKSIGN
1302 case '@' | TYPE_IS_SHRIEKING:
1303#endif
1304 case '@':
1305 s = strbeg + symptr->strbeg;
1306#ifdef PERL_PACK_CAN_SHRIEKSIGN
1307 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1308#else /* PERL_PACK_CAN_SHRIEKSIGN */
1309 if (utf8)
1310#endif
1311 {
1312 while (len > 0) {
1313 if (s >= strend)
1314 Perl_croak(aTHX_ "'@' outside of string in unpack");
1315 s += UTF8SKIP(s);
1316 len--;
1317 }
1318 if (s > strend)
1319 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1320 } else {
1321 if (strend-s < len)
1322 Perl_croak(aTHX_ "'@' outside of string in unpack");
1323 s += len;
1324 }
1325 break;
1326 case 'X' | TYPE_IS_SHRIEKING:
1327 if (!len) /* Avoid division by 0 */
1328 len = 1;
1329 if (utf8) {
1330 const char *hop, *last;
1331 I32 l = len;
1332 hop = last = strbeg;
1333 while (hop < s) {
1334 hop += UTF8SKIP(hop);
1335 if (--l == 0) {
1336 last = hop;
1337 l = len;
1338 }
1339 }
1340 if (last > s)
1341 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1342 s = last;
1343 break;
1344 }
1345 len = (s - strbeg) % len;
1346 /* FALL THROUGH */
1347 case 'X':
1348 if (utf8) {
1349 while (len > 0) {
1350 if (s <= strbeg)
1351 Perl_croak(aTHX_ "'X' outside of string in unpack");
1352 while (--s, UTF8_IS_CONTINUATION(*s)) {
1353 if (s <= strbeg)
1354 Perl_croak(aTHX_ "'X' outside of string in unpack");
1355 }
1356 len--;
1357 }
1358 } else {
1359 if (len > s - strbeg)
1360 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1361 s -= len;
1362 }
1363 break;
1364 case 'x' | TYPE_IS_SHRIEKING: {
1365 I32 ai32;
1366 if (!len) /* Avoid division by 0 */
1367 len = 1;
1368 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1369 else ai32 = (s - strbeg) % len;
1370 if (ai32 == 0) break;
1371 len -= ai32;
1372 }
1373 /* FALL THROUGH */
1374 case 'x':
1375 if (utf8) {
1376 while (len>0) {
1377 if (s >= strend)
1378 Perl_croak(aTHX_ "'x' outside of string in unpack");
1379 s += UTF8SKIP(s);
1380 len--;
1381 }
1382 } else {
1383 if (len > strend - s)
1384 Perl_croak(aTHX_ "'x' outside of string in unpack");
1385 s += len;
1386 }
1387 break;
1388 case '/':
1389 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1390 break;
1391 case 'A':
1392 case 'Z':
1393 case 'a':
1394 if (checksum) {
1395 /* Preliminary length estimate is assumed done in 'W' */
1396 if (len > strend - s) len = strend - s;
1397 goto W_checksum;
1398 }
1399 if (utf8) {
1400 I32 l;
1401 const char *hop;
1402 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1403 if (hop >= strend) {
1404 if (hop > strend)
1405 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1406 break;
1407 }
1408 }
1409 if (hop > strend)
1410 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1411 len = hop - s;
1412 } else if (len > strend - s)
1413 len = strend - s;
1414
1415 if (datumtype == 'Z') {
1416 /* 'Z' strips stuff after first null */
1417 const char *ptr, *end;
1418 end = s + len;
1419 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1420 sv = newSVpvn(s, ptr-s);
1421 if (howlen == e_star) /* exact for 'Z*' */
1422 len = ptr-s + (ptr != strend ? 1 : 0);
1423 } else if (datumtype == 'A') {
1424 /* 'A' strips both nulls and spaces */
1425 const char *ptr;
1426 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1427 for (ptr = s+len-1; ptr >= s; ptr--)
1428 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1429 !is_utf8_space((U8 *) ptr)) break;
1430 if (ptr >= s) ptr += UTF8SKIP(ptr);
1431 else ptr++;
1432 if (ptr > s+len)
1433 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1434 } else {
1435 for (ptr = s+len-1; ptr >= s; ptr--)
1436 if (*ptr != 0 && !isSPACE(*ptr)) break;
1437 ptr++;
1438 }
1439 sv = newSVpvn(s, ptr-s);
1440 } else sv = newSVpvn(s, len);
1441
1442 if (utf8) {
1443 SvUTF8_on(sv);
1444 /* Undo any upgrade done due to need_utf8() */
1445 if (!(symptr->flags & FLAG_WAS_UTF8))
1446 sv_utf8_downgrade(sv, 0);
1447 }
1448 XPUSHs(sv_2mortal(sv));
1449 s += len;
1450 break;
1451 case 'B':
1452 case 'b': {
1453 char *str;
1454 if (howlen == e_star || len > (strend - s) * 8)
1455 len = (strend - s) * 8;
1456 if (checksum) {
1457 if (!PL_bitcount) {
1458 int bits;
1459 Newxz(PL_bitcount, 256, char);
1460 for (bits = 1; bits < 256; bits++) {
1461 if (bits & 1) PL_bitcount[bits]++;
1462 if (bits & 2) PL_bitcount[bits]++;
1463 if (bits & 4) PL_bitcount[bits]++;
1464 if (bits & 8) PL_bitcount[bits]++;
1465 if (bits & 16) PL_bitcount[bits]++;
1466 if (bits & 32) PL_bitcount[bits]++;
1467 if (bits & 64) PL_bitcount[bits]++;
1468 if (bits & 128) PL_bitcount[bits]++;
1469 }
1470 }
1471 if (utf8)
1472 while (len >= 8 && s < strend) {
1473 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1474 len -= 8;
1475 }
1476 else
1477 while (len >= 8) {
1478 cuv += PL_bitcount[*(U8 *)s++];
1479 len -= 8;
1480 }
1481 if (len && s < strend) {
1482 U8 bits;
1483 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1484 if (datumtype == 'b')
1485 while (len-- > 0) {
1486 if (bits & 1) cuv++;
1487 bits >>= 1;
1488 }
1489 else
1490 while (len-- > 0) {
1491 if (bits & 0x80) cuv++;
1492 bits <<= 1;
1493 }
1494 }
1495 break;
1496 }
1497
1498 sv = sv_2mortal(newSV(len ? len : 1));
1499 SvPOK_on(sv);
1500 str = SvPVX(sv);
1501 if (datumtype == 'b') {
1502 U8 bits = 0;
1503 const I32 ai32 = len;
1504 for (len = 0; len < ai32; len++) {
1505 if (len & 7) bits >>= 1;
1506 else if (utf8) {
1507 if (s >= strend) break;
1508 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1509 } else bits = *(U8 *) s++;
1510 *str++ = bits & 1 ? '1' : '0';
1511 }
1512 } else {
1513 U8 bits = 0;
1514 const I32 ai32 = len;
1515 for (len = 0; len < ai32; len++) {
1516 if (len & 7) bits <<= 1;
1517 else if (utf8) {
1518 if (s >= strend) break;
1519 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1520 } else bits = *(U8 *) s++;
1521 *str++ = bits & 0x80 ? '1' : '0';
1522 }
1523 }
1524 *str = '\0';
1525 SvCUR_set(sv, str - SvPVX_const(sv));
1526 XPUSHs(sv);
1527 break;
1528 }
1529 case 'H':
1530 case 'h': {
1531 char *str;
1532 /* Preliminary length estimate, acceptable for utf8 too */
1533 if (howlen == e_star || len > (strend - s) * 2)
1534 len = (strend - s) * 2;
1535 sv = sv_2mortal(newSV(len ? len : 1));
1536 SvPOK_on(sv);
1537 str = SvPVX(sv);
1538 if (datumtype == 'h') {
1539 U8 bits = 0;
1540 I32 ai32 = len;
1541 for (len = 0; len < ai32; len++) {
1542 if (len & 1) bits >>= 4;
1543 else if (utf8) {
1544 if (s >= strend) break;
1545 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1546 } else bits = * (U8 *) s++;
1547 *str++ = PL_hexdigit[bits & 15];
1548 }
1549 } else {
1550 U8 bits = 0;
1551 const I32 ai32 = len;
1552 for (len = 0; len < ai32; len++) {
1553 if (len & 1) bits <<= 4;
1554 else if (utf8) {
1555 if (s >= strend) break;
1556 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1557 } else bits = *(U8 *) s++;
1558 *str++ = PL_hexdigit[(bits >> 4) & 15];
1559 }
1560 }
1561 *str = '\0';
1562 SvCUR_set(sv, str - SvPVX_const(sv));
1563 XPUSHs(sv);
1564 break;
1565 }
1566 case 'c':
1567 while (len-- > 0) {
1568 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
1569 if (aint >= 128) /* fake up signed chars */
1570 aint -= 256;
1571 if (!checksum)
1572 PUSHs(sv_2mortal(newSViv((IV)aint)));
1573 else if (checksum > bits_in_uv)
1574 cdouble += (NV)aint;
1575 else
1576 cuv += aint;
1577 }
1578 break;
1579 case 'C':
1580 case 'W':
1581 W_checksum:
1582 if (len == 0) {
1583 if (explicit_length && datumtype == 'C')
1584 /* Switch to "character" mode */
1585 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1586 break;
1587 }
1588 if (datumtype == 'C' ?
1589 (symptr->flags & FLAG_DO_UTF8) &&
1590 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
1591 while (len-- > 0 && s < strend) {
1592 STRLEN retlen;
1593 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1594 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1595 if (retlen == (STRLEN) -1 || retlen == 0)
1596 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1597 s += retlen;
1598 if (!checksum)
1599 PUSHs(sv_2mortal(newSVuv((UV) val)));
1600 else if (checksum > bits_in_uv)
1601 cdouble += (NV) val;
1602 else
1603 cuv += val;
1604 }
1605 } else if (!checksum)
1606 while (len-- > 0) {
1607 const U8 ch = *(U8 *) s++;
1608 PUSHs(sv_2mortal(newSVuv((UV) ch)));
1609 }
1610 else if (checksum > bits_in_uv)
1611 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1612 else
1613 while (len-- > 0) cuv += *(U8 *) s++;
1614 break;
1615 case 'U':
1616 if (len == 0) {
1617 if (explicit_length) {
1618 /* Switch to "bytes in UTF-8" mode */
1619 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1620 else
1621 /* Should be impossible due to the need_utf8() test */
1622 Perl_croak(aTHX_ "U0 mode on a byte string");
1623 }
1624 break;
1625 }
1626 if (len > strend - s) len = strend - s;
1627 if (!checksum) {
1628 if (len && unpack_only_one) len = 1;
1629 EXTEND(SP, len);
1630 EXTEND_MORTAL(len);
1631 }
1632 while (len-- > 0 && s < strend) {
1633 STRLEN retlen;
1634 UV auv;
1635 if (utf8) {
1636 U8 result[UTF8_MAXLEN];
1637 const char *ptr = s;
1638 STRLEN len;
1639 /* Bug: warns about bad utf8 even if we are short on bytes
1640 and will break out of the loop */
1641 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1642 'U'))
1643 break;
1644 len = UTF8SKIP(result);
1645 if (!uni_to_bytes(aTHX_ &ptr, strend,
1646 (char *) &result[1], len-1, 'U')) break;
1647 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1648 s = ptr;
1649 } else {
1650 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1651 if (retlen == (STRLEN) -1 || retlen == 0)
1652 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1653 s += retlen;
1654 }
1655 if (!checksum)
1656 PUSHs(sv_2mortal(newSVuv((UV) auv)));
1657 else if (checksum > bits_in_uv)
1658 cdouble += (NV) auv;
1659 else
1660 cuv += auv;
1661 }
1662 break;
1663 case 's' | TYPE_IS_SHRIEKING:
1664#if SHORTSIZE != SIZE16
1665 while (len-- > 0) {
1666 short ashort;
1667 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1668 DO_BO_UNPACK(ashort, s);
1669 if (!checksum)
1670 PUSHs(sv_2mortal(newSViv((IV)ashort)));
1671 else if (checksum > bits_in_uv)
1672 cdouble += (NV)ashort;
1673 else
1674 cuv += ashort;
1675 }
1676 break;
1677#else
1678 /* Fallthrough! */
1679#endif
1680 case 's':
1681 while (len-- > 0) {
1682 I16 ai16;
1683
1684#if U16SIZE > SIZE16
1685 ai16 = 0;
1686#endif
1687 SHIFT16(utf8, s, strend, &ai16, datumtype);
1688 DO_BO_UNPACK(ai16, 16);
1689#if U16SIZE > SIZE16
1690 if (ai16 > 32767)
1691 ai16 -= 65536;
1692#endif
1693 if (!checksum)
1694 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1695 else if (checksum > bits_in_uv)
1696 cdouble += (NV)ai16;
1697 else
1698 cuv += ai16;
1699 }
1700 break;
1701 case 'S' | TYPE_IS_SHRIEKING:
1702#if SHORTSIZE != SIZE16
1703 while (len-- > 0) {
1704 unsigned short aushort;
1705 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1706 DO_BO_UNPACK(aushort, s);
1707 if (!checksum)
1708 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
1709 else if (checksum > bits_in_uv)
1710 cdouble += (NV)aushort;
1711 else
1712 cuv += aushort;
1713 }
1714 break;
1715#else
1716 /* Fallhrough! */
1717#endif
1718 case 'v':
1719 case 'n':
1720 case 'S':
1721 while (len-- > 0) {
1722 U16 au16;
1723#if U16SIZE > SIZE16
1724 au16 = 0;
1725#endif
1726 SHIFT16(utf8, s, strend, &au16, datumtype);
1727 DO_BO_UNPACK(au16, 16);
1728#ifdef HAS_NTOHS
1729 if (datumtype == 'n')
1730 au16 = PerlSock_ntohs(au16);
1731#endif
1732#ifdef HAS_VTOHS
1733 if (datumtype == 'v')
1734 au16 = vtohs(au16);
1735#endif
1736 if (!checksum)
1737 PUSHs(sv_2mortal(newSVuv((UV)au16)));
1738 else if (checksum > bits_in_uv)
1739 cdouble += (NV) au16;
1740 else
1741 cuv += au16;
1742 }
1743 break;
1744#ifdef PERL_PACK_CAN_SHRIEKSIGN
1745 case 'v' | TYPE_IS_SHRIEKING:
1746 case 'n' | TYPE_IS_SHRIEKING:
1747 while (len-- > 0) {
1748 I16 ai16;
1749# if U16SIZE > SIZE16
1750 ai16 = 0;
1751# endif
1752 SHIFT16(utf8, s, strend, &ai16, datumtype);
1753# ifdef HAS_NTOHS
1754 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1755 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1756# endif /* HAS_NTOHS */
1757# ifdef HAS_VTOHS
1758 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1759 ai16 = (I16) vtohs((U16) ai16);
1760# endif /* HAS_VTOHS */
1761 if (!checksum)
1762 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1763 else if (checksum > bits_in_uv)
1764 cdouble += (NV) ai16;
1765 else
1766 cuv += ai16;
1767 }
1768 break;
1769#endif /* PERL_PACK_CAN_SHRIEKSIGN */
1770 case 'i':
1771 case 'i' | TYPE_IS_SHRIEKING:
1772 while (len-- > 0) {
1773 int aint;
1774 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1775 DO_BO_UNPACK(aint, i);
1776 if (!checksum)
1777 PUSHs(sv_2mortal(newSViv((IV)aint)));
1778 else if (checksum > bits_in_uv)
1779 cdouble += (NV)aint;
1780 else
1781 cuv += aint;
1782 }
1783 break;
1784 case 'I':
1785 case 'I' | TYPE_IS_SHRIEKING:
1786 while (len-- > 0) {
1787 unsigned int auint;
1788 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1789 DO_BO_UNPACK(auint, i);
1790 if (!checksum)
1791 PUSHs(sv_2mortal(newSVuv((UV)auint)));
1792 else if (checksum > bits_in_uv)
1793 cdouble += (NV)auint;
1794 else
1795 cuv += auint;
1796 }
1797 break;
1798 case 'j':
1799 while (len-- > 0) {
1800 IV aiv;
1801 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1802#if IVSIZE == INTSIZE
1803 DO_BO_UNPACK(aiv, i);
1804#elif IVSIZE == LONGSIZE
1805 DO_BO_UNPACK(aiv, l);
1806#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1807 DO_BO_UNPACK(aiv, 64);
1808#else
1809 Perl_croak(aTHX_ "'j' not supported on this platform");
1810#endif
1811 if (!checksum)
1812 PUSHs(sv_2mortal(newSViv(aiv)));
1813 else if (checksum > bits_in_uv)
1814 cdouble += (NV)aiv;
1815 else
1816 cuv += aiv;
1817 }
1818 break;
1819 case 'J':
1820 while (len-- > 0) {
1821 UV auv;
1822 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1823#if IVSIZE == INTSIZE
1824 DO_BO_UNPACK(auv, i);
1825#elif IVSIZE == LONGSIZE
1826 DO_BO_UNPACK(auv, l);
1827#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1828 DO_BO_UNPACK(auv, 64);
1829#else
1830 Perl_croak(aTHX_ "'J' not supported on this platform");
1831#endif
1832 if (!checksum)
1833 PUSHs(sv_2mortal(newSVuv(auv)));
1834 else if (checksum > bits_in_uv)
1835 cdouble += (NV)auv;
1836 else
1837 cuv += auv;
1838 }
1839 break;
1840 case 'l' | TYPE_IS_SHRIEKING:
1841#if LONGSIZE != SIZE32
1842 while (len-- > 0) {
1843 long along;
1844 SHIFT_VAR(utf8, s, strend, along, datumtype);
1845 DO_BO_UNPACK(along, l);
1846 if (!checksum)
1847 PUSHs(sv_2mortal(newSViv((IV)along)));
1848 else if (checksum > bits_in_uv)
1849 cdouble += (NV)along;
1850 else
1851 cuv += along;
1852 }
1853 break;
1854#else
1855 /* Fallthrough! */
1856#endif
1857 case 'l':
1858 while (len-- > 0) {
1859 I32 ai32;
1860#if U32SIZE > SIZE32
1861 ai32 = 0;
1862#endif
1863 SHIFT32(utf8, s, strend, &ai32, datumtype);
1864 DO_BO_UNPACK(ai32, 32);
1865#if U32SIZE > SIZE32
1866 if (ai32 > 2147483647) ai32 -= 4294967296;
1867#endif
1868 if (!checksum)
1869 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1870 else if (checksum > bits_in_uv)
1871 cdouble += (NV)ai32;
1872 else
1873 cuv += ai32;
1874 }
1875 break;
1876 case 'L' | TYPE_IS_SHRIEKING:
1877#if LONGSIZE != SIZE32
1878 while (len-- > 0) {
1879 unsigned long aulong;
1880 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1881 DO_BO_UNPACK(aulong, l);
1882 if (!checksum)
1883 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
1884 else if (checksum > bits_in_uv)
1885 cdouble += (NV)aulong;
1886 else
1887 cuv += aulong;
1888 }
1889 break;
1890#else
1891 /* Fall through! */
1892#endif
1893 case 'V':
1894 case 'N':
1895 case 'L':
1896 while (len-- > 0) {
1897 U32 au32;
1898#if U32SIZE > SIZE32
1899 au32 = 0;
1900#endif
1901 SHIFT32(utf8, s, strend, &au32, datumtype);
1902 DO_BO_UNPACK(au32, 32);
1903#ifdef HAS_NTOHL
1904 if (datumtype == 'N')
1905 au32 = PerlSock_ntohl(au32);
1906#endif
1907#ifdef HAS_VTOHL
1908 if (datumtype == 'V')
1909 au32 = vtohl(au32);
1910#endif
1911 if (!checksum)
1912 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1913 else if (checksum > bits_in_uv)
1914 cdouble += (NV)au32;
1915 else
1916 cuv += au32;
1917 }
1918 break;
1919#ifdef PERL_PACK_CAN_SHRIEKSIGN
1920 case 'V' | TYPE_IS_SHRIEKING:
1921 case 'N' | TYPE_IS_SHRIEKING:
1922 while (len-- > 0) {
1923 I32 ai32;
1924# if U32SIZE > SIZE32
1925 ai32 = 0;
1926# endif
1927 SHIFT32(utf8, s, strend, &ai32, datumtype);
1928# ifdef HAS_NTOHL
1929 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1930 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1931# endif
1932# ifdef HAS_VTOHL
1933 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1934 ai32 = (I32)vtohl((U32)ai32);
1935# endif
1936 if (!checksum)
1937 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1938 else if (checksum > bits_in_uv)
1939 cdouble += (NV)ai32;
1940 else
1941 cuv += ai32;
1942 }
1943 break;
1944#endif /* PERL_PACK_CAN_SHRIEKSIGN */
1945 case 'p':
1946 while (len-- > 0) {
1947 const char *aptr;
1948 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1949 DO_BO_UNPACK_PC(aptr);
1950 /* newSVpv generates undef if aptr is NULL */
1951 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
1952 }
1953 break;
1954 case 'w':
1955 {
1956 UV auv = 0;
1957 U32 bytes = 0;
1958
1959 while (len > 0 && s < strend) {
1960 U8 ch;
1961 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1962 auv = (auv << 7) | (ch & 0x7f);
1963 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1964 if (ch < 0x80) {
1965 bytes = 0;
1966 PUSHs(sv_2mortal(newSVuv(auv)));
1967 len--;
1968 auv = 0;
1969 continue;
1970 }
1971 if (++bytes >= sizeof(UV)) { /* promote to string */
1972 const char *t;
1973
1974 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
1975 while (s < strend) {
1976 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1977 sv = mul128(sv, (U8)(ch & 0x7f));
1978 if (!(ch & 0x80)) {
1979 bytes = 0;
1980 break;
1981 }
1982 }
1983 t = SvPV_nolen_const(sv);
1984 while (*t == '0')
1985 t++;
1986 sv_chop(sv, t);
1987 PUSHs(sv_2mortal(sv));
1988 len--;
1989 auv = 0;
1990 }
1991 }
1992 if ((s >= strend) && bytes)
1993 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1994 }
1995 break;
1996 case 'P':
1997 if (symptr->howlen == e_star)
1998 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1999 EXTEND(SP, 1);
2000 if (s + sizeof(char*) <= strend) {
2001 char *aptr;
2002 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2003 DO_BO_UNPACK_PC(aptr);
2004 /* newSVpvn generates undef if aptr is NULL */
2005 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
2006 }
2007 break;
2008#ifdef HAS_QUAD
2009 case 'q':
2010 while (len-- > 0) {
2011 Quad_t aquad;
2012 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2013 DO_BO_UNPACK(aquad, 64);
2014 if (!checksum)
2015 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
2016 newSViv((IV)aquad) : newSVnv((NV)aquad)));
2017 else if (checksum > bits_in_uv)
2018 cdouble += (NV)aquad;
2019 else
2020 cuv += aquad;
2021 }
2022 break;
2023 case 'Q':
2024 while (len-- > 0) {
2025 Uquad_t auquad;
2026 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2027 DO_BO_UNPACK(auquad, 64);
2028 if (!checksum)
2029 PUSHs(sv_2mortal(auquad <= UV_MAX ?
2030 newSVuv((UV)auquad):newSVnv((NV)auquad)));
2031 else if (checksum > bits_in_uv)
2032 cdouble += (NV)auquad;
2033 else
2034 cuv += auquad;
2035 }
2036 break;
2037#endif /* HAS_QUAD */
2038 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2039 case 'f':
2040 while (len-- > 0) {
2041 float afloat;
2042 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
2043 DO_BO_UNPACK_N(afloat, float);
2044 if (!checksum)
2045 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2046 else
2047 cdouble += afloat;
2048 }
2049 break;
2050 case 'd':
2051 while (len-- > 0) {
2052 double adouble;
2053 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
2054 DO_BO_UNPACK_N(adouble, double);
2055 if (!checksum)
2056 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2057 else
2058 cdouble += adouble;
2059 }
2060 break;
2061 case 'F':
2062 while (len-- > 0) {
2063 NV anv;
2064 SHIFT_VAR(utf8, s, strend, anv, datumtype);
2065 DO_BO_UNPACK_N(anv, NV);
2066 if (!checksum)
2067 PUSHs(sv_2mortal(newSVnv(anv)));
2068 else
2069 cdouble += anv;
2070 }
2071 break;
2072#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2073 case 'D':
2074 while (len-- > 0) {
2075 long double aldouble;
2076 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
2077 DO_BO_UNPACK_N(aldouble, long double);
2078 if (!checksum)
2079 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2080 else
2081 cdouble += aldouble;
2082 }
2083 break;
2084#endif
2085 case 'u':
2086 /* MKS:
2087 * Initialise the decode mapping. By using a table driven
2088 * algorithm, the code will be character-set independent
2089 * (and just as fast as doing character arithmetic)
2090 */
2091 if (PL_uudmap[(U8)'M'] == 0) {
2092 size_t i;
2093
2094 for (i = 0; i < sizeof(PL_uuemap); ++i)
2095 PL_uudmap[(U8)PL_uuemap[i]] = i;
2096 /*
2097 * Because ' ' and '`' map to the same value,
2098 * we need to decode them both the same.
2099 */
2100 PL_uudmap[(U8)' '] = 0;
2101 }
2102 {
2103 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2104 sv = sv_2mortal(newSV(l));
2105 if (l) SvPOK_on(sv);
2106 }
2107 if (utf8) {
2108 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2109 I32 a, b, c, d;
2110 char hunk[4];
2111
2112 hunk[3] = '\0';
2113 while (len > 0) {
2114 next_uni_uu(aTHX_ &s, strend, &a);
2115 next_uni_uu(aTHX_ &s, strend, &b);
2116 next_uni_uu(aTHX_ &s, strend, &c);
2117 next_uni_uu(aTHX_ &s, strend, &d);
2118 hunk[0] = (char)((a << 2) | (b >> 4));
2119 hunk[1] = (char)((b << 4) | (c >> 2));
2120 hunk[2] = (char)((c << 6) | d);
2121 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2122 len -= 3;
2123 }
2124 if (s < strend) {
2125 if (*s == '\n') {
2126 s++;
2127 }
2128 else {
2129 /* possible checksum byte */
2130 const char *skip = s+UTF8SKIP(s);
2131 if (skip < strend && *skip == '\n')
2132 s = skip+1;
2133 }
2134 }
2135 }
2136 } else {
2137 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2138 I32 a, b, c, d;
2139 char hunk[4];
2140
2141 hunk[3] = '\0';
2142 len = PL_uudmap[*(U8*)s++] & 077;
2143 while (len > 0) {
2144 if (s < strend && ISUUCHAR(*s))
2145 a = PL_uudmap[*(U8*)s++] & 077;
2146 else
2147 a = 0;
2148 if (s < strend && ISUUCHAR(*s))
2149 b = PL_uudmap[*(U8*)s++] & 077;
2150 else
2151 b = 0;
2152 if (s < strend && ISUUCHAR(*s))
2153 c = PL_uudmap[*(U8*)s++] & 077;
2154 else
2155 c = 0;
2156 if (s < strend && ISUUCHAR(*s))
2157 d = PL_uudmap[*(U8*)s++] & 077;
2158 else
2159 d = 0;
2160 hunk[0] = (char)((a << 2) | (b >> 4));
2161 hunk[1] = (char)((b << 4) | (c >> 2));
2162 hunk[2] = (char)((c << 6) | d);
2163 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2164 len -= 3;
2165 }
2166 if (*s == '\n')
2167 s++;
2168 else /* possible checksum byte */
2169 if (s + 1 < strend && s[1] == '\n')
2170 s += 2;
2171 }
2172 }
2173 XPUSHs(sv);
2174 break;
2175 }
2176
2177 if (checksum) {
2178 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2179 (checksum > bits_in_uv &&
2180 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2181 NV trouble, anv;
2182
2183 anv = (NV) (1 << (checksum & 15));
2184 while (checksum >= 16) {
2185 checksum -= 16;
2186 anv *= 65536.0;
2187 }
2188 while (cdouble < 0.0)
2189 cdouble += anv;
2190 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2191 sv = newSVnv(cdouble);
2192 }
2193 else {
2194 if (checksum < bits_in_uv) {
2195 UV mask = ((UV)1 << checksum) - 1;
2196 cuv &= mask;
2197 }
2198 sv = newSVuv(cuv);
2199 }
2200 XPUSHs(sv_2mortal(sv));
2201 checksum = 0;
2202 }
2203
2204 if (symptr->flags & FLAG_SLASH){
2205 if (SP - PL_stack_base - start_sp_offset <= 0)
2206 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2207 if( next_symbol(symptr) ){
2208 if( symptr->howlen == e_number )
2209 Perl_croak(aTHX_ "Count after length/code in unpack" );
2210 if( beyond ){
2211 /* ...end of char buffer then no decent length available */
2212 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2213 } else {
2214 /* take top of stack (hope it's numeric) */
2215 len = POPi;
2216 if( len < 0 )
2217 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2218 }
2219 } else {
2220 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2221 }
2222 datumtype = symptr->code;
2223 explicit_length = FALSE;
2224 goto redo_switch;
2225 }
2226 }
2227
2228 if (new_s)
2229 *new_s = s;
2230 PUTBACK;
2231 return SP - PL_stack_base - start_sp_offset;
2232}
2233
2234PP(pp_unpack)
2235{
2236 dVAR;
2237 dSP;
2238 dPOPPOPssrl;
2239 I32 gimme = GIMME_V;
2240 STRLEN llen;
2241 STRLEN rlen;
2242 const char *pat = SvPV_const(left, llen);
2243 const char *s = SvPV_const(right, rlen);
2244 const char *strend = s + rlen;
2245 const char *patend = pat + llen;
2246 I32 cnt;
2247
2248 PUTBACK;
2249 cnt = unpackstring(pat, patend, s, strend,
2250 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2251 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2252
2253 SPAGAIN;
2254 if ( !cnt && gimme == G_SCALAR )
2255 PUSHs(&PL_sv_undef);
2256 RETURN;
2257}
2258
2259STATIC U8 *
2260doencodes(U8 *h, const char *s, I32 len)
2261{
2262 *h++ = PL_uuemap[len];
2263 while (len > 2) {
2264 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2265 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2266 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2267 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2268 s += 3;
2269 len -= 3;
2270 }
2271 if (len > 0) {
2272 const char r = (len > 1 ? s[1] : '\0');
2273 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2274 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2275 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2276 *h++ = PL_uuemap[0];
2277 }
2278 *h++ = '\n';
2279 return h;
2280}
2281
2282STATIC SV *
2283S_is_an_int(pTHX_ const char *s, STRLEN l)
2284{
2285 SV *result = newSVpvn(s, l);
2286 char *const result_c = SvPV_nolen(result); /* convenience */
2287 char *out = result_c;
2288 bool skip = 1;
2289 bool ignore = 0;
2290
2291 while (*s) {
2292 switch (*s) {
2293 case ' ':
2294 break;
2295 case '+':
2296 if (!skip) {
2297 SvREFCNT_dec(result);
2298 return (NULL);
2299 }
2300 break;
2301 case '0':
2302 case '1':
2303 case '2':
2304 case '3':
2305 case '4':
2306 case '5':
2307 case '6':
2308 case '7':
2309 case '8':
2310 case '9':
2311 skip = 0;
2312 if (!ignore) {
2313 *(out++) = *s;
2314 }
2315 break;
2316 case '.':
2317 ignore = 1;
2318 break;
2319 default:
2320 SvREFCNT_dec(result);
2321 return (NULL);
2322 }
2323 s++;
2324 }
2325 *(out++) = '\0';
2326 SvCUR_set(result, out - result_c);
2327 return (result);
2328}
2329
2330/* pnum must be '\0' terminated */
2331STATIC int
2332S_div128(pTHX_ SV *pnum, bool *done)
2333{
2334 STRLEN len;
2335 char * const s = SvPV(pnum, len);
2336 char *t = s;
2337 int m = 0;
2338
2339 *done = 1;
2340 while (*t) {
2341 const int i = m * 10 + (*t - '0');
2342 const int r = (i >> 7); /* r < 10 */
2343 m = i & 0x7F;
2344 if (r) {
2345 *done = 0;
2346 }
2347 *(t++) = '0' + r;
2348 }
2349 *(t++) = '\0';
2350 SvCUR_set(pnum, (STRLEN) (t - s));
2351 return (m);
2352}
2353
2354/*
2355=for apidoc packlist
2356
2357The engine implementing pack() Perl function.
2358
2359=cut
2360*/
2361
2362void
2363Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
2364{
2365 dVAR;
2366 tempsym_t sym;
2367
2368 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2369
2370 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2371 Also make sure any UTF8 flag is loaded */
2372 SvPV_force_nolen(cat);
2373 if (DO_UTF8(cat))
2374 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2375
2376 (void)pack_rec( cat, &sym, beglist, endlist );
2377}
2378
2379/* like sv_utf8_upgrade, but also repoint the group start markers */
2380STATIC void
2381marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2382 STRLEN len;
2383 tempsym_t *group;
2384 const char *from_ptr, *from_start, *from_end, **marks, **m;
2385 char *to_start, *to_ptr;
2386
2387 if (SvUTF8(sv)) return;
2388
2389 from_start = SvPVX_const(sv);
2390 from_end = from_start + SvCUR(sv);
2391 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2392 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2393 if (from_ptr == from_end) {
2394 /* Simple case: no character needs to be changed */
2395 SvUTF8_on(sv);
2396 return;
2397 }
2398
2399 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2400 Newx(to_start, len, char);
2401 Copy(from_start, to_start, from_ptr-from_start, char);
2402 to_ptr = to_start + (from_ptr-from_start);
2403
2404 Newx(marks, sym_ptr->level+2, const char *);
2405 for (group=sym_ptr; group; group = group->previous)
2406 marks[group->level] = from_start + group->strbeg;
2407 marks[sym_ptr->level+1] = from_end+1;
2408 for (m = marks; *m < from_ptr; m++)
2409 *m = to_start + (*m-from_start);
2410
2411 for (;from_ptr < from_end; from_ptr++) {
2412 while (*m == from_ptr) *m++ = to_ptr;
2413 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2414 }
2415 *to_ptr = 0;
2416
2417 while (*m == from_ptr) *m++ = to_ptr;
2418 if (m != marks + sym_ptr->level+1) {
2419 Safefree(marks);
2420 Safefree(to_start);
2421 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2422 }
2423 for (group=sym_ptr; group; group = group->previous)
2424 group->strbeg = marks[group->level] - to_start;
2425 Safefree(marks);
2426
2427 if (SvOOK(sv)) {
2428 if (SvIVX(sv)) {
2429 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2430 from_start -= SvIVX(sv);
2431 SvIV_set(sv, 0);
2432 }
2433 SvFLAGS(sv) &= ~SVf_OOK;
2434 }
2435 if (SvLEN(sv) != 0)
2436 Safefree(from_start);
2437 SvPV_set(sv, to_start);
2438 SvCUR_set(sv, to_ptr - to_start);
2439 SvLEN_set(sv, len);
2440 SvUTF8_on(sv);
2441}
2442
2443/* Exponential string grower. Makes string extension effectively O(n)
2444 needed says how many extra bytes we need (not counting the final '\0')
2445 Only grows the string if there is an actual lack of space
2446*/
2447STATIC char *
2448S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2449 const STRLEN cur = SvCUR(sv);
2450 const STRLEN len = SvLEN(sv);
2451 STRLEN extend;
2452 if (len - cur > needed) return SvPVX(sv);
2453 extend = needed > len ? needed : len;
2454 return SvGROW(sv, len+extend+1);
2455}
2456
2457STATIC
2458SV **
2459S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2460{
2461 dVAR;
2462 tempsym_t lookahead;
2463 I32 items = endlist - beglist;
2464 bool found = next_symbol(symptr);
2465 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2466 bool warn_utf8 = ckWARN(WARN_UTF8);
2467
2468 if (symptr->level == 0 && found && symptr->code == 'U') {
2469 marked_upgrade(aTHX_ cat, symptr);
2470 symptr->flags |= FLAG_DO_UTF8;
2471 utf8 = 0;
2472 }
2473 symptr->strbeg = SvCUR(cat);
2474
2475 while (found) {
2476 SV *fromstr;
2477 STRLEN fromlen;
2478 I32 len;
2479 SV *lengthcode = NULL;
2480 I32 datumtype = symptr->code;
2481 howlen_t howlen = symptr->howlen;
2482 char *start = SvPVX(cat);
2483 char *cur = start + SvCUR(cat);
2484
2485#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2486
2487 switch (howlen) {
2488 case e_star:
2489 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2490 0 : items;
2491 break;
2492 default:
2493 /* e_no_len and e_number */
2494 len = symptr->length;
2495 break;
2496 }
2497
2498 if (len) {
2499 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2500
2501 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2502 /* We can process this letter. */
2503 STRLEN size = props & PACK_SIZE_MASK;
2504 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2505 }
2506 }
2507
2508 /* Look ahead for next symbol. Do we have code/code? */
2509 lookahead = *symptr;
2510 found = next_symbol(&lookahead);
2511 if (symptr->flags & FLAG_SLASH) {
2512 IV count;
2513 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2514 if (strchr("aAZ", lookahead.code)) {
2515 if (lookahead.howlen == e_number) count = lookahead.length;
2516 else {
2517 if (items > 0) {
2518 if (SvGAMAGIC(*beglist)) {
2519 /* Avoid reading the active data more than once
2520 by copying it to a temporary. */
2521 STRLEN len;
2522 const char *const pv = SvPV_const(*beglist, len);
2523 SV *const temp = sv_2mortal(newSVpvn(pv, len));
2524 if (SvUTF8(*beglist))
2525 SvUTF8_on(temp);
2526 *beglist = temp;
2527 }
2528 count = DO_UTF8(*beglist) ?
2529 sv_len_utf8(*beglist) : sv_len(*beglist);
2530 }
2531 else count = 0;
2532 if (lookahead.code == 'Z') count++;
2533 }
2534 } else {
2535 if (lookahead.howlen == e_number && lookahead.length < items)
2536 count = lookahead.length;
2537 else count = items;
2538 }
2539 lookahead.howlen = e_number;
2540 lookahead.length = count;
2541 lengthcode = sv_2mortal(newSViv(count));
2542 }
2543
2544 /* Code inside the switch must take care to properly update
2545 cat (CUR length and '\0' termination) if it updated *cur and
2546 doesn't simply leave using break */
2547 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2548 default:
2549 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2550 (int) TYPE_NO_MODIFIERS(datumtype));
2551 case '%':
2552 Perl_croak(aTHX_ "'%%' may not be used in pack");
2553 {
2554 char *from;
2555#ifdef PERL_PACK_CAN_SHRIEKSIGN
2556 case '.' | TYPE_IS_SHRIEKING:
2557#endif
2558 case '.':
2559 if (howlen == e_star) from = start;
2560 else if (len == 0) from = cur;
2561 else {
2562 tempsym_t *group = symptr;
2563
2564 while (--len && group) group = group->previous;
2565 from = group ? start + group->strbeg : start;
2566 }
2567 fromstr = NEXTFROM;
2568 len = SvIV(fromstr);
2569 goto resize;
2570#ifdef PERL_PACK_CAN_SHRIEKSIGN
2571 case '@' | TYPE_IS_SHRIEKING:
2572#endif
2573 case '@':
2574 from = start + symptr->strbeg;
2575 resize:
2576#ifdef PERL_PACK_CAN_SHRIEKSIGN
2577 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2578#else /* PERL_PACK_CAN_SHRIEKSIGN */
2579 if (utf8)
2580#endif
2581 if (len >= 0) {
2582 while (len && from < cur) {
2583 from += UTF8SKIP(from);
2584 len--;
2585 }
2586 if (from > cur)
2587 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2588 if (len) {
2589 /* Here we know from == cur */
2590 grow:
2591 GROWING(0, cat, start, cur, len);
2592 Zero(cur, len, char);
2593 cur += len;
2594 } else if (from < cur) {
2595 len = cur - from;
2596 goto shrink;
2597 } else goto no_change;
2598 } else {
2599 cur = from;
2600 len = -len;
2601 goto utf8_shrink;
2602 }
2603 else {
2604 len -= cur - from;
2605 if (len > 0) goto grow;
2606 if (len == 0) goto no_change;
2607 len = -len;
2608 goto shrink;
2609 }
2610 break;
2611 }
2612 case '(': {
2613 tempsym_t savsym = *symptr;
2614 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2615 symptr->flags |= group_modifiers;
2616 symptr->patend = savsym.grpend;
2617 symptr->level++;
2618 symptr->previous = &lookahead;
2619 while (len--) {
2620 U32 was_utf8;
2621 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2622 else symptr->flags &= ~FLAG_PARSE_UTF8;
2623 was_utf8 = SvUTF8(cat);
2624 symptr->patptr = savsym.grpbeg;
2625 beglist = pack_rec(cat, symptr, beglist, endlist);
2626 if (SvUTF8(cat) != was_utf8)
2627 /* This had better be an upgrade while in utf8==0 mode */
2628 utf8 = 1;
2629
2630 if (savsym.howlen == e_star && beglist == endlist)
2631 break; /* No way to continue */
2632 }
2633 items = endlist - beglist;
2634 lookahead.flags = symptr->flags & ~group_modifiers;
2635 goto no_change;
2636 }
2637 case 'X' | TYPE_IS_SHRIEKING:
2638 if (!len) /* Avoid division by 0 */
2639 len = 1;
2640 if (utf8) {
2641 char *hop, *last;
2642 I32 l = len;
2643 hop = last = start;
2644 while (hop < cur) {
2645 hop += UTF8SKIP(hop);
2646 if (--l == 0) {
2647 last = hop;
2648 l = len;
2649 }
2650 }
2651 if (last > cur)
2652 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2653 cur = last;
2654 break;
2655 }
2656 len = (cur-start) % len;
2657 /* FALL THROUGH */
2658 case 'X':
2659 if (utf8) {
2660 if (len < 1) goto no_change;
2661 utf8_shrink:
2662 while (len > 0) {
2663 if (cur <= start)
2664 Perl_croak(aTHX_ "'%c' outside of string in pack",
2665 (int) TYPE_NO_MODIFIERS(datumtype));
2666 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2667 if (cur <= start)
2668 Perl_croak(aTHX_ "'%c' outside of string in pack",
2669 (int) TYPE_NO_MODIFIERS(datumtype));
2670 }
2671 len--;
2672 }
2673 } else {
2674 shrink:
2675 if (cur - start < len)
2676 Perl_croak(aTHX_ "'%c' outside of string in pack",
2677 (int) TYPE_NO_MODIFIERS(datumtype));
2678 cur -= len;
2679 }
2680 if (cur < start+symptr->strbeg) {
2681 /* Make sure group starts don't point into the void */
2682 tempsym_t *group;
2683 const STRLEN length = cur-start;
2684 for (group = symptr;
2685 group && length < group->strbeg;
2686 group = group->previous) group->strbeg = length;
2687 lookahead.strbeg = length;
2688 }
2689 break;
2690 case 'x' | TYPE_IS_SHRIEKING: {
2691 I32 ai32;
2692 if (!len) /* Avoid division by 0 */
2693 len = 1;
2694 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2695 else ai32 = (cur - start) % len;
2696 if (ai32 == 0) goto no_change;
2697 len -= ai32;
2698 }
2699 /* FALL THROUGH */
2700 case 'x':
2701 goto grow;
2702 case 'A':
2703 case 'Z':
2704 case 'a': {
2705 const char *aptr;
2706
2707 fromstr = NEXTFROM;
2708 aptr = SvPV_const(fromstr, fromlen);
2709 if (DO_UTF8(fromstr)) {
2710 const char *end, *s;
2711
2712 if (!utf8 && !SvUTF8(cat)) {
2713 marked_upgrade(aTHX_ cat, symptr);
2714 lookahead.flags |= FLAG_DO_UTF8;
2715 lookahead.strbeg = symptr->strbeg;
2716 utf8 = 1;
2717 start = SvPVX(cat);
2718 cur = start + SvCUR(cat);
2719 }
2720 if (howlen == e_star) {
2721 if (utf8) goto string_copy;
2722 len = fromlen+1;
2723 }
2724 s = aptr;
2725 end = aptr + fromlen;
2726 fromlen = datumtype == 'Z' ? len-1 : len;
2727 while ((I32) fromlen > 0 && s < end) {
2728 s += UTF8SKIP(s);
2729 fromlen--;
2730 }
2731 if (s > end)
2732 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2733 if (utf8) {
2734 len = fromlen;
2735 if (datumtype == 'Z') len++;
2736 fromlen = s-aptr;
2737 len += fromlen;
2738
2739 goto string_copy;
2740 }
2741 fromlen = len - fromlen;
2742 if (datumtype == 'Z') fromlen--;
2743 if (howlen == e_star) {
2744 len = fromlen;
2745 if (datumtype == 'Z') len++;
2746 }
2747 GROWING(0, cat, start, cur, len);
2748 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2749 datumtype | TYPE_IS_PACK))
2750 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2751 cur += fromlen;
2752 len -= fromlen;
2753 } else if (utf8) {
2754 if (howlen == e_star) {
2755 len = fromlen;
2756 if (datumtype == 'Z') len++;
2757 }
2758 if (len <= (I32) fromlen) {
2759 fromlen = len;
2760 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2761 }
2762 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2763 upgrade, so:
2764 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2765 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2766 len -= fromlen;
2767 while (fromlen > 0) {
2768 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2769 aptr++;
2770 fromlen--;
2771 }
2772 } else {
2773 string_copy:
2774 if (howlen == e_star) {
2775 len = fromlen;
2776 if (datumtype == 'Z') len++;
2777 }
2778 if (len <= (I32) fromlen) {
2779 fromlen = len;
2780 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2781 }
2782 GROWING(0, cat, start, cur, len);
2783 Copy(aptr, cur, fromlen, char);
2784 cur += fromlen;
2785 len -= fromlen;
2786 }
2787 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2788 cur += len;
2789 break;
2790 }
2791 case 'B':
2792 case 'b': {
2793 const char *str, *end;
2794 I32 l, field_len;
2795 U8 bits;
2796 bool utf8_source;
2797 U32 utf8_flags;
2798
2799 fromstr = NEXTFROM;
2800 str = SvPV_const(fromstr, fromlen);
2801 end = str + fromlen;
2802 if (DO_UTF8(fromstr)) {
2803 utf8_source = TRUE;
2804 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2805 } else {
2806 utf8_source = FALSE;
2807 utf8_flags = 0; /* Unused, but keep compilers happy */
2808 }
2809 if (howlen == e_star) len = fromlen;
2810 field_len = (len+7)/8;
2811 GROWING(utf8, cat, start, cur, field_len);
2812 if (len > (I32)fromlen) len = fromlen;
2813 bits = 0;
2814 l = 0;
2815 if (datumtype == 'B')
2816 while (l++ < len) {
2817 if (utf8_source) {
2818 UV val = 0;
2819 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2820 bits |= val & 1;
2821 } else bits |= *str++ & 1;
2822 if (l & 7) bits <<= 1;
2823 else {
2824 PUSH_BYTE(utf8, cur, bits);
2825 bits = 0;
2826 }
2827 }
2828 else
2829 /* datumtype == 'b' */
2830 while (l++ < len) {
2831 if (utf8_source) {
2832 UV val = 0;
2833 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2834 if (val & 1) bits |= 0x80;
2835 } else if (*str++ & 1)
2836 bits |= 0x80;
2837 if (l & 7) bits >>= 1;
2838 else {
2839 PUSH_BYTE(utf8, cur, bits);
2840 bits = 0;
2841 }
2842 }
2843 l--;
2844 if (l & 7) {
2845 if (datumtype == 'B')
2846 bits <<= 7 - (l & 7);
2847 else
2848 bits >>= 7 - (l & 7);
2849 PUSH_BYTE(utf8, cur, bits);
2850 l += 7;
2851 }
2852 /* Determine how many chars are left in the requested field */
2853 l /= 8;
2854 if (howlen == e_star) field_len = 0;
2855 else field_len -= l;
2856 Zero(cur, field_len, char);
2857 cur += field_len;
2858 break;
2859 }
2860 case 'H':
2861 case 'h': {
2862 const char *str, *end;
2863 I32 l, field_len;
2864 U8 bits;
2865 bool utf8_source;
2866 U32 utf8_flags;
2867
2868 fromstr = NEXTFROM;
2869 str = SvPV_const(fromstr, fromlen);
2870 end = str + fromlen;
2871 if (DO_UTF8(fromstr)) {
2872 utf8_source = TRUE;
2873 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2874 } else {
2875 utf8_source = FALSE;
2876 utf8_flags = 0; /* Unused, but keep compilers happy */
2877 }
2878 if (howlen == e_star) len = fromlen;
2879 field_len = (len+1)/2;
2880 GROWING(utf8, cat, start, cur, field_len);
2881 if (!utf8 && len > (I32)fromlen) len = fromlen;
2882 bits = 0;
2883 l = 0;
2884 if (datumtype == 'H')
2885 while (l++ < len) {
2886 if (utf8_source) {
2887 UV val = 0;
2888 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2889 if (val < 256 && isALPHA(val))
2890 bits |= (val + 9) & 0xf;
2891 else
2892 bits |= val & 0xf;
2893 } else if (isALPHA(*str))
2894 bits |= (*str++ + 9) & 0xf;
2895 else
2896 bits |= *str++ & 0xf;
2897 if (l & 1) bits <<= 4;
2898 else {
2899 PUSH_BYTE(utf8, cur, bits);
2900 bits = 0;
2901 }
2902 }
2903 else
2904 while (l++ < len) {
2905 if (utf8_source) {
2906 UV val = 0;
2907 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2908 if (val < 256 && isALPHA(val))
2909 bits |= ((val + 9) & 0xf) << 4;
2910 else
2911 bits |= (val & 0xf) << 4;
2912 } else if (isALPHA(*str))
2913 bits |= ((*str++ + 9) & 0xf) << 4;
2914 else
2915 bits |= (*str++ & 0xf) << 4;
2916 if (l & 1) bits >>= 4;
2917 else {
2918 PUSH_BYTE(utf8, cur, bits);
2919 bits = 0;
2920 }
2921 }
2922 l--;
2923 if (l & 1) {
2924 PUSH_BYTE(utf8, cur, bits);
2925 l++;
2926 }
2927 /* Determine how many chars are left in the requested field */
2928 l /= 2;
2929 if (howlen == e_star) field_len = 0;
2930 else field_len -= l;
2931 Zero(cur, field_len, char);
2932 cur += field_len;
2933 break;
2934 }
2935 case 'c':
2936 while (len-- > 0) {
2937 IV aiv;
2938 fromstr = NEXTFROM;
2939 aiv = SvIV(fromstr);
2940 if ((-128 > aiv || aiv > 127) &&
2941 ckWARN(WARN_PACK))
2942 Perl_warner(aTHX_ packWARN(WARN_PACK),
2943 "Character in 'c' format wrapped in pack");
2944 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2945 }
2946 break;
2947 case 'C':
2948 if (len == 0) {
2949 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2950 break;
2951 }
2952 GROWING(0, cat, start, cur, len);
2953 while (len-- > 0) {
2954 IV aiv;
2955 fromstr = NEXTFROM;
2956 aiv = SvIV(fromstr);
2957 if ((0 > aiv || aiv > 0xff) &&
2958 ckWARN(WARN_PACK))
2959 Perl_warner(aTHX_ packWARN(WARN_PACK),
2960 "Character in 'C' format wrapped in pack");
2961 *cur++ = (char)(aiv & 0xff);
2962 }
2963 break;
2964 case 'W': {
2965 char *end;
2966 U8 in_bytes = (U8)IN_BYTES;
2967
2968 end = start+SvLEN(cat)-1;
2969 if (utf8) end -= UTF8_MAXLEN-1;
2970 while (len-- > 0) {
2971 UV auv;
2972 fromstr = NEXTFROM;
2973 auv = SvUV(fromstr);
2974 if (in_bytes) auv = auv % 0x100;
2975 if (utf8) {
2976 W_utf8:
2977 if (cur > end) {
2978 *cur = '\0';
2979 SvCUR_set(cat, cur - start);
2980
2981 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2982 end = start+SvLEN(cat)-UTF8_MAXLEN;
2983 }
2984 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2985 NATIVE_TO_UNI(auv),
2986 warn_utf8 ?
2987 0 : UNICODE_ALLOW_ANY);
2988 } else {
2989 if (auv >= 0x100) {
2990 if (!SvUTF8(cat)) {
2991 *cur = '\0';
2992 SvCUR_set(cat, cur - start);
2993 marked_upgrade(aTHX_ cat, symptr);
2994 lookahead.flags |= FLAG_DO_UTF8;
2995 lookahead.strbeg = symptr->strbeg;
2996 utf8 = 1;
2997 start = SvPVX(cat);
2998 cur = start + SvCUR(cat);
2999 end = start+SvLEN(cat)-UTF8_MAXLEN;
3000 goto W_utf8;
3001 }
3002 if (ckWARN(WARN_PACK))
3003 Perl_warner(aTHX_ packWARN(WARN_PACK),
3004 "Character in 'W' format wrapped in pack");
3005 auv &= 0xff;
3006 }
3007 if (cur >= end) {
3008 *cur = '\0';
3009 SvCUR_set(cat, cur - start);
3010 GROWING(0, cat, start, cur, len+1);
3011 end = start+SvLEN(cat)-1;
3012 }
3013 *(U8 *) cur++ = (U8)auv;
3014 }
3015 }
3016 break;
3017 }
3018 case 'U': {
3019 char *end;
3020
3021 if (len == 0) {
3022 if (!(symptr->flags & FLAG_DO_UTF8)) {
3023 marked_upgrade(aTHX_ cat, symptr);
3024 lookahead.flags |= FLAG_DO_UTF8;
3025 lookahead.strbeg = symptr->strbeg;
3026 }
3027 utf8 = 0;
3028 goto no_change;
3029 }
3030
3031 end = start+SvLEN(cat);
3032 if (!utf8) end -= UTF8_MAXLEN;
3033 while (len-- > 0) {
3034 UV auv;
3035 fromstr = NEXTFROM;
3036 auv = SvUV(fromstr);
3037 if (utf8) {
3038 U8 buffer[UTF8_MAXLEN], *endb;
3039 endb = uvuni_to_utf8_flags(buffer, auv,
3040 warn_utf8 ?
3041 0 : UNICODE_ALLOW_ANY);
3042 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3043 *cur = '\0';
3044 SvCUR_set(cat, cur - start);
3045 GROWING(0, cat, start, cur,
3046 len+(endb-buffer)*UTF8_EXPAND);
3047 end = start+SvLEN(cat);
3048 }
3049 cur = bytes_to_uni(buffer, endb-buffer, cur);
3050 } else {
3051 if (cur >= end) {
3052 *cur = '\0';
3053 SvCUR_set(cat, cur - start);
3054 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3055 end = start+SvLEN(cat)-UTF8_MAXLEN;
3056 }
3057 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3058 warn_utf8 ?
3059 0 : UNICODE_ALLOW_ANY);
3060 }
3061 }
3062 break;
3063 }
3064 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3065 case 'f':
3066 while (len-- > 0) {
3067 float afloat;
3068 NV anv;
3069 fromstr = NEXTFROM;
3070 anv = SvNV(fromstr);
3071#ifdef __VOS__
3072 /* VOS does not automatically map a floating-point overflow
3073 during conversion from double to float into infinity, so we
3074 do it by hand. This code should either be generalized for
3075 any OS that needs it, or removed if and when VOS implements
3076 posix-976 (suggestion to support mapping to infinity).
3077 Paul.Green@stratus.com 02-04-02. */
3078 if (anv > FLT_MAX)
3079 afloat = _float_constants[0]; /* single prec. inf. */
3080 else if (anv < -FLT_MAX)
3081 afloat = _float_constants[0]; /* single prec. inf. */
3082 else afloat = (float) anv;
3083#else /* __VOS__ */
3084# if defined(VMS) && !defined(__IEEE_FP)
3085 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3086 * on Alpha; fake it if we don't have them.
3087 */
3088 if (anv > FLT_MAX)
3089 afloat = FLT_MAX;
3090 else if (anv < -FLT_MAX)
3091 afloat = -FLT_MAX;
3092 else afloat = (float)anv;
3093# else
3094 afloat = (float)anv;
3095# endif
3096#endif /* __VOS__ */
3097 DO_BO_PACK_N(afloat, float);
3098 PUSH_VAR(utf8, cur, afloat);
3099 }
3100 break;
3101 case 'd':
3102 while (len-- > 0) {
3103 double adouble;
3104 NV anv;
3105 fromstr = NEXTFROM;
3106 anv = SvNV(fromstr);
3107#ifdef __VOS__
3108 /* VOS does not automatically map a floating-point overflow
3109 during conversion from long double to double into infinity,
3110 so we do it by hand. This code should either be generalized
3111 for any OS that needs it, or removed if and when VOS
3112 implements posix-976 (suggestion to support mapping to
3113 infinity). Paul.Green@stratus.com 02-04-02. */
3114 if (anv > DBL_MAX)
3115 adouble = _double_constants[0]; /* double prec. inf. */
3116 else if (anv < -DBL_MAX)
3117 adouble = _double_constants[0]; /* double prec. inf. */
3118 else adouble = (double) anv;
3119#else /* __VOS__ */
3120# if defined(VMS) && !defined(__IEEE_FP)
3121 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3122 * on Alpha; fake it if we don't have them.
3123 */
3124 if (anv > DBL_MAX)
3125 adouble = DBL_MAX;
3126 else if (anv < -DBL_MAX)
3127 adouble = -DBL_MAX;
3128 else adouble = (double)anv;
3129# else
3130 adouble = (double)anv;
3131# endif
3132#endif /* __VOS__ */
3133 DO_BO_PACK_N(adouble, double);
3134 PUSH_VAR(utf8, cur, adouble);
3135 }
3136 break;
3137 case 'F': {
3138 NV anv;
3139 Zero(&anv, 1, NV); /* can be long double with unused bits */
3140 while (len-- > 0) {
3141 fromstr = NEXTFROM;
3142 anv = SvNV(fromstr);
3143 DO_BO_PACK_N(anv, NV);
3144 PUSH_VAR(utf8, cur, anv);
3145 }
3146 break;
3147 }
3148#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3149 case 'D': {
3150 long double aldouble;
3151 /* long doubles can have unused bits, which may be nonzero */
3152 Zero(&aldouble, 1, long double);
3153 while (len-- > 0) {
3154 fromstr = NEXTFROM;
3155 aldouble = (long double)SvNV(fromstr);
3156 DO_BO_PACK_N(aldouble, long double);
3157 PUSH_VAR(utf8, cur, aldouble);
3158 }
3159 break;
3160 }
3161#endif
3162#ifdef PERL_PACK_CAN_SHRIEKSIGN
3163 case 'n' | TYPE_IS_SHRIEKING:
3164#endif
3165 case 'n':
3166 while (len-- > 0) {
3167 I16 ai16;
3168 fromstr = NEXTFROM;
3169 ai16 = (I16)SvIV(fromstr);
3170#ifdef HAS_HTONS
3171 ai16 = PerlSock_htons(ai16);
3172#endif
3173 PUSH16(utf8, cur, &ai16);
3174 }
3175 break;
3176#ifdef PERL_PACK_CAN_SHRIEKSIGN
3177 case 'v' | TYPE_IS_SHRIEKING:
3178#endif
3179 case 'v':
3180 while (len-- > 0) {
3181 I16 ai16;
3182 fromstr = NEXTFROM;
3183 ai16 = (I16)SvIV(fromstr);
3184#ifdef HAS_HTOVS
3185 ai16 = htovs(ai16);
3186#endif
3187 PUSH16(utf8, cur, &ai16);
3188 }
3189 break;
3190 case 'S' | TYPE_IS_SHRIEKING:
3191#if SHORTSIZE != SIZE16
3192 while (len-- > 0) {
3193 unsigned short aushort;
3194 fromstr = NEXTFROM;
3195 aushort = SvUV(fromstr);
3196 DO_BO_PACK(aushort, s);
3197 PUSH_VAR(utf8, cur, aushort);
3198 }
3199 break;
3200#else
3201 /* Fall through! */
3202#endif
3203 case 'S':
3204 while (len-- > 0) {
3205 U16 au16;
3206 fromstr = NEXTFROM;
3207 au16 = (U16)SvUV(fromstr);
3208 DO_BO_PACK(au16, 16);
3209 PUSH16(utf8, cur, &au16);
3210 }
3211 break;
3212 case 's' | TYPE_IS_SHRIEKING:
3213#if SHORTSIZE != SIZE16
3214 while (len-- > 0) {
3215 short ashort;
3216 fromstr = NEXTFROM;
3217 ashort = SvIV(fromstr);
3218 DO_BO_PACK(ashort, s);
3219 PUSH_VAR(utf8, cur, ashort);
3220 }
3221 break;
3222#else
3223 /* Fall through! */
3224#endif
3225 case 's':
3226 while (len-- > 0) {
3227 I16 ai16;
3228 fromstr = NEXTFROM;
3229 ai16 = (I16)SvIV(fromstr);
3230 DO_BO_PACK(ai16, 16);
3231 PUSH16(utf8, cur, &ai16);
3232 }
3233 break;
3234 case 'I':
3235 case 'I' | TYPE_IS_SHRIEKING:
3236 while (len-- > 0) {
3237 unsigned int auint;
3238 fromstr = NEXTFROM;
3239 auint = SvUV(fromstr);
3240 DO_BO_PACK(auint, i);
3241 PUSH_VAR(utf8, cur, auint);
3242 }
3243 break;
3244 case 'j':
3245 while (len-- > 0) {
3246 IV aiv;
3247 fromstr = NEXTFROM;
3248 aiv = SvIV(fromstr);
3249#if IVSIZE == INTSIZE
3250 DO_BO_PACK(aiv, i);
3251#elif IVSIZE == LONGSIZE
3252 DO_BO_PACK(aiv, l);
3253#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3254 DO_BO_PACK(aiv, 64);
3255#else
3256 Perl_croak(aTHX_ "'j' not supported on this platform");
3257#endif
3258 PUSH_VAR(utf8, cur, aiv);
3259 }
3260 break;
3261 case 'J':
3262 while (len-- > 0) {
3263 UV auv;
3264 fromstr = NEXTFROM;
3265 auv = SvUV(fromstr);
3266#if UVSIZE == INTSIZE
3267 DO_BO_PACK(auv, i);
3268#elif UVSIZE == LONGSIZE
3269 DO_BO_PACK(auv, l);
3270#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3271 DO_BO_PACK(auv, 64);
3272#else
3273 Perl_croak(aTHX_ "'J' not supported on this platform");
3274#endif
3275 PUSH_VAR(utf8, cur, auv);
3276 }
3277 break;
3278 case 'w':
3279 while (len-- > 0) {
3280 NV anv;
3281 fromstr = NEXTFROM;
3282 anv = SvNV(fromstr);
3283
3284 if (anv < 0) {
3285 *cur = '\0';
3286 SvCUR_set(cat, cur - start);
3287 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3288 }
3289
3290 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3291 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3292 any negative IVs will have already been got by the croak()
3293 above. IOK is untrue for fractions, so we test them
3294 against UV_MAX_P1. */
3295 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3296 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3297 char *in = buf + sizeof(buf);
3298 UV auv = SvUV(fromstr);
3299
3300 do {
3301 *--in = (char)((auv & 0x7f) | 0x80);
3302 auv >>= 7;
3303 } while (auv);
3304 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3305 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3306 in, (buf + sizeof(buf)) - in);
3307 } else if (SvPOKp(fromstr))
3308 goto w_string;
3309 else if (SvNOKp(fromstr)) {
3310 /* 10**NV_MAX_10_EXP is the largest power of 10
3311 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3312 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3313 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3314 And with that many bytes only Inf can overflow.
3315 Some C compilers are strict about integral constant
3316 expressions so we conservatively divide by a slightly
3317 smaller integer instead of multiplying by the exact
3318 floating-point value.
3319 */
3320#ifdef NV_MAX_10_EXP
3321 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3322 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3323#else
3324 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3325 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3326#endif
3327 char *in = buf + sizeof(buf);
3328
3329 anv = Perl_floor(anv);
3330 do {
3331 const NV next = Perl_floor(anv / 128);
3332 if (in <= buf) /* this cannot happen ;-) */
3333 Perl_croak(aTHX_ "Cannot compress integer in pack");
3334 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3335 anv = next;
3336 } while (anv > 0);
3337 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3338 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3339 in, (buf + sizeof(buf)) - in);
3340 } else {
3341 const char *from;
3342 char *result, *in;
3343 SV *norm;
3344 STRLEN len;
3345 bool done;
3346
3347 w_string:
3348 /* Copy string and check for compliance */
3349 from = SvPV_const(fromstr, len);
3350 if ((norm = is_an_int(from, len)) == NULL)
3351 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3352
3353 Newx(result, len, char);
3354 in = result + len;
3355 done = FALSE;
3356 while (!done) *--in = div128(norm, &done) | 0x80;
3357 result[len - 1] &= 0x7F; /* clear continue bit */
3358 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3359 in, (result + len) - in);
3360 Safefree(result);
3361 SvREFCNT_dec(norm); /* free norm */
3362 }
3363 }
3364 break;
3365 case 'i':
3366 case 'i' | TYPE_IS_SHRIEKING:
3367 while (len-- > 0) {
3368 int aint;
3369 fromstr = NEXTFROM;
3370 aint = SvIV(fromstr);
3371 DO_BO_PACK(aint, i);
3372 PUSH_VAR(utf8, cur, aint);
3373 }
3374 break;
3375#ifdef PERL_PACK_CAN_SHRIEKSIGN
3376 case 'N' | TYPE_IS_SHRIEKING:
3377#endif
3378 case 'N':
3379 while (len-- > 0) {
3380 U32 au32;
3381 fromstr = NEXTFROM;
3382 au32 = SvUV(fromstr);
3383#ifdef HAS_HTONL
3384 au32 = PerlSock_htonl(au32);
3385#endif
3386 PUSH32(utf8, cur, &au32);
3387 }
3388 break;
3389#ifdef PERL_PACK_CAN_SHRIEKSIGN
3390 case 'V' | TYPE_IS_SHRIEKING:
3391#endif
3392 case 'V':
3393 while (len-- > 0) {
3394 U32 au32;
3395 fromstr = NEXTFROM;
3396 au32 = SvUV(fromstr);
3397#ifdef HAS_HTOVL
3398 au32 = htovl(au32);
3399#endif
3400 PUSH32(utf8, cur, &au32);
3401 }
3402 break;
3403 case 'L' | TYPE_IS_SHRIEKING:
3404#if LONGSIZE != SIZE32
3405 while (len-- > 0) {
3406 unsigned long aulong;
3407 fromstr = NEXTFROM;
3408 aulong = SvUV(fromstr);
3409 DO_BO_PACK(aulong, l);
3410 PUSH_VAR(utf8, cur, aulong);
3411 }
3412 break;
3413#else
3414 /* Fall though! */
3415#endif
3416 case 'L':
3417 while (len-- > 0) {
3418 U32 au32;
3419 fromstr = NEXTFROM;
3420 au32 = SvUV(fromstr);
3421 DO_BO_PACK(au32, 32);
3422 PUSH32(utf8, cur, &au32);
3423 }
3424 break;
3425 case 'l' | TYPE_IS_SHRIEKING:
3426#if LONGSIZE != SIZE32
3427 while (len-- > 0) {
3428 long along;
3429 fromstr = NEXTFROM;
3430 along = SvIV(fromstr);
3431 DO_BO_PACK(along, l);
3432 PUSH_VAR(utf8, cur, along);
3433 }
3434 break;
3435#else
3436 /* Fall though! */
3437#endif
3438 case 'l':
3439 while (len-- > 0) {
3440 I32 ai32;
3441 fromstr = NEXTFROM;
3442 ai32 = SvIV(fromstr);
3443 DO_BO_PACK(ai32, 32);
3444 PUSH32(utf8, cur, &ai32);
3445 }
3446 break;
3447#ifdef HAS_QUAD
3448 case 'Q':
3449 while (len-- > 0) {
3450 Uquad_t auquad;
3451 fromstr = NEXTFROM;
3452 auquad = (Uquad_t) SvUV(fromstr);
3453 DO_BO_PACK(auquad, 64);
3454 PUSH_VAR(utf8, cur, auquad);
3455 }
3456 break;
3457 case 'q':
3458 while (len-- > 0) {
3459 Quad_t aquad;
3460 fromstr = NEXTFROM;
3461 aquad = (Quad_t)SvIV(fromstr);
3462 DO_BO_PACK(aquad, 64);
3463 PUSH_VAR(utf8, cur, aquad);
3464 }
3465 break;
3466#endif /* HAS_QUAD */
3467 case 'P':
3468 len = 1; /* assume SV is correct length */
3469 GROWING(utf8, cat, start, cur, sizeof(char *));
3470 /* Fall through! */
3471 case 'p':
3472 while (len-- > 0) {
3473 const char *aptr;
3474
3475 fromstr = NEXTFROM;
3476 SvGETMAGIC(fromstr);
3477 if (!SvOK(fromstr)) aptr = NULL;
3478 else {
3479 /* XXX better yet, could spirit away the string to
3480 * a safe spot and hang on to it until the result
3481 * of pack() (and all copies of the result) are
3482 * gone.
3483 */
3484 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3485 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
3486 Perl_warner(aTHX_ packWARN(WARN_PACK),
3487 "Attempt to pack pointer to temporary value");
3488 }
3489 if (SvPOK(fromstr) || SvNIOK(fromstr))
3490 aptr = SvPV_nomg_const_nolen(fromstr);
3491 else
3492 aptr = SvPV_force_flags_nolen(fromstr, 0);
3493 }
3494 DO_BO_PACK_PC(aptr);
3495 PUSH_VAR(utf8, cur, aptr);
3496 }
3497 break;
3498 case 'u': {
3499 const char *aptr, *aend;
3500 bool from_utf8;
3501
3502 fromstr = NEXTFROM;
3503 if (len <= 2) len = 45;
3504 else len = len / 3 * 3;
3505 if (len >= 64) {
3506 if (ckWARN(WARN_PACK))
3507 Perl_warner(aTHX_ packWARN(WARN_PACK),
3508 "Field too wide in 'u' format in pack");
3509 len = 63;
3510 }
3511 aptr = SvPV_const(fromstr, fromlen);
3512 from_utf8 = DO_UTF8(fromstr);
3513 if (from_utf8) {
3514 aend = aptr + fromlen;
3515 fromlen = sv_len_utf8(fromstr);
3516 } else aend = NULL; /* Unused, but keep compilers happy */
3517 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3518 while (fromlen > 0) {
3519 U8 *end;
3520 I32 todo;
3521 U8 hunk[1+63/3*4+1];
3522
3523 if ((I32)fromlen > len)
3524 todo = len;
3525 else
3526 todo = fromlen;
3527 if (from_utf8) {
3528 char buffer[64];
3529 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3530 'u' | TYPE_IS_PACK)) {
3531 *cur = '\0';
3532 SvCUR_set(cat, cur - start);
3533 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3534 }
3535 end = doencodes(hunk, buffer, todo);
3536 } else {
3537 end = doencodes(hunk, aptr, todo);
3538 aptr += todo;
3539 }
3540 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3541 fromlen -= todo;
3542 }
3543 break;
3544 }
3545 }
3546 *cur = '\0';
3547 SvCUR_set(cat, cur - start);
3548 no_change:
3549 *symptr = lookahead;
3550 }
3551 return beglist;
3552}
3553#undef NEXTFROM
3554
3555
3556PP(pp_pack)
3557{
3558 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3559 register SV *cat = TARG;
3560 STRLEN fromlen;
3561 SV *pat_sv = *++MARK;
3562 register const char *pat = SvPV_const(pat_sv, fromlen);
3563 register const char *patend = pat + fromlen;
3564
3565 MARK++;
3566 sv_setpvn(cat, "", 0);
3567 SvUTF8_off(cat);
3568
3569 packlist(cat, pat, patend, MARK, SP + 1);
3570
3571 SvSETMAGIC(cat);
3572 SP = ORIGMARK;
3573 PUSHs(cat);
3574 RETURN;
3575}
3576
3577/*
3578 * Local variables:
3579 * c-indentation-style: bsd
3580 * c-basic-offset: 4
3581 * indent-tabs-mode: t
3582 * End:
3583 *
3584 * ex: set ts=8 sts=4 sw=4 noet:
3585 */