This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Regression test for the fix in change #25308,
[perl5.git] / pp_pack.c
... / ...
CommitLineData
1/* pp_pack.c
2 *
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 */
18
19/* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
24 *
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
27 */
28
29
30#include "EXTERN.h"
31#define PERL_IN_PP_PACK_C
32#include "perl.h"
33
34/* Types used by pack/unpack */
35typedef enum {
36 e_no_len, /* no length */
37 e_number, /* number, [] */
38 e_star /* asterisk */
39} howlen_t;
40
41typedef struct tempsym {
42 const char* patptr; /* current template char */
43 const char* patend; /* one after last char */
44 const char* grpbeg; /* 1st char of ()-group */
45 const char* grpend; /* end of ()-group */
46 I32 code; /* template code (!<>) */
47 I32 length; /* length/repeat count */
48 howlen_t howlen; /* how length is given */
49 int level; /* () nesting level */
50 U32 flags; /* /=4, comma=2, pack=1 */
51 /* and group modifiers */
52 STRLEN strbeg; /* offset of group start */
53 struct tempsym *previous; /* previous group */
54} tempsym_t;
55
56#define TEMPSYM_INIT(symptr, p, e, f) \
57 STMT_START { \
58 (symptr)->patptr = (p); \
59 (symptr)->patend = (e); \
60 (symptr)->grpbeg = NULL; \
61 (symptr)->grpend = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->code = 0; \
64 (symptr)->length = 0; \
65 (symptr)->howlen = 0; \
66 (symptr)->level = 0; \
67 (symptr)->flags = (f); \
68 (symptr)->strbeg = 0; \
69 (symptr)->previous = NULL; \
70 } STMT_END
71
72#if PERL_VERSION >= 9
73# define PERL_PACK_CAN_BYTEORDER
74# define PERL_PACK_CAN_SHRIEKSIGN
75#endif
76
77#ifndef CHAR_BIT
78# define CHAR_BIT 8
79#endif
80/* Maximum number of bytes to which a byte can grow due to upgrade */
81#define UTF8_EXPAND 2
82
83/*
84 * Offset for integer pack/unpack.
85 *
86 * On architectures where I16 and I32 aren't really 16 and 32 bits,
87 * which for now are all Crays, pack and unpack have to play games.
88 */
89
90/*
91 * These values are required for portability of pack() output.
92 * If they're not right on your machine, then pack() and unpack()
93 * wouldn't work right anyway; you'll need to apply the Cray hack.
94 * (I'd like to check them with #if, but you can't use sizeof() in
95 * the preprocessor.) --???
96 */
97/*
98 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
99 defines are now in config.h. --Andy Dougherty April 1998
100 */
101#define SIZE16 2
102#define SIZE32 4
103
104/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
105 --jhi Feb 1999 */
106
107#if U16SIZE > SIZE16 || U32SIZE > SIZE32
108# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
109# define OFF16(p) ((char*)(p))
110# define OFF32(p) ((char*)(p))
111# else
112# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
113# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
114# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
115# else
116 ++++ bad cray byte order
117# endif
118# endif
119#else
120# define OFF16(p) ((char *) (p))
121# define OFF32(p) ((char *) (p))
122#endif
123
124/* Only to be used inside a loop (see the break) */
125#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
126 if (utf8) { \
127 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
128 } else { \
129 Copy(s, OFF16(p), SIZE16, char); \
130 (s) += SIZE16; \
131 } \
132} STMT_END
133
134/* Only to be used inside a loop (see the break) */
135#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
136 if (utf8) { \
137 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
138 } else { \
139 Copy(s, OFF32(p), SIZE32, char); \
140 (s) += SIZE32; \
141 } \
142} STMT_END
143
144#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
145#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
146
147/* Only to be used inside a loop (see the break) */
148#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
149STMT_START { \
150 if (utf8) { \
151 if (!uni_to_bytes(aTHX_ &s, strend, \
152 (char *) &var, sizeof(var), datumtype)) break;\
153 } else { \
154 Copy(s, (char *) &var, sizeof(var), char); \
155 s += sizeof(var); \
156 } \
157} STMT_END
158
159#define PUSH_VAR(utf8, aptr, var) \
160 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
161
162/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
163#define MAX_SUB_TEMPLATE_LEVEL 100
164
165/* flags (note that type modifiers can also be used as flags!) */
166#define FLAG_WAS_UTF8 0x40
167#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
168#define FLAG_UNPACK_ONLY_ONE 0x10
169#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
170#define FLAG_SLASH 0x04
171#define FLAG_COMMA 0x02
172#define FLAG_PACK 0x01
173
174STATIC SV *
175S_mul128(pTHX_ SV *sv, U8 m)
176{
177 STRLEN len;
178 char *s = SvPV(sv, len);
179 char *t;
180
181 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
182 SV *tmpNew = newSVpvn("0000000000", 10);
183
184 sv_catsv(tmpNew, sv);
185 SvREFCNT_dec(sv); /* free old sv */
186 sv = tmpNew;
187 s = SvPV(sv, len);
188 }
189 t = s + len - 1;
190 while (!*t) /* trailing '\0'? */
191 t--;
192 while (t > s) {
193 const U32 i = ((*t - '0') << 7) + m;
194 *(t--) = '0' + (char)(i % 10);
195 m = (char)(i / 10);
196 }
197 return (sv);
198}
199
200/* Explosives and implosives. */
201
202#if 'I' == 73 && 'J' == 74
203/* On an ASCII/ISO kind of system */
204#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
205#else
206/*
207 Some other sort of character set - use memchr() so we don't match
208 the null byte.
209 */
210#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
211#endif
212
213/* type modifiers */
214#define TYPE_IS_SHRIEKING 0x100
215#define TYPE_IS_BIG_ENDIAN 0x200
216#define TYPE_IS_LITTLE_ENDIAN 0x400
217#define TYPE_IS_PACK 0x800
218#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
219#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
220#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
221
222#ifdef PERL_PACK_CAN_SHRIEKSIGN
223# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
224#else
225# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
226#endif
227
228#ifndef PERL_PACK_CAN_BYTEORDER
229/* Put "can't" first because it is shorter */
230# define TYPE_ENDIANNESS(t) 0
231# define TYPE_NO_ENDIANNESS(t) (t)
232
233# define ENDIANNESS_ALLOWED_TYPES ""
234
235# define DO_BO_UNPACK(var, type)
236# define DO_BO_PACK(var, type)
237# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
238# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
239# define DO_BO_UNPACK_N(var, type)
240# define DO_BO_PACK_N(var, type)
241# define DO_BO_UNPACK_P(var)
242# define DO_BO_PACK_P(var)
243
244#else /* PERL_PACK_CAN_BYTEORDER */
245
246# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
247# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
248
249# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
250
251# define DO_BO_UNPACK(var, type) \
252 STMT_START { \
253 switch (TYPE_ENDIANNESS(datumtype)) { \
254 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
255 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
256 default: break; \
257 } \
258 } STMT_END
259
260# define DO_BO_PACK(var, type) \
261 STMT_START { \
262 switch (TYPE_ENDIANNESS(datumtype)) { \
263 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
264 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
265 default: break; \
266 } \
267 } STMT_END
268
269# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
270 STMT_START { \
271 switch (TYPE_ENDIANNESS(datumtype)) { \
272 case TYPE_IS_BIG_ENDIAN: \
273 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
274 break; \
275 case TYPE_IS_LITTLE_ENDIAN: \
276 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
277 break; \
278 default: \
279 break; \
280 } \
281 } STMT_END
282
283# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
284 STMT_START { \
285 switch (TYPE_ENDIANNESS(datumtype)) { \
286 case TYPE_IS_BIG_ENDIAN: \
287 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
288 break; \
289 case TYPE_IS_LITTLE_ENDIAN: \
290 var = (post_cast *) my_htole ## type ((pre_cast) var); \
291 break; \
292 default: \
293 break; \
294 } \
295 } STMT_END
296
297# define BO_CANT_DOIT(action, type) \
298 STMT_START { \
299 switch (TYPE_ENDIANNESS(datumtype)) { \
300 case TYPE_IS_BIG_ENDIAN: \
301 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
302 "platform", #action, #type); \
303 break; \
304 case TYPE_IS_LITTLE_ENDIAN: \
305 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
306 "platform", #action, #type); \
307 break; \
308 default: \
309 break; \
310 } \
311 } STMT_END
312
313# if PTRSIZE == INTSIZE
314# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
315# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
316# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
317# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
318# elif PTRSIZE == LONGSIZE
319# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
320# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
321# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
322# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
323# else
324# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
325# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
326# endif
327
328# if defined(my_htolen) && defined(my_letohn) && \
329 defined(my_htoben) && defined(my_betohn)
330# define DO_BO_UNPACK_N(var, type) \
331 STMT_START { \
332 switch (TYPE_ENDIANNESS(datumtype)) { \
333 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
334 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
335 default: break; \
336 } \
337 } STMT_END
338
339# define DO_BO_PACK_N(var, type) \
340 STMT_START { \
341 switch (TYPE_ENDIANNESS(datumtype)) { \
342 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
343 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
344 default: break; \
345 } \
346 } STMT_END
347# else
348# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
349# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
350# endif
351
352#endif /* PERL_PACK_CAN_BYTEORDER */
353
354#define PACK_SIZE_CANNOT_CSUM 0x80
355#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
356#define PACK_SIZE_MASK 0x3F
357
358/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
359 in). You're unlikely ever to need to regenerate them. */
360
361#if TYPE_IS_SHRIEKING != 0x100
362 ++++shriek offset should be 256
363#endif
364
365typedef U8 packprops_t;
366#if 'J'-'I' == 1
367/* ASCII */
368const packprops_t packprops[512] = {
369 /* normal */
370 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
371 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
372 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
373 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
374 0, 0, 0,
375 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
376#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
377 /* D */ LONG_DOUBLESIZE,
378#else
379 0,
380#endif
381 0,
382 /* F */ NVSIZE,
383 0, 0,
384 /* I */ sizeof(unsigned int),
385 /* J */ UVSIZE,
386 0,
387 /* L */ SIZE32,
388 0,
389 /* N */ SIZE32,
390 0, 0,
391#if defined(HAS_QUAD)
392 /* Q */ sizeof(Uquad_t),
393#else
394 0,
395#endif
396 0,
397 /* S */ SIZE16,
398 0,
399 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
400 /* V */ SIZE32,
401 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
402 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
403 /* c */ sizeof(char),
404 /* d */ sizeof(double),
405 0,
406 /* f */ sizeof(float),
407 0, 0,
408 /* i */ sizeof(int),
409 /* j */ IVSIZE,
410 0,
411 /* l */ SIZE32,
412 0,
413 /* n */ SIZE16,
414 0,
415 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
416#if defined(HAS_QUAD)
417 /* q */ sizeof(Quad_t),
418#else
419 0,
420#endif
421 0,
422 /* s */ SIZE16,
423 0, 0,
424 /* v */ SIZE16,
425 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
426 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
427 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
428 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
429 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
430 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
431 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
432 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
433 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
434 0, 0, 0, 0, 0, 0, 0, 0,
435 /* shrieking */
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 /* I */ sizeof(unsigned int),
442 0, 0,
443 /* L */ sizeof(unsigned long),
444 0,
445#if defined(PERL_PACK_CAN_SHRIEKSIGN)
446 /* N */ SIZE32,
447#else
448 0,
449#endif
450 0, 0, 0, 0,
451 /* S */ sizeof(unsigned short),
452 0, 0,
453#if defined(PERL_PACK_CAN_SHRIEKSIGN)
454 /* V */ SIZE32,
455#else
456 0,
457#endif
458 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
459 0, 0,
460 /* i */ sizeof(int),
461 0, 0,
462 /* l */ sizeof(long),
463 0,
464#if defined(PERL_PACK_CAN_SHRIEKSIGN)
465 /* n */ SIZE16,
466#else
467 0,
468#endif
469 0, 0, 0, 0,
470 /* s */ sizeof(short),
471 0, 0,
472#if defined(PERL_PACK_CAN_SHRIEKSIGN)
473 /* v */ SIZE16,
474#else
475 0,
476#endif
477 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
478 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
479 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
480 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
481 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
482 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
483 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
484 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
485 0, 0, 0, 0, 0, 0, 0, 0, 0
486};
487#else
488/* EBCDIC (or bust) */
489const packprops_t packprops[512] = {
490 /* normal */
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
495 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
496 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
497 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
498 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
499 0, 0, 0,
500 /* c */ sizeof(char),
501 /* d */ sizeof(double),
502 0,
503 /* f */ sizeof(float),
504 0, 0,
505 /* i */ sizeof(int),
506 0, 0, 0, 0, 0, 0, 0,
507 /* j */ IVSIZE,
508 0,
509 /* l */ SIZE32,
510 0,
511 /* n */ SIZE16,
512 0,
513 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
514#if defined(HAS_QUAD)
515 /* q */ sizeof(Quad_t),
516#else
517 0,
518#endif
519 0, 0, 0, 0, 0, 0, 0, 0, 0,
520 /* s */ SIZE16,
521 0, 0,
522 /* v */ SIZE16,
523 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
524 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
525 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
526 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
527#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
528 /* D */ LONG_DOUBLESIZE,
529#else
530 0,
531#endif
532 0,
533 /* F */ NVSIZE,
534 0, 0,
535 /* I */ sizeof(unsigned int),
536 0, 0, 0, 0, 0, 0, 0,
537 /* J */ UVSIZE,
538 0,
539 /* L */ SIZE32,
540 0,
541 /* N */ SIZE32,
542 0, 0,
543#if defined(HAS_QUAD)
544 /* Q */ sizeof(Uquad_t),
545#else
546 0,
547#endif
548 0, 0, 0, 0, 0, 0, 0, 0, 0,
549 /* S */ SIZE16,
550 0,
551 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
552 /* V */ SIZE32,
553 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
554 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
555 0, 0, 0, 0, 0, 0, 0, 0, 0,
556 /* shrieking */
557 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
558 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
559 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
560 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
561 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
562 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
564 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
565 0, 0, 0, 0, 0, 0, 0, 0, 0,
566 /* i */ sizeof(int),
567 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 /* l */ sizeof(long),
569 0,
570#if defined(PERL_PACK_CAN_SHRIEKSIGN)
571 /* n */ SIZE16,
572#else
573 0,
574#endif
575 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
576 /* s */ sizeof(short),
577 0, 0,
578#if defined(PERL_PACK_CAN_SHRIEKSIGN)
579 /* v */ SIZE16,
580#else
581 0,
582#endif
583 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
584 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
585 0, 0, 0,
586 /* I */ sizeof(unsigned int),
587 0, 0, 0, 0, 0, 0, 0, 0, 0,
588 /* L */ sizeof(unsigned long),
589 0,
590#if defined(PERL_PACK_CAN_SHRIEKSIGN)
591 /* N */ SIZE32,
592#else
593 0,
594#endif
595 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
596 /* S */ sizeof(unsigned short),
597 0, 0,
598#if defined(PERL_PACK_CAN_SHRIEKSIGN)
599 /* V */ SIZE32,
600#else
601 0,
602#endif
603 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
604 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
605};
606#endif
607
608STATIC U8
609uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
610{
611 UV val;
612 STRLEN retlen;
613 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
614 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
615 /* We try to process malformed UTF-8 as much as possible (preferrably with
616 warnings), but these two mean we make no progress in the string and
617 might enter an infinite loop */
618 if (retlen == (STRLEN) -1 || retlen == 0)
619 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
620 (int) TYPE_NO_MODIFIERS(datumtype));
621 if (val >= 0x100) {
622 if (ckWARN(WARN_UNPACK))
623 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
624 "Character in '%c' format wrapped in unpack",
625 (int) TYPE_NO_MODIFIERS(datumtype));
626 val &= 0xff;
627 }
628 *s += retlen;
629 return (U8)val;
630}
631
632#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
633 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
634 *(U8 *)(s)++)
635
636STATIC bool
637uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
638{
639 UV val;
640 STRLEN retlen;
641 const char *from = *s;
642 int bad = 0;
643 const U32 flags = ckWARN(WARN_UTF8) ?
644 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
645 for (;buf_len > 0; buf_len--) {
646 if (from >= end) return FALSE;
647 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
648 if (retlen == (STRLEN) -1 || retlen == 0) {
649 from += UTF8SKIP(from);
650 bad |= 1;
651 } else from += retlen;
652 if (val >= 0x100) {
653 bad |= 2;
654 val &= 0xff;
655 }
656 *(U8 *)buf++ = (U8)val;
657 }
658 /* We have enough characters for the buffer. Did we have problems ? */
659 if (bad) {
660 if (bad & 1) {
661 /* Rewalk the string fragment while warning */
662 const char *ptr;
663 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
664 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
665 if (ptr >= end) break;
666 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
667 }
668 if (from > end) from = end;
669 }
670 if ((bad & 2) && ckWARN(WARN_UNPACK))
671 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
672 WARN_PACK : WARN_UNPACK),
673 "Character(s) in '%c' format wrapped in %s",
674 (int) TYPE_NO_MODIFIERS(datumtype),
675 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
676 }
677 *s = from;
678 return TRUE;
679}
680
681STATIC bool
682next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
683{
684 UV val;
685 STRLEN retlen;
686 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
687 if (val >= 0x100 || !ISUUCHAR(val) ||
688 retlen == (STRLEN) -1 || retlen == 0) {
689 *out = 0;
690 return FALSE;
691 }
692 *out = PL_uudmap[val] & 077;
693 *s += retlen;
694 return TRUE;
695}
696
697STATIC void
698bytes_to_uni(pTHX_ const U8 *start, STRLEN len, char **dest) {
699 U8 buffer[UTF8_MAXLEN];
700 const U8 *end = start + len;
701 char *d = *dest;
702 while (start < end) {
703 const int length =
704 uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
705 switch(length) {
706 case 1:
707 *d++ = buffer[0];
708 break;
709 case 2:
710 *d++ = buffer[0];
711 *d++ = buffer[1];
712 break;
713 default:
714 Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
715 *start, length);
716 }
717 start++;
718 }
719 *dest = d;
720}
721
722#define PUSH_BYTES(utf8, cur, buf, len) \
723STMT_START { \
724 if (utf8) bytes_to_uni(aTHX_ (U8 *) buf, len, &(cur)); \
725 else { \
726 Copy(buf, cur, len, char); \
727 (cur) += (len); \
728 } \
729} STMT_END
730
731#define GROWING(utf8, cat, start, cur, in_len) \
732STMT_START { \
733 STRLEN glen = (in_len); \
734 if (utf8) glen *= UTF8_EXPAND; \
735 if ((cur) + glen >= (start) + SvLEN(cat)) { \
736 (start) = sv_exp_grow(aTHX_ cat, glen); \
737 (cur) = (start) + SvCUR(cat); \
738 } \
739} STMT_END
740
741#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
742STMT_START { \
743 const STRLEN glen = (in_len); \
744 STRLEN gl = glen; \
745 if (utf8) gl *= UTF8_EXPAND; \
746 if ((cur) + gl >= (start) + SvLEN(cat)) { \
747 *cur = '\0'; \
748 SvCUR_set((cat), (cur) - (start)); \
749 (start) = sv_exp_grow(aTHX_ cat, gl); \
750 (cur) = (start) + SvCUR(cat); \
751 } \
752 PUSH_BYTES(utf8, cur, buf, glen); \
753} STMT_END
754
755#define PUSH_BYTE(utf8, s, byte) \
756STMT_START { \
757 if (utf8) { \
758 const U8 au8 = (byte); \
759 bytes_to_uni(aTHX_ &au8, 1, &(s)); \
760 } else *(U8 *)(s)++ = (byte); \
761} STMT_END
762
763/* Only to be used inside a loop (see the break) */
764#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
765STMT_START { \
766 STRLEN retlen; \
767 if (str >= end) break; \
768 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
769 if (retlen == (STRLEN) -1 || retlen == 0) { \
770 *cur = '\0'; \
771 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
772 } \
773 str += retlen; \
774} STMT_END
775
776static const char *_action( const tempsym_t* symptr )
777{
778 return ( symptr->flags & FLAG_PACK ) ? "pack" : "unpack";
779}
780
781/* Returns the sizeof() struct described by pat */
782STATIC I32
783S_measure_struct(pTHX_ tempsym_t* symptr)
784{
785 I32 total = 0;
786
787 while (next_symbol(symptr)) {
788 I32 len;
789 int size;
790
791 switch (symptr->howlen) {
792 case e_star:
793 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
794 _action( symptr ) );
795 break;
796 default:
797 /* e_no_len and e_number */
798 len = symptr->length;
799 break;
800 }
801
802 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
803 if (!size) {
804 int star;
805 /* endianness doesn't influence the size of a type */
806 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
807 default:
808 Perl_croak(aTHX_ "Invalid type '%c' in %s",
809 (int)TYPE_NO_MODIFIERS(symptr->code),
810 _action( symptr ) );
811#ifdef PERL_PACK_CAN_SHRIEKSIGN
812 case '.' | TYPE_IS_SHRIEKING:
813 case '@' | TYPE_IS_SHRIEKING:
814#endif
815 case '@':
816 case '.':
817 case '/':
818 case 'U': /* XXXX Is it correct? */
819 case 'w':
820 case 'u':
821 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
822 (int) TYPE_NO_MODIFIERS(symptr->code),
823 _action( symptr ) );
824 case '%':
825 size = 0;
826 break;
827 case '(':
828 {
829 tempsym_t savsym = *symptr;
830 symptr->patptr = savsym.grpbeg;
831 symptr->patend = savsym.grpend;
832 /* XXXX Theoretically, we need to measure many times at
833 different positions, since the subexpression may contain
834 alignment commands, but be not of aligned length.
835 Need to detect this and croak(). */
836 size = measure_struct(symptr);
837 *symptr = savsym;
838 break;
839 }
840 case 'X' | TYPE_IS_SHRIEKING:
841 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
842 */
843 if (!len) /* Avoid division by 0 */
844 len = 1;
845 len = total % len; /* Assumed: the start is aligned. */
846 /* FALL THROUGH */
847 case 'X':
848 size = -1;
849 if (total < len)
850 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
851 break;
852 case 'x' | TYPE_IS_SHRIEKING:
853 if (!len) /* Avoid division by 0 */
854 len = 1;
855 star = total % len; /* Assumed: the start is aligned. */
856 if (star) /* Other portable ways? */
857 len = len - star;
858 else
859 len = 0;
860 /* FALL THROUGH */
861 case 'x':
862 case 'A':
863 case 'Z':
864 case 'a':
865 size = 1;
866 break;
867 case 'B':
868 case 'b':
869 len = (len + 7)/8;
870 size = 1;
871 break;
872 case 'H':
873 case 'h':
874 len = (len + 1)/2;
875 size = 1;
876 break;
877
878 case 'P':
879 len = 1;
880 size = sizeof(char*);
881 break;
882 }
883 }
884 total += len * size;
885 }
886 return total;
887}
888
889
890/* locate matching closing parenthesis or bracket
891 * returns char pointer to char after match, or NULL
892 */
893STATIC const char *
894S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
895{
896 while (patptr < patend) {
897 const char c = *patptr++;
898
899 if (isSPACE(c))
900 continue;
901 else if (c == ender)
902 return patptr-1;
903 else if (c == '#') {
904 while (patptr < patend && *patptr != '\n')
905 patptr++;
906 continue;
907 } else if (c == '(')
908 patptr = group_end(patptr, patend, ')') + 1;
909 else if (c == '[')
910 patptr = group_end(patptr, patend, ']') + 1;
911 }
912 Perl_croak(aTHX_ "No group ending character '%c' found in template",
913 ender);
914 return 0;
915}
916
917
918/* Convert unsigned decimal number to binary.
919 * Expects a pointer to the first digit and address of length variable
920 * Advances char pointer to 1st non-digit char and returns number
921 */
922STATIC const char *
923S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
924{
925 I32 len = *patptr++ - '0';
926 while (isDIGIT(*patptr)) {
927 if (len >= 0x7FFFFFFF/10)
928 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
929 len = (len * 10) + (*patptr++ - '0');
930 }
931 *lenptr = len;
932 return patptr;
933}
934
935/* The marvellous template parsing routine: Using state stored in *symptr,
936 * locates next template code and count
937 */
938STATIC bool
939S_next_symbol(pTHX_ tempsym_t* symptr )
940{
941 const char* patptr = symptr->patptr;
942 const char* patend = symptr->patend;
943
944 symptr->flags &= ~FLAG_SLASH;
945
946 while (patptr < patend) {
947 if (isSPACE(*patptr))
948 patptr++;
949 else if (*patptr == '#') {
950 patptr++;
951 while (patptr < patend && *patptr != '\n')
952 patptr++;
953 if (patptr < patend)
954 patptr++;
955 } else {
956 /* We should have found a template code */
957 I32 code = *patptr++ & 0xFF;
958 U32 inherited_modifiers = 0;
959
960 if (code == ','){ /* grandfather in commas but with a warning */
961 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
962 symptr->flags |= FLAG_COMMA;
963 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
964 "Invalid type ',' in %s", _action( symptr ) );
965 }
966 continue;
967 }
968
969 /* for '(', skip to ')' */
970 if (code == '(') {
971 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
972 Perl_croak(aTHX_ "()-group starts with a count in %s",
973 _action( symptr ) );
974 symptr->grpbeg = patptr;
975 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
976 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
977 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
978 _action( symptr ) );
979 }
980
981 /* look for group modifiers to inherit */
982 if (TYPE_ENDIANNESS(symptr->flags)) {
983 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
984 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
985 }
986
987 /* look for modifiers */
988 while (patptr < patend) {
989 const char *allowed;
990 I32 modifier;
991 switch (*patptr) {
992 case '!':
993 modifier = TYPE_IS_SHRIEKING;
994 allowed = SHRIEKING_ALLOWED_TYPES;
995 break;
996#ifdef PERL_PACK_CAN_BYTEORDER
997 case '>':
998 modifier = TYPE_IS_BIG_ENDIAN;
999 allowed = ENDIANNESS_ALLOWED_TYPES;
1000 break;
1001 case '<':
1002 modifier = TYPE_IS_LITTLE_ENDIAN;
1003 allowed = ENDIANNESS_ALLOWED_TYPES;
1004 break;
1005#endif /* PERL_PACK_CAN_BYTEORDER */
1006 default:
1007 allowed = "";
1008 modifier = 0;
1009 break;
1010 }
1011
1012 if (modifier == 0)
1013 break;
1014
1015 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1016 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
1017 allowed, _action( symptr ) );
1018
1019 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1020 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
1021 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
1022 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1023 TYPE_ENDIANNESS_MASK)
1024 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
1025 *patptr, _action( symptr ) );
1026
1027 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1028 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1029 "Duplicate modifier '%c' after '%c' in %s",
1030 *patptr, (int) TYPE_NO_MODIFIERS(code),
1031 _action( symptr ) );
1032 }
1033
1034 code |= modifier;
1035 patptr++;
1036 }
1037
1038 /* inherit modifiers */
1039 code |= inherited_modifiers;
1040
1041 /* look for count and/or / */
1042 if (patptr < patend) {
1043 if (isDIGIT(*patptr)) {
1044 patptr = get_num( patptr, &symptr->length );
1045 symptr->howlen = e_number;
1046
1047 } else if (*patptr == '*') {
1048 patptr++;
1049 symptr->howlen = e_star;
1050
1051 } else if (*patptr == '[') {
1052 const char* lenptr = ++patptr;
1053 symptr->howlen = e_number;
1054 patptr = group_end( patptr, patend, ']' ) + 1;
1055 /* what kind of [] is it? */
1056 if (isDIGIT(*lenptr)) {
1057 lenptr = get_num( lenptr, &symptr->length );
1058 if( *lenptr != ']' )
1059 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1060 _action( symptr ) );
1061 } else {
1062 tempsym_t savsym = *symptr;
1063 symptr->patend = patptr-1;
1064 symptr->patptr = lenptr;
1065 savsym.length = measure_struct(symptr);
1066 *symptr = savsym;
1067 }
1068 } else {
1069 symptr->howlen = e_no_len;
1070 symptr->length = 1;
1071 }
1072
1073 /* try to find / */
1074 while (patptr < patend) {
1075 if (isSPACE(*patptr))
1076 patptr++;
1077 else if (*patptr == '#') {
1078 patptr++;
1079 while (patptr < patend && *patptr != '\n')
1080 patptr++;
1081 if (patptr < patend)
1082 patptr++;
1083 } else {
1084 if (*patptr == '/') {
1085 symptr->flags |= FLAG_SLASH;
1086 patptr++;
1087 if (patptr < patend &&
1088 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1089 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1090 _action( symptr ) );
1091 }
1092 break;
1093 }
1094 }
1095 } else {
1096 /* at end - no count, no / */
1097 symptr->howlen = e_no_len;
1098 symptr->length = 1;
1099 }
1100
1101 symptr->code = code;
1102 symptr->patptr = patptr;
1103 return TRUE;
1104 }
1105 }
1106 symptr->patptr = patptr;
1107 return FALSE;
1108}
1109
1110/*
1111 There is no way to cleanly handle the case where we should process the
1112 string per byte in its upgraded form while it's really in downgraded form
1113 (e.g. estimates like strend-s as an upper bound for the number of
1114 characters left wouldn't work). So if we foresee the need of this
1115 (pattern starts with U or contains U0), we want to work on the encoded
1116 version of the string. Users are advised to upgrade their pack string
1117 themselves if they need to do a lot of unpacks like this on it
1118*/
1119STATIC bool
1120need_utf8(const char *pat, const char *patend)
1121{
1122 bool first = TRUE;
1123 while (pat < patend) {
1124 if (pat[0] == '#') {
1125 pat++;
1126 pat = (const char *) memchr(pat, '\n', patend-pat);
1127 if (!pat) return FALSE;
1128 } else if (pat[0] == 'U') {
1129 if (first || pat[1] == '0') return TRUE;
1130 } else first = FALSE;
1131 pat++;
1132 }
1133 return FALSE;
1134}
1135
1136STATIC char
1137first_symbol(const char *pat, const char *patend) {
1138 while (pat < patend) {
1139 if (pat[0] != '#') return pat[0];
1140 pat++;
1141 pat = (const char *) memchr(pat, '\n', patend-pat);
1142 if (!pat) return 0;
1143 pat++;
1144 }
1145 return 0;
1146}
1147
1148/*
1149=for apidoc unpack_str
1150
1151The engine implementing unpack() Perl function. Note: parameters strbeg, new_s
1152and ocnt are not used. This call should not be used, use unpackstring instead.
1153
1154=cut */
1155
1156I32
1157Perl_unpack_str(pTHX_ const char *pat, const char *patend, const char *s, const char *strbeg, const char *strend, char **new_s, I32 ocnt, U32 flags)
1158{
1159 tempsym_t sym;
1160 PERL_UNUSED_ARG(strbeg);
1161 PERL_UNUSED_ARG(new_s);
1162 PERL_UNUSED_ARG(ocnt);
1163
1164 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1165 else if (need_utf8(pat, patend)) {
1166 /* We probably should try to avoid this in case a scalar context call
1167 wouldn't get to the "U0" */
1168 STRLEN len = strend - s;
1169 s = (char *) bytes_to_utf8((U8 *) s, &len);
1170 SAVEFREEPV(s);
1171 strend = s + len;
1172 flags |= FLAG_DO_UTF8;
1173 }
1174
1175 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1176 flags |= FLAG_PARSE_UTF8;
1177
1178 TEMPSYM_INIT(&sym, pat, patend, flags);
1179
1180 return unpack_rec(&sym, s, s, strend, NULL );
1181}
1182
1183/*
1184=for apidoc unpackstring
1185
1186The engine implementing unpack() Perl function. C<unpackstring> puts the
1187extracted list items on the stack and returns the number of elements.
1188Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1189
1190=cut */
1191
1192I32
1193Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
1194{
1195 tempsym_t sym;
1196
1197 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1198 else if (need_utf8(pat, patend)) {
1199 /* We probably should try to avoid this in case a scalar context call
1200 wouldn't get to the "U0" */
1201 STRLEN len = strend - s;
1202 s = (char *) bytes_to_utf8((U8 *) s, &len);
1203 SAVEFREEPV(s);
1204 strend = s + len;
1205 flags |= FLAG_DO_UTF8;
1206 }
1207
1208 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1209 flags |= FLAG_PARSE_UTF8;
1210
1211 TEMPSYM_INIT(&sym, pat, patend, flags);
1212
1213 return unpack_rec(&sym, s, s, strend, NULL );
1214}
1215
1216STATIC
1217I32
1218S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
1219{
1220 dVAR; dSP;
1221 SV *sv;
1222 const I32 start_sp_offset = SP - PL_stack_base;
1223 howlen_t howlen;
1224
1225 I32 checksum = 0;
1226 UV cuv = 0;
1227 NV cdouble = 0.0;
1228 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1229 bool beyond = FALSE;
1230 bool explicit_length;
1231 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1232 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1233 symptr->strbeg = s - strbeg;
1234
1235 while (next_symbol(symptr)) {
1236 packprops_t props;
1237 I32 len;
1238 I32 datumtype = symptr->code;
1239 /* do first one only unless in list context
1240 / is implemented by unpacking the count, then popping it from the
1241 stack, so must check that we're not in the middle of a / */
1242 if ( unpack_only_one
1243 && (SP - PL_stack_base == start_sp_offset + 1)
1244 && (datumtype != '/') ) /* XXX can this be omitted */
1245 break;
1246
1247 switch (howlen = symptr->howlen) {
1248 case e_star:
1249 len = strend - strbeg; /* long enough */
1250 break;
1251 default:
1252 /* e_no_len and e_number */
1253 len = symptr->length;
1254 break;
1255 }
1256
1257 explicit_length = TRUE;
1258 redo_switch:
1259 beyond = s >= strend;
1260
1261 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1262 if (props) {
1263 /* props nonzero means we can process this letter. */
1264 const long size = props & PACK_SIZE_MASK;
1265 const long howmany = (strend - s) / size;
1266 if (len > howmany)
1267 len = howmany;
1268
1269 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1270 if (len && unpack_only_one) len = 1;
1271 EXTEND(SP, len);
1272 EXTEND_MORTAL(len);
1273 }
1274 }
1275
1276 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1277 default:
1278 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1279
1280 case '%':
1281 if (howlen == e_no_len)
1282 len = 16; /* len is not specified */
1283 checksum = len;
1284 cuv = 0;
1285 cdouble = 0;
1286 continue;
1287 break;
1288 case '(':
1289 {
1290 tempsym_t savsym = *symptr;
1291 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1292 symptr->flags |= group_modifiers;
1293 symptr->patend = savsym.grpend;
1294 symptr->previous = &savsym;
1295 symptr->level++;
1296 PUTBACK;
1297 while (len--) {
1298 symptr->patptr = savsym.grpbeg;
1299 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1300 else symptr->flags &= ~FLAG_PARSE_UTF8;
1301 unpack_rec(symptr, s, strbeg, strend, &s);
1302 if (s == strend && savsym.howlen == e_star)
1303 break; /* No way to continue */
1304 }
1305 SPAGAIN;
1306 savsym.flags = symptr->flags & ~group_modifiers;
1307 *symptr = savsym;
1308 break;
1309 }
1310#ifdef PERL_PACK_CAN_SHRIEKSIGN
1311 case '.' | TYPE_IS_SHRIEKING:
1312#endif
1313 case '.': {
1314 const char *from;
1315 SV *sv;
1316#ifdef PERL_PACK_CAN_SHRIEKSIGN
1317 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1318#else /* PERL_PACK_CAN_SHRIEKSIGN */
1319 const bool u8 = utf8;
1320#endif
1321 if (howlen == e_star) from = strbeg;
1322 else if (len <= 0) from = s;
1323 else {
1324 tempsym_t *group = symptr;
1325
1326 while (--len && group) group = group->previous;
1327 from = group ? strbeg + group->strbeg : strbeg;
1328 }
1329 sv = from <= s ?
1330 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1331 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1332 XPUSHs(sv_2mortal(sv));
1333 break;
1334 }
1335#ifdef PERL_PACK_CAN_SHRIEKSIGN
1336 case '@' | TYPE_IS_SHRIEKING:
1337#endif
1338 case '@':
1339 s = strbeg + symptr->strbeg;
1340#ifdef PERL_PACK_CAN_SHRIEKSIGN
1341 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1342#else /* PERL_PACK_CAN_SHRIEKSIGN */
1343 if (utf8)
1344#endif
1345 {
1346 while (len > 0) {
1347 if (s >= strend)
1348 Perl_croak(aTHX_ "'@' outside of string in unpack");
1349 s += UTF8SKIP(s);
1350 len--;
1351 }
1352 if (s > strend)
1353 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1354 } else {
1355 if (strend-s < len)
1356 Perl_croak(aTHX_ "'@' outside of string in unpack");
1357 s += len;
1358 }
1359 break;
1360 case 'X' | TYPE_IS_SHRIEKING:
1361 if (!len) /* Avoid division by 0 */
1362 len = 1;
1363 if (utf8) {
1364 const char *hop, *last;
1365 I32 l = len;
1366 hop = last = strbeg;
1367 while (hop < s) {
1368 hop += UTF8SKIP(hop);
1369 if (--l == 0) {
1370 last = hop;
1371 l = len;
1372 }
1373 }
1374 if (last > s)
1375 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1376 s = last;
1377 break;
1378 }
1379 len = (s - strbeg) % len;
1380 /* FALL THROUGH */
1381 case 'X':
1382 if (utf8) {
1383 while (len > 0) {
1384 if (s <= strbeg)
1385 Perl_croak(aTHX_ "'X' outside of string in unpack");
1386 while (--s, UTF8_IS_CONTINUATION(*s)) {
1387 if (s <= strbeg)
1388 Perl_croak(aTHX_ "'X' outside of string in unpack");
1389 }
1390 len--;
1391 }
1392 } else {
1393 if (len > s - strbeg)
1394 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1395 s -= len;
1396 }
1397 break;
1398 case 'x' | TYPE_IS_SHRIEKING: {
1399 I32 ai32;
1400 if (!len) /* Avoid division by 0 */
1401 len = 1;
1402 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1403 else ai32 = (s - strbeg) % len;
1404 if (ai32 == 0) break;
1405 len -= ai32;
1406 }
1407 /* FALL THROUGH */
1408 case 'x':
1409 if (utf8) {
1410 while (len>0) {
1411 if (s >= strend)
1412 Perl_croak(aTHX_ "'x' outside of string in unpack");
1413 s += UTF8SKIP(s);
1414 len--;
1415 }
1416 } else {
1417 if (len > strend - s)
1418 Perl_croak(aTHX_ "'x' outside of string in unpack");
1419 s += len;
1420 }
1421 break;
1422 case '/':
1423 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1424 break;
1425 case 'A':
1426 case 'Z':
1427 case 'a':
1428 if (checksum) {
1429 /* Preliminary length estimate is assumed done in 'W' */
1430 if (len > strend - s) len = strend - s;
1431 goto W_checksum;
1432 }
1433 if (utf8) {
1434 I32 l;
1435 const char *hop;
1436 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1437 if (hop >= strend) {
1438 if (hop > strend)
1439 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1440 break;
1441 }
1442 }
1443 if (hop > strend)
1444 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1445 len = hop - s;
1446 } else if (len > strend - s)
1447 len = strend - s;
1448
1449 if (datumtype == 'Z') {
1450 /* 'Z' strips stuff after first null */
1451 const char *ptr, *end;
1452 end = s + len;
1453 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1454 sv = newSVpvn(s, ptr-s);
1455 if (howlen == e_star) /* exact for 'Z*' */
1456 len = ptr-s + (ptr != strend ? 1 : 0);
1457 } else if (datumtype == 'A') {
1458 /* 'A' strips both nulls and spaces */
1459 const char *ptr;
1460 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1461 for (ptr = s+len-1; ptr >= s; ptr--)
1462 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1463 !is_utf8_space((U8 *) ptr)) break;
1464 if (ptr >= s) ptr += UTF8SKIP(ptr);
1465 else ptr++;
1466 if (ptr > s+len)
1467 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1468 } else {
1469 for (ptr = s+len-1; ptr >= s; ptr--)
1470 if (*ptr != 0 && !isSPACE(*ptr)) break;
1471 ptr++;
1472 }
1473 sv = newSVpvn(s, ptr-s);
1474 } else sv = newSVpvn(s, len);
1475
1476 if (utf8) {
1477 SvUTF8_on(sv);
1478 /* Undo any upgrade done due to need_utf8() */
1479 if (!(symptr->flags & FLAG_WAS_UTF8))
1480 sv_utf8_downgrade(sv, 0);
1481 }
1482 XPUSHs(sv_2mortal(sv));
1483 s += len;
1484 break;
1485 case 'B':
1486 case 'b': {
1487 char *str;
1488 if (howlen == e_star || len > (strend - s) * 8)
1489 len = (strend - s) * 8;
1490 if (checksum) {
1491 if (!PL_bitcount) {
1492 int bits;
1493 Newxz(PL_bitcount, 256, char);
1494 for (bits = 1; bits < 256; bits++) {
1495 if (bits & 1) PL_bitcount[bits]++;
1496 if (bits & 2) PL_bitcount[bits]++;
1497 if (bits & 4) PL_bitcount[bits]++;
1498 if (bits & 8) PL_bitcount[bits]++;
1499 if (bits & 16) PL_bitcount[bits]++;
1500 if (bits & 32) PL_bitcount[bits]++;
1501 if (bits & 64) PL_bitcount[bits]++;
1502 if (bits & 128) PL_bitcount[bits]++;
1503 }
1504 }
1505 if (utf8)
1506 while (len >= 8 && s < strend) {
1507 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1508 len -= 8;
1509 }
1510 else
1511 while (len >= 8) {
1512 cuv += PL_bitcount[*(U8 *)s++];
1513 len -= 8;
1514 }
1515 if (len && s < strend) {
1516 U8 bits;
1517 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1518 if (datumtype == 'b')
1519 while (len-- > 0) {
1520 if (bits & 1) cuv++;
1521 bits >>= 1;
1522 }
1523 else
1524 while (len-- > 0) {
1525 if (bits & 0x80) cuv++;
1526 bits <<= 1;
1527 }
1528 }
1529 break;
1530 }
1531
1532 sv = sv_2mortal(NEWSV(35, len ? len : 1));
1533 SvPOK_on(sv);
1534 str = SvPVX(sv);
1535 if (datumtype == 'b') {
1536 U8 bits = 0;
1537 const I32 ai32 = len;
1538 for (len = 0; len < ai32; len++) {
1539 if (len & 7) bits >>= 1;
1540 else if (utf8) {
1541 if (s >= strend) break;
1542 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1543 } else bits = *(U8 *) s++;
1544 *str++ = bits & 1 ? '1' : '0';
1545 }
1546 } else {
1547 U8 bits = 0;
1548 const I32 ai32 = len;
1549 for (len = 0; len < ai32; len++) {
1550 if (len & 7) bits <<= 1;
1551 else if (utf8) {
1552 if (s >= strend) break;
1553 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1554 } else bits = *(U8 *) s++;
1555 *str++ = bits & 0x80 ? '1' : '0';
1556 }
1557 }
1558 *str = '\0';
1559 SvCUR_set(sv, str - SvPVX_const(sv));
1560 XPUSHs(sv);
1561 break;
1562 }
1563 case 'H':
1564 case 'h': {
1565 char *str;
1566 /* Preliminary length estimate, acceptable for utf8 too */
1567 if (howlen == e_star || len > (strend - s) * 2)
1568 len = (strend - s) * 2;
1569 sv = sv_2mortal(NEWSV(35, len ? len : 1));
1570 SvPOK_on(sv);
1571 str = SvPVX(sv);
1572 if (datumtype == 'h') {
1573 U8 bits = 0;
1574 I32 ai32 = len;
1575 for (len = 0; len < ai32; len++) {
1576 if (len & 1) bits >>= 4;
1577 else if (utf8) {
1578 if (s >= strend) break;
1579 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1580 } else bits = * (U8 *) s++;
1581 *str++ = PL_hexdigit[bits & 15];
1582 }
1583 } else {
1584 U8 bits = 0;
1585 const I32 ai32 = len;
1586 for (len = 0; len < ai32; len++) {
1587 if (len & 1) bits <<= 4;
1588 else if (utf8) {
1589 if (s >= strend) break;
1590 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1591 } else bits = *(U8 *) s++;
1592 *str++ = PL_hexdigit[(bits >> 4) & 15];
1593 }
1594 }
1595 *str = '\0';
1596 SvCUR_set(sv, str - SvPVX_const(sv));
1597 XPUSHs(sv);
1598 break;
1599 }
1600 case 'c':
1601 while (len-- > 0) {
1602 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
1603 if (aint >= 128) /* fake up signed chars */
1604 aint -= 256;
1605 if (!checksum)
1606 PUSHs(sv_2mortal(newSViv((IV)aint)));
1607 else if (checksum > bits_in_uv)
1608 cdouble += (NV)aint;
1609 else
1610 cuv += aint;
1611 }
1612 break;
1613 case 'C':
1614 case 'W':
1615 W_checksum:
1616 if (len == 0) {
1617 if (explicit_length && datumtype == 'C')
1618 /* Switch to "character" mode */
1619 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1620 break;
1621 }
1622 if (datumtype == 'C' ?
1623 (symptr->flags & FLAG_DO_UTF8) &&
1624 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
1625 while (len-- > 0 && s < strend) {
1626 STRLEN retlen;
1627 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1628 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1629 if (retlen == (STRLEN) -1 || retlen == 0)
1630 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1631 s += retlen;
1632 if (!checksum)
1633 PUSHs(sv_2mortal(newSVuv((UV) val)));
1634 else if (checksum > bits_in_uv)
1635 cdouble += (NV) val;
1636 else
1637 cuv += val;
1638 }
1639 } else if (!checksum)
1640 while (len-- > 0) {
1641 const U8 ch = *(U8 *) s++;
1642 PUSHs(sv_2mortal(newSVuv((UV) ch)));
1643 }
1644 else if (checksum > bits_in_uv)
1645 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1646 else
1647 while (len-- > 0) cuv += *(U8 *) s++;
1648 break;
1649 case 'U':
1650 if (len == 0) {
1651 if (explicit_length) {
1652 /* Switch to "bytes in UTF-8" mode */
1653 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1654 else
1655 /* Should be impossible due to the need_utf8() test */
1656 Perl_croak(aTHX_ "U0 mode on a byte string");
1657 }
1658 break;
1659 }
1660 if (len > strend - s) len = strend - s;
1661 if (!checksum) {
1662 if (len && unpack_only_one) len = 1;
1663 EXTEND(SP, len);
1664 EXTEND_MORTAL(len);
1665 }
1666 while (len-- > 0 && s < strend) {
1667 STRLEN retlen;
1668 UV auv;
1669 if (utf8) {
1670 U8 result[UTF8_MAXLEN];
1671 const char *ptr = s;
1672 STRLEN len;
1673 /* Bug: warns about bad utf8 even if we are short on bytes
1674 and will break out of the loop */
1675 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1676 'U'))
1677 break;
1678 len = UTF8SKIP(result);
1679 if (!uni_to_bytes(aTHX_ &ptr, strend,
1680 (char *) &result[1], len-1, 'U')) break;
1681 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1682 s = ptr;
1683 } else {
1684 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1685 if (retlen == (STRLEN) -1 || retlen == 0)
1686 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1687 s += retlen;
1688 }
1689 if (!checksum)
1690 PUSHs(sv_2mortal(newSVuv((UV) auv)));
1691 else if (checksum > bits_in_uv)
1692 cdouble += (NV) auv;
1693 else
1694 cuv += auv;
1695 }
1696 break;
1697 case 's' | TYPE_IS_SHRIEKING:
1698#if SHORTSIZE != SIZE16
1699 while (len-- > 0) {
1700 short ashort;
1701 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1702 DO_BO_UNPACK(ashort, s);
1703 if (!checksum)
1704 PUSHs(sv_2mortal(newSViv((IV)ashort)));
1705 else if (checksum > bits_in_uv)
1706 cdouble += (NV)ashort;
1707 else
1708 cuv += ashort;
1709 }
1710 break;
1711#else
1712 /* Fallthrough! */
1713#endif
1714 case 's':
1715 while (len-- > 0) {
1716 I16 ai16;
1717
1718#if U16SIZE > SIZE16
1719 ai16 = 0;
1720#endif
1721 SHIFT16(utf8, s, strend, &ai16, datumtype);
1722 DO_BO_UNPACK(ai16, 16);
1723#if U16SIZE > SIZE16
1724 if (ai16 > 32767)
1725 ai16 -= 65536;
1726#endif
1727 if (!checksum)
1728 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1729 else if (checksum > bits_in_uv)
1730 cdouble += (NV)ai16;
1731 else
1732 cuv += ai16;
1733 }
1734 break;
1735 case 'S' | TYPE_IS_SHRIEKING:
1736#if SHORTSIZE != SIZE16
1737 while (len-- > 0) {
1738 unsigned short aushort;
1739 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1740 DO_BO_UNPACK(aushort, s);
1741 if (!checksum)
1742 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
1743 else if (checksum > bits_in_uv)
1744 cdouble += (NV)aushort;
1745 else
1746 cuv += aushort;
1747 }
1748 break;
1749#else
1750 /* Fallhrough! */
1751#endif
1752 case 'v':
1753 case 'n':
1754 case 'S':
1755 while (len-- > 0) {
1756 U16 au16;
1757#if U16SIZE > SIZE16
1758 au16 = 0;
1759#endif
1760 SHIFT16(utf8, s, strend, &au16, datumtype);
1761 DO_BO_UNPACK(au16, 16);
1762#ifdef HAS_NTOHS
1763 if (datumtype == 'n')
1764 au16 = PerlSock_ntohs(au16);
1765#endif
1766#ifdef HAS_VTOHS
1767 if (datumtype == 'v')
1768 au16 = vtohs(au16);
1769#endif
1770 if (!checksum)
1771 PUSHs(sv_2mortal(newSVuv((UV)au16)));
1772 else if (checksum > bits_in_uv)
1773 cdouble += (NV) au16;
1774 else
1775 cuv += au16;
1776 }
1777 break;
1778#ifdef PERL_PACK_CAN_SHRIEKSIGN
1779 case 'v' | TYPE_IS_SHRIEKING:
1780 case 'n' | TYPE_IS_SHRIEKING:
1781 while (len-- > 0) {
1782 I16 ai16;
1783# if U16SIZE > SIZE16
1784 ai16 = 0;
1785# endif
1786 SHIFT16(utf8, s, strend, &ai16, datumtype);
1787# ifdef HAS_NTOHS
1788 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1789 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1790# endif /* HAS_NTOHS */
1791# ifdef HAS_VTOHS
1792 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1793 ai16 = (I16) vtohs((U16) ai16);
1794# endif /* HAS_VTOHS */
1795 if (!checksum)
1796 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1797 else if (checksum > bits_in_uv)
1798 cdouble += (NV) ai16;
1799 else
1800 cuv += ai16;
1801 }
1802 break;
1803#endif /* PERL_PACK_CAN_SHRIEKSIGN */
1804 case 'i':
1805 case 'i' | TYPE_IS_SHRIEKING:
1806 while (len-- > 0) {
1807 int aint;
1808 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1809 DO_BO_UNPACK(aint, i);
1810 if (!checksum)
1811 PUSHs(sv_2mortal(newSViv((IV)aint)));
1812 else if (checksum > bits_in_uv)
1813 cdouble += (NV)aint;
1814 else
1815 cuv += aint;
1816 }
1817 break;
1818 case 'I':
1819 case 'I' | TYPE_IS_SHRIEKING:
1820 while (len-- > 0) {
1821 unsigned int auint;
1822 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1823 DO_BO_UNPACK(auint, i);
1824 if (!checksum)
1825 PUSHs(sv_2mortal(newSVuv((UV)auint)));
1826 else if (checksum > bits_in_uv)
1827 cdouble += (NV)auint;
1828 else
1829 cuv += auint;
1830 }
1831 break;
1832 case 'j':
1833 while (len-- > 0) {
1834 IV aiv;
1835 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1836#if IVSIZE == INTSIZE
1837 DO_BO_UNPACK(aiv, i);
1838#elif IVSIZE == LONGSIZE
1839 DO_BO_UNPACK(aiv, l);
1840#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1841 DO_BO_UNPACK(aiv, 64);
1842#else
1843 Perl_croak(aTHX_ "'j' not supported on this platform");
1844#endif
1845 if (!checksum)
1846 PUSHs(sv_2mortal(newSViv(aiv)));
1847 else if (checksum > bits_in_uv)
1848 cdouble += (NV)aiv;
1849 else
1850 cuv += aiv;
1851 }
1852 break;
1853 case 'J':
1854 while (len-- > 0) {
1855 UV auv;
1856 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1857#if IVSIZE == INTSIZE
1858 DO_BO_UNPACK(auv, i);
1859#elif IVSIZE == LONGSIZE
1860 DO_BO_UNPACK(auv, l);
1861#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1862 DO_BO_UNPACK(auv, 64);
1863#else
1864 Perl_croak(aTHX_ "'J' not supported on this platform");
1865#endif
1866 if (!checksum)
1867 PUSHs(sv_2mortal(newSVuv(auv)));
1868 else if (checksum > bits_in_uv)
1869 cdouble += (NV)auv;
1870 else
1871 cuv += auv;
1872 }
1873 break;
1874 case 'l' | TYPE_IS_SHRIEKING:
1875#if LONGSIZE != SIZE32
1876 while (len-- > 0) {
1877 long along;
1878 SHIFT_VAR(utf8, s, strend, along, datumtype);
1879 DO_BO_UNPACK(along, l);
1880 if (!checksum)
1881 PUSHs(sv_2mortal(newSViv((IV)along)));
1882 else if (checksum > bits_in_uv)
1883 cdouble += (NV)along;
1884 else
1885 cuv += along;
1886 }
1887 break;
1888#else
1889 /* Fallthrough! */
1890#endif
1891 case 'l':
1892 while (len-- > 0) {
1893 I32 ai32;
1894#if U32SIZE > SIZE32
1895 ai32 = 0;
1896#endif
1897 SHIFT32(utf8, s, strend, &ai32, datumtype);
1898 DO_BO_UNPACK(ai32, 32);
1899#if U32SIZE > SIZE32
1900 if (ai32 > 2147483647) ai32 -= 4294967296;
1901#endif
1902 if (!checksum)
1903 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1904 else if (checksum > bits_in_uv)
1905 cdouble += (NV)ai32;
1906 else
1907 cuv += ai32;
1908 }
1909 break;
1910 case 'L' | TYPE_IS_SHRIEKING:
1911#if LONGSIZE != SIZE32
1912 while (len-- > 0) {
1913 unsigned long aulong;
1914 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1915 DO_BO_UNPACK(aulong, l);
1916 if (!checksum)
1917 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
1918 else if (checksum > bits_in_uv)
1919 cdouble += (NV)aulong;
1920 else
1921 cuv += aulong;
1922 }
1923 break;
1924#else
1925 /* Fall through! */
1926#endif
1927 case 'V':
1928 case 'N':
1929 case 'L':
1930 while (len-- > 0) {
1931 U32 au32;
1932#if U32SIZE > SIZE32
1933 au32 = 0;
1934#endif
1935 SHIFT32(utf8, s, strend, &au32, datumtype);
1936 DO_BO_UNPACK(au32, 32);
1937#ifdef HAS_NTOHL
1938 if (datumtype == 'N')
1939 au32 = PerlSock_ntohl(au32);
1940#endif
1941#ifdef HAS_VTOHL
1942 if (datumtype == 'V')
1943 au32 = vtohl(au32);
1944#endif
1945 if (!checksum)
1946 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1947 else if (checksum > bits_in_uv)
1948 cdouble += (NV)au32;
1949 else
1950 cuv += au32;
1951 }
1952 break;
1953#ifdef PERL_PACK_CAN_SHRIEKSIGN
1954 case 'V' | TYPE_IS_SHRIEKING:
1955 case 'N' | TYPE_IS_SHRIEKING:
1956 while (len-- > 0) {
1957 I32 ai32;
1958# if U32SIZE > SIZE32
1959 ai32 = 0;
1960# endif
1961 SHIFT32(utf8, s, strend, &ai32, datumtype);
1962# ifdef HAS_NTOHL
1963 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1964 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1965# endif
1966# ifdef HAS_VTOHL
1967 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1968 ai32 = (I32)vtohl((U32)ai32);
1969# endif
1970 if (!checksum)
1971 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1972 else if (checksum > bits_in_uv)
1973 cdouble += (NV)ai32;
1974 else
1975 cuv += ai32;
1976 }
1977 break;
1978#endif /* PERL_PACK_CAN_SHRIEKSIGN */
1979 case 'p':
1980 while (len-- > 0) {
1981 const char *aptr;
1982 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1983 DO_BO_UNPACK_PC(aptr);
1984 /* newSVpv generates undef if aptr is NULL */
1985 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
1986 }
1987 break;
1988 case 'w':
1989 {
1990 UV auv = 0;
1991 U32 bytes = 0;
1992
1993 while (len > 0 && s < strend) {
1994 U8 ch;
1995 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1996 auv = (auv << 7) | (ch & 0x7f);
1997 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1998 if (ch < 0x80) {
1999 bytes = 0;
2000 PUSHs(sv_2mortal(newSVuv(auv)));
2001 len--;
2002 auv = 0;
2003 continue;
2004 }
2005 if (++bytes >= sizeof(UV)) { /* promote to string */
2006 const char *t;
2007
2008 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
2009 while (s < strend) {
2010 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
2011 sv = mul128(sv, (U8)(ch & 0x7f));
2012 if (!(ch & 0x80)) {
2013 bytes = 0;
2014 break;
2015 }
2016 }
2017 t = SvPV_nolen_const(sv);
2018 while (*t == '0')
2019 t++;
2020 sv_chop(sv, t);
2021 PUSHs(sv_2mortal(sv));
2022 len--;
2023 auv = 0;
2024 }
2025 }
2026 if ((s >= strend) && bytes)
2027 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
2028 }
2029 break;
2030 case 'P':
2031 if (symptr->howlen == e_star)
2032 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
2033 EXTEND(SP, 1);
2034 if (sizeof(char*) <= strend - s) {
2035 char *aptr;
2036 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2037 DO_BO_UNPACK_PC(aptr);
2038 /* newSVpvn generates undef if aptr is NULL */
2039 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
2040 }
2041 break;
2042#ifdef HAS_QUAD
2043 case 'q':
2044 while (len-- > 0) {
2045 Quad_t aquad;
2046 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2047 DO_BO_UNPACK(aquad, 64);
2048 if (!checksum)
2049 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
2050 newSViv((IV)aquad) : newSVnv((NV)aquad)));
2051 else if (checksum > bits_in_uv)
2052 cdouble += (NV)aquad;
2053 else
2054 cuv += aquad;
2055 }
2056 break;
2057 case 'Q':
2058 while (len-- > 0) {
2059 Uquad_t auquad;
2060 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2061 DO_BO_UNPACK(auquad, 64);
2062 if (!checksum)
2063 PUSHs(sv_2mortal(auquad <= UV_MAX ?
2064 newSVuv((UV)auquad):newSVnv((NV)auquad)));
2065 else if (checksum > bits_in_uv)
2066 cdouble += (NV)auquad;
2067 else
2068 cuv += auquad;
2069 }
2070 break;
2071#endif /* HAS_QUAD */
2072 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2073 case 'f':
2074 while (len-- > 0) {
2075 float afloat;
2076 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
2077 DO_BO_UNPACK_N(afloat, float);
2078 if (!checksum)
2079 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2080 else
2081 cdouble += afloat;
2082 }
2083 break;
2084 case 'd':
2085 while (len-- > 0) {
2086 double adouble;
2087 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
2088 DO_BO_UNPACK_N(adouble, double);
2089 if (!checksum)
2090 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2091 else
2092 cdouble += adouble;
2093 }
2094 break;
2095 case 'F':
2096 while (len-- > 0) {
2097 NV anv;
2098 SHIFT_VAR(utf8, s, strend, anv, datumtype);
2099 DO_BO_UNPACK_N(anv, NV);
2100 if (!checksum)
2101 PUSHs(sv_2mortal(newSVnv(anv)));
2102 else
2103 cdouble += anv;
2104 }
2105 break;
2106#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2107 case 'D':
2108 while (len-- > 0) {
2109 long double aldouble;
2110 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
2111 DO_BO_UNPACK_N(aldouble, long double);
2112 if (!checksum)
2113 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2114 else
2115 cdouble += aldouble;
2116 }
2117 break;
2118#endif
2119 case 'u':
2120 /* MKS:
2121 * Initialise the decode mapping. By using a table driven
2122 * algorithm, the code will be character-set independent
2123 * (and just as fast as doing character arithmetic)
2124 */
2125 if (PL_uudmap['M'] == 0) {
2126 int i;
2127
2128 for (i = 0; i < sizeof(PL_uuemap); i += 1)
2129 PL_uudmap[(U8)PL_uuemap[i]] = i;
2130 /*
2131 * Because ' ' and '`' map to the same value,
2132 * we need to decode them both the same.
2133 */
2134 PL_uudmap[' '] = 0;
2135 }
2136 {
2137 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2138 sv = sv_2mortal(NEWSV(42, l));
2139 if (l) SvPOK_on(sv);
2140 }
2141 if (utf8) {
2142 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2143 I32 a, b, c, d;
2144 char hunk[4];
2145
2146 hunk[3] = '\0';
2147 while (len > 0) {
2148 next_uni_uu(aTHX_ &s, strend, &a);
2149 next_uni_uu(aTHX_ &s, strend, &b);
2150 next_uni_uu(aTHX_ &s, strend, &c);
2151 next_uni_uu(aTHX_ &s, strend, &d);
2152 hunk[0] = (char)((a << 2) | (b >> 4));
2153 hunk[1] = (char)((b << 4) | (c >> 2));
2154 hunk[2] = (char)((c << 6) | d);
2155 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2156 len -= 3;
2157 }
2158 if (s < strend) {
2159 if (*s == '\n') {
2160 s++;
2161 }
2162 else {
2163 /* possible checksum byte */
2164 const char *skip = s+UTF8SKIP(s);
2165 if (skip < strend && *skip == '\n')
2166 s = skip+1;
2167 }
2168 }
2169 }
2170 } else {
2171 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2172 I32 a, b, c, d;
2173 char hunk[4];
2174
2175 hunk[3] = '\0';
2176 len = PL_uudmap[*(U8*)s++] & 077;
2177 while (len > 0) {
2178 if (s < strend && ISUUCHAR(*s))
2179 a = PL_uudmap[*(U8*)s++] & 077;
2180 else
2181 a = 0;
2182 if (s < strend && ISUUCHAR(*s))
2183 b = PL_uudmap[*(U8*)s++] & 077;
2184 else
2185 b = 0;
2186 if (s < strend && ISUUCHAR(*s))
2187 c = PL_uudmap[*(U8*)s++] & 077;
2188 else
2189 c = 0;
2190 if (s < strend && ISUUCHAR(*s))
2191 d = PL_uudmap[*(U8*)s++] & 077;
2192 else
2193 d = 0;
2194 hunk[0] = (char)((a << 2) | (b >> 4));
2195 hunk[1] = (char)((b << 4) | (c >> 2));
2196 hunk[2] = (char)((c << 6) | d);
2197 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2198 len -= 3;
2199 }
2200 if (*s == '\n')
2201 s++;
2202 else /* possible checksum byte */
2203 if (s + 1 < strend && s[1] == '\n')
2204 s += 2;
2205 }
2206 }
2207 XPUSHs(sv);
2208 break;
2209 }
2210
2211 if (checksum) {
2212 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2213 (checksum > bits_in_uv &&
2214 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2215 NV trouble, anv;
2216
2217 anv = (NV) (1 << (checksum & 15));
2218 while (checksum >= 16) {
2219 checksum -= 16;
2220 anv *= 65536.0;
2221 }
2222 while (cdouble < 0.0)
2223 cdouble += anv;
2224 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2225 sv = newSVnv(cdouble);
2226 }
2227 else {
2228 if (checksum < bits_in_uv) {
2229 UV mask = ((UV)1 << checksum) - 1;
2230 cuv &= mask;
2231 }
2232 sv = newSVuv(cuv);
2233 }
2234 XPUSHs(sv_2mortal(sv));
2235 checksum = 0;
2236 }
2237
2238 if (symptr->flags & FLAG_SLASH){
2239 if (SP - PL_stack_base - start_sp_offset <= 0)
2240 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2241 if( next_symbol(symptr) ){
2242 if( symptr->howlen == e_number )
2243 Perl_croak(aTHX_ "Count after length/code in unpack" );
2244 if( beyond ){
2245 /* ...end of char buffer then no decent length available */
2246 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2247 } else {
2248 /* take top of stack (hope it's numeric) */
2249 len = POPi;
2250 if( len < 0 )
2251 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2252 }
2253 } else {
2254 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2255 }
2256 datumtype = symptr->code;
2257 explicit_length = FALSE;
2258 goto redo_switch;
2259 }
2260 }
2261
2262 if (new_s)
2263 *new_s = s;
2264 PUTBACK;
2265 return SP - PL_stack_base - start_sp_offset;
2266}
2267
2268PP(pp_unpack)
2269{
2270 dSP;
2271 dPOPPOPssrl;
2272 I32 gimme = GIMME_V;
2273 STRLEN llen;
2274 STRLEN rlen;
2275 const char *pat = SvPV_const(left, llen);
2276 const char *s = SvPV_const(right, rlen);
2277 const char *strend = s + rlen;
2278 const char *patend = pat + llen;
2279 I32 cnt;
2280
2281 PUTBACK;
2282 cnt = unpackstring(pat, patend, s, strend,
2283 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2284 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2285
2286 SPAGAIN;
2287 if ( !cnt && gimme == G_SCALAR )
2288 PUSHs(&PL_sv_undef);
2289 RETURN;
2290}
2291
2292STATIC U8 *
2293doencodes(U8 *h, const char *s, I32 len)
2294{
2295 *h++ = PL_uuemap[len];
2296 while (len > 2) {
2297 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2298 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2299 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2300 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2301 s += 3;
2302 len -= 3;
2303 }
2304 if (len > 0) {
2305 const char r = (len > 1 ? s[1] : '\0');
2306 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2307 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2308 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2309 *h++ = PL_uuemap[0];
2310 }
2311 *h++ = '\n';
2312 return h;
2313}
2314
2315STATIC SV *
2316S_is_an_int(pTHX_ const char *s, STRLEN l)
2317{
2318 SV *result = newSVpvn(s, l);
2319 char *const result_c = SvPV_nolen(result); /* convenience */
2320 char *out = result_c;
2321 bool skip = 1;
2322 bool ignore = 0;
2323
2324 while (*s) {
2325 switch (*s) {
2326 case ' ':
2327 break;
2328 case '+':
2329 if (!skip) {
2330 SvREFCNT_dec(result);
2331 return (NULL);
2332 }
2333 break;
2334 case '0':
2335 case '1':
2336 case '2':
2337 case '3':
2338 case '4':
2339 case '5':
2340 case '6':
2341 case '7':
2342 case '8':
2343 case '9':
2344 skip = 0;
2345 if (!ignore) {
2346 *(out++) = *s;
2347 }
2348 break;
2349 case '.':
2350 ignore = 1;
2351 break;
2352 default:
2353 SvREFCNT_dec(result);
2354 return (NULL);
2355 }
2356 s++;
2357 }
2358 *(out++) = '\0';
2359 SvCUR_set(result, out - result_c);
2360 return (result);
2361}
2362
2363/* pnum must be '\0' terminated */
2364STATIC int
2365S_div128(pTHX_ SV *pnum, bool *done)
2366{
2367 STRLEN len;
2368 char * const s = SvPV(pnum, len);
2369 char *t = s;
2370 int m = 0;
2371
2372 *done = 1;
2373 while (*t) {
2374 const int i = m * 10 + (*t - '0');
2375 const int r = (i >> 7); /* r < 10 */
2376 m = i & 0x7F;
2377 if (r) {
2378 *done = 0;
2379 }
2380 *(t++) = '0' + r;
2381 }
2382 *(t++) = '\0';
2383 SvCUR_set(pnum, (STRLEN) (t - s));
2384 return (m);
2385}
2386
2387/*
2388=for apidoc pack_cat
2389
2390The engine implementing pack() Perl function. Note: parameters next_in_list and
2391flags are not used. This call should not be used; use packlist instead.
2392
2393=cut */
2394
2395
2396void
2397Perl_pack_cat(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags)
2398{
2399 tempsym_t sym;
2400 PERL_UNUSED_ARG(next_in_list);
2401 PERL_UNUSED_ARG(flags);
2402
2403 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2404
2405 (void)pack_rec( cat, &sym, beglist, endlist );
2406}
2407
2408
2409/*
2410=for apidoc packlist
2411
2412The engine implementing pack() Perl function.
2413
2414=cut */
2415
2416
2417void
2418Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
2419{
2420 STRLEN no_len;
2421 tempsym_t sym;
2422
2423 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2424
2425 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2426 Also make sure any UTF8 flag is loaded */
2427 SvPV_force(cat, no_len);
2428 if (DO_UTF8(cat)) sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2429
2430 (void)pack_rec( cat, &sym, beglist, endlist );
2431}
2432
2433/* like sv_utf8_upgrade, but also repoint the group start markers */
2434STATIC void
2435marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2436 STRLEN len;
2437 tempsym_t *group;
2438 const char *from_ptr, *from_start, *from_end, **marks, **m;
2439 char *to_start, *to_ptr;
2440
2441 if (SvUTF8(sv)) return;
2442
2443 from_start = SvPVX_const(sv);
2444 from_end = from_start + SvCUR(sv);
2445 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2446 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2447 if (from_ptr == from_end) {
2448 /* Simple case: no character needs to be changed */
2449 SvUTF8_on(sv);
2450 return;
2451 }
2452
2453 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2454 Newx(to_start, len, char);
2455 Copy(from_start, to_start, from_ptr-from_start, char);
2456 to_ptr = to_start + (from_ptr-from_start);
2457
2458 Newx(marks, sym_ptr->level+2, const char *);
2459 for (group=sym_ptr; group; group = group->previous)
2460 marks[group->level] = from_start + group->strbeg;
2461 marks[sym_ptr->level+1] = from_end+1;
2462 for (m = marks; *m < from_ptr; m++)
2463 *m = to_start + (*m-from_start);
2464
2465 for (;from_ptr < from_end; from_ptr++) {
2466 while (*m == from_ptr) *m++ = to_ptr;
2467 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2468 }
2469 *to_ptr = 0;
2470
2471 while (*m == from_ptr) *m++ = to_ptr;
2472 if (m != marks + sym_ptr->level+1) {
2473 Safefree(marks);
2474 Safefree(to_start);
2475 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2476 }
2477 for (group=sym_ptr; group; group = group->previous)
2478 group->strbeg = marks[group->level] - to_start;
2479 Safefree(marks);
2480
2481 if (SvOOK(sv)) {
2482 if (SvIVX(sv)) {
2483 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2484 from_start -= SvIVX(sv);
2485 SvIV_set(sv, 0);
2486 }
2487 SvFLAGS(sv) &= ~SVf_OOK;
2488 }
2489 if (SvLEN(sv) != 0)
2490 Safefree(from_start);
2491 SvPV_set(sv, to_start);
2492 SvCUR_set(sv, to_ptr - to_start);
2493 SvLEN_set(sv, len);
2494 SvUTF8_on(sv);
2495}
2496
2497/* Exponential string grower. Makes string extension effectively O(n)
2498 needed says how many extra bytes we need (not counting the final '\0')
2499 Only grows the string if there is an actual lack of space
2500*/
2501STATIC char *
2502sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2503 const STRLEN cur = SvCUR(sv);
2504 const STRLEN len = SvLEN(sv);
2505 STRLEN extend;
2506 if (len - cur > needed) return SvPVX(sv);
2507 extend = needed > len ? needed : len;
2508 return SvGROW(sv, len+extend+1);
2509}
2510
2511STATIC
2512SV **
2513S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2514{
2515 tempsym_t lookahead;
2516 I32 items = endlist - beglist;
2517 bool found = next_symbol(symptr);
2518 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2519 bool warn_utf8 = ckWARN(WARN_UTF8);
2520
2521 if (symptr->level == 0 && found && symptr->code == 'U') {
2522 marked_upgrade(aTHX_ cat, symptr);
2523 symptr->flags |= FLAG_DO_UTF8;
2524 utf8 = 0;
2525 }
2526 symptr->strbeg = SvCUR(cat);
2527
2528 while (found) {
2529 SV *fromstr;
2530 STRLEN fromlen;
2531 I32 len;
2532 SV *lengthcode = Nullsv;
2533 I32 datumtype = symptr->code;
2534 howlen_t howlen = symptr->howlen;
2535 char *start = SvPVX(cat);
2536 char *cur = start + SvCUR(cat);
2537
2538#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2539
2540 switch (howlen) {
2541 case e_star:
2542 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2543 0 : items;
2544 break;
2545 default:
2546 /* e_no_len and e_number */
2547 len = symptr->length;
2548 break;
2549 }
2550
2551 if (len) {
2552 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2553
2554 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2555 /* We can process this letter. */
2556 STRLEN size = props & PACK_SIZE_MASK;
2557 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2558 }
2559 }
2560
2561 /* Look ahead for next symbol. Do we have code/code? */
2562 lookahead = *symptr;
2563 found = next_symbol(&lookahead);
2564 if (symptr->flags & FLAG_SLASH) {
2565 IV count;
2566 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2567 if (strchr("aAZ", lookahead.code)) {
2568 if (lookahead.howlen == e_number) count = lookahead.length;
2569 else {
2570 if (items > 0)
2571 count = DO_UTF8(*beglist) ?
2572 sv_len_utf8(*beglist) : sv_len(*beglist);
2573 else count = 0;
2574 if (lookahead.code == 'Z') count++;
2575 }
2576 } else {
2577 if (lookahead.howlen == e_number && lookahead.length < items)
2578 count = lookahead.length;
2579 else count = items;
2580 }
2581 lookahead.howlen = e_number;
2582 lookahead.length = count;
2583 lengthcode = sv_2mortal(newSViv(count));
2584 }
2585
2586 /* Code inside the switch must take care to properly update
2587 cat (CUR length and '\0' termination) if it updated *cur and
2588 doesn't simply leave using break */
2589 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2590 default:
2591 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2592 (int) TYPE_NO_MODIFIERS(datumtype));
2593 case '%':
2594 Perl_croak(aTHX_ "'%%' may not be used in pack");
2595 {
2596 char *from;
2597#ifdef PERL_PACK_CAN_SHRIEKSIGN
2598 case '.' | TYPE_IS_SHRIEKING:
2599#endif
2600 case '.':
2601 if (howlen == e_star) from = start;
2602 else if (len == 0) from = cur;
2603 else {
2604 tempsym_t *group = symptr;
2605
2606 while (--len && group) group = group->previous;
2607 from = group ? start + group->strbeg : start;
2608 }
2609 fromstr = NEXTFROM;
2610 len = SvIV(fromstr);
2611 goto resize;
2612#ifdef PERL_PACK_CAN_SHRIEKSIGN
2613 case '@' | TYPE_IS_SHRIEKING:
2614#endif
2615 case '@':
2616 from = start + symptr->strbeg;
2617 resize:
2618#ifdef PERL_PACK_CAN_SHRIEKSIGN
2619 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2620#else /* PERL_PACK_CAN_SHRIEKSIGN */
2621 if (utf8)
2622#endif
2623 if (len >= 0) {
2624 while (len && from < cur) {
2625 from += UTF8SKIP(from);
2626 len--;
2627 }
2628 if (from > cur)
2629 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2630 if (len) {
2631 /* Here we know from == cur */
2632 grow:
2633 GROWING(0, cat, start, cur, len);
2634 Zero(cur, len, char);
2635 cur += len;
2636 } else if (from < cur) {
2637 len = cur - from;
2638 goto shrink;
2639 } else goto no_change;
2640 } else {
2641 cur = from;
2642 len = -len;
2643 goto utf8_shrink;
2644 }
2645 else {
2646 len -= cur - from;
2647 if (len > 0) goto grow;
2648 if (len == 0) goto no_change;
2649 len = -len;
2650 goto shrink;
2651 }
2652 break;
2653 }
2654 case '(': {
2655 tempsym_t savsym = *symptr;
2656 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2657 symptr->flags |= group_modifiers;
2658 symptr->patend = savsym.grpend;
2659 symptr->level++;
2660 symptr->previous = &lookahead;
2661 while (len--) {
2662 U32 was_utf8;
2663 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2664 else symptr->flags &= ~FLAG_PARSE_UTF8;
2665 was_utf8 = SvUTF8(cat);
2666 symptr->patptr = savsym.grpbeg;
2667 beglist = pack_rec(cat, symptr, beglist, endlist);
2668 if (SvUTF8(cat) != was_utf8)
2669 /* This had better be an upgrade while in utf8==0 mode */
2670 utf8 = 1;
2671
2672 if (savsym.howlen == e_star && beglist == endlist)
2673 break; /* No way to continue */
2674 }
2675 lookahead.flags = symptr->flags & ~group_modifiers;
2676 goto no_change;
2677 }
2678 case 'X' | TYPE_IS_SHRIEKING:
2679 if (!len) /* Avoid division by 0 */
2680 len = 1;
2681 if (utf8) {
2682 char *hop, *last;
2683 I32 l = len;
2684 hop = last = start;
2685 while (hop < cur) {
2686 hop += UTF8SKIP(hop);
2687 if (--l == 0) {
2688 last = hop;
2689 l = len;
2690 }
2691 }
2692 if (last > cur)
2693 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2694 cur = last;
2695 break;
2696 }
2697 len = (cur-start) % len;
2698 /* FALL THROUGH */
2699 case 'X':
2700 if (utf8) {
2701 if (len < 1) goto no_change;
2702 utf8_shrink:
2703 while (len > 0) {
2704 if (cur <= start)
2705 Perl_croak(aTHX_ "'%c' outside of string in pack",
2706 (int) TYPE_NO_MODIFIERS(datumtype));
2707 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2708 if (cur <= start)
2709 Perl_croak(aTHX_ "'%c' outside of string in pack",
2710 (int) TYPE_NO_MODIFIERS(datumtype));
2711 }
2712 len--;
2713 }
2714 } else {
2715 shrink:
2716 if (cur - start < len)
2717 Perl_croak(aTHX_ "'%c' outside of string in pack",
2718 (int) TYPE_NO_MODIFIERS(datumtype));
2719 cur -= len;
2720 }
2721 if (cur < start+symptr->strbeg) {
2722 /* Make sure group starts don't point into the void */
2723 tempsym_t *group;
2724 const STRLEN length = cur-start;
2725 for (group = symptr;
2726 group && length < group->strbeg;
2727 group = group->previous) group->strbeg = length;
2728 lookahead.strbeg = length;
2729 }
2730 break;
2731 case 'x' | TYPE_IS_SHRIEKING: {
2732 I32 ai32;
2733 if (!len) /* Avoid division by 0 */
2734 len = 1;
2735 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2736 else ai32 = (cur - start) % len;
2737 if (ai32 == 0) goto no_change;
2738 len -= ai32;
2739 }
2740 /* FALL THROUGH */
2741 case 'x':
2742 goto grow;
2743 case 'A':
2744 case 'Z':
2745 case 'a': {
2746 const char *aptr;
2747
2748 fromstr = NEXTFROM;
2749 aptr = SvPV_const(fromstr, fromlen);
2750 if (DO_UTF8(fromstr)) {
2751 const char *end, *s;
2752
2753 if (!utf8 && !SvUTF8(cat)) {
2754 marked_upgrade(aTHX_ cat, symptr);
2755 lookahead.flags |= FLAG_DO_UTF8;
2756 lookahead.strbeg = symptr->strbeg;
2757 utf8 = 1;
2758 start = SvPVX(cat);
2759 cur = start + SvCUR(cat);
2760 }
2761 if (howlen == e_star) {
2762 if (utf8) goto string_copy;
2763 len = fromlen+1;
2764 }
2765 s = aptr;
2766 end = aptr + fromlen;
2767 fromlen = datumtype == 'Z' ? len-1 : len;
2768 while ((I32) fromlen > 0 && s < end) {
2769 s += UTF8SKIP(s);
2770 fromlen--;
2771 }
2772 if (s > end)
2773 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2774 if (utf8) {
2775 len = fromlen;
2776 if (datumtype == 'Z') len++;
2777 fromlen = s-aptr;
2778 len += fromlen;
2779
2780 goto string_copy;
2781 }
2782 fromlen = len - fromlen;
2783 if (datumtype == 'Z') fromlen--;
2784 if (howlen == e_star) {
2785 len = fromlen;
2786 if (datumtype == 'Z') len++;
2787 }
2788 GROWING(0, cat, start, cur, len);
2789 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2790 datumtype | TYPE_IS_PACK))
2791 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2792 cur += fromlen;
2793 len -= fromlen;
2794 } else if (utf8) {
2795 if (howlen == e_star) {
2796 len = fromlen;
2797 if (datumtype == 'Z') len++;
2798 }
2799 if (len <= (I32) fromlen) {
2800 fromlen = len;
2801 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2802 }
2803 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2804 upgrade, so:
2805 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2806 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2807 len -= fromlen;
2808 while (fromlen > 0) {
2809 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2810 aptr++;
2811 fromlen--;
2812 }
2813 } else {
2814 string_copy:
2815 if (howlen == e_star) {
2816 len = fromlen;
2817 if (datumtype == 'Z') len++;
2818 }
2819 if (len <= (I32) fromlen) {
2820 fromlen = len;
2821 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2822 }
2823 GROWING(0, cat, start, cur, len);
2824 Copy(aptr, cur, fromlen, char);
2825 cur += fromlen;
2826 len -= fromlen;
2827 }
2828 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2829 cur += len;
2830 break;
2831 }
2832 case 'B':
2833 case 'b': {
2834 const char *str, *end;
2835 I32 l, field_len;
2836 U8 bits;
2837 bool utf8_source;
2838 U32 utf8_flags;
2839
2840 fromstr = NEXTFROM;
2841 str = SvPV_const(fromstr, fromlen);
2842 end = str + fromlen;
2843 if (DO_UTF8(fromstr)) {
2844 utf8_source = TRUE;
2845 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2846 } else {
2847 utf8_source = FALSE;
2848 utf8_flags = 0; /* Unused, but keep compilers happy */
2849 }
2850 if (howlen == e_star) len = fromlen;
2851 field_len = (len+7)/8;
2852 GROWING(utf8, cat, start, cur, field_len);
2853 if (len > (I32)fromlen) len = fromlen;
2854 bits = 0;
2855 l = 0;
2856 if (datumtype == 'B')
2857 while (l++ < len) {
2858 if (utf8_source) {
2859 UV val;
2860 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2861 bits |= val & 1;
2862 } else bits |= *str++ & 1;
2863 if (l & 7) bits <<= 1;
2864 else {
2865 PUSH_BYTE(utf8, cur, bits);
2866 bits = 0;
2867 }
2868 }
2869 else
2870 /* datumtype == 'b' */
2871 while (l++ < len) {
2872 if (utf8_source) {
2873 UV val;
2874 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2875 if (val & 1) bits |= 0x80;
2876 } else if (*str++ & 1)
2877 bits |= 0x80;
2878 if (l & 7) bits >>= 1;
2879 else {
2880 PUSH_BYTE(utf8, cur, bits);
2881 bits = 0;
2882 }
2883 }
2884 l--;
2885 if (l & 7) {
2886 if (datumtype == 'B')
2887 bits <<= 7 - (l & 7);
2888 else
2889 bits >>= 7 - (l & 7);
2890 PUSH_BYTE(utf8, cur, bits);
2891 l += 7;
2892 }
2893 /* Determine how many chars are left in the requested field */
2894 l /= 8;
2895 if (howlen == e_star) field_len = 0;
2896 else field_len -= l;
2897 Zero(cur, field_len, char);
2898 cur += field_len;
2899 break;
2900 }
2901 case 'H':
2902 case 'h': {
2903 const char *str, *end;
2904 I32 l, field_len;
2905 U8 bits;
2906 bool utf8_source;
2907 U32 utf8_flags;
2908
2909 fromstr = NEXTFROM;
2910 str = SvPV_const(fromstr, fromlen);
2911 end = str + fromlen;
2912 if (DO_UTF8(fromstr)) {
2913 utf8_source = TRUE;
2914 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2915 } else {
2916 utf8_source = FALSE;
2917 utf8_flags = 0; /* Unused, but keep compilers happy */
2918 }
2919 if (howlen == e_star) len = fromlen;
2920 field_len = (len+1)/2;
2921 GROWING(utf8, cat, start, cur, field_len);
2922 if (!utf8 && len > (I32)fromlen) len = fromlen;
2923 bits = 0;
2924 l = 0;
2925 if (datumtype == 'H')
2926 while (l++ < len) {
2927 if (utf8_source) {
2928 UV val;
2929 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2930 if (val < 256 && isALPHA(val))
2931 bits |= (val + 9) & 0xf;
2932 else
2933 bits |= val & 0xf;
2934 } else if (isALPHA(*str))
2935 bits |= (*str++ + 9) & 0xf;
2936 else
2937 bits |= *str++ & 0xf;
2938 if (l & 1) bits <<= 4;
2939 else {
2940 PUSH_BYTE(utf8, cur, bits);
2941 bits = 0;
2942 }
2943 }
2944 else
2945 while (l++ < len) {
2946 if (utf8_source) {
2947 UV val;
2948 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2949 if (val < 256 && isALPHA(val))
2950 bits |= ((val + 9) & 0xf) << 4;
2951 else
2952 bits |= (val & 0xf) << 4;
2953 } else if (isALPHA(*str))
2954 bits |= ((*str++ + 9) & 0xf) << 4;
2955 else
2956 bits |= (*str++ & 0xf) << 4;
2957 if (l & 1) bits >>= 4;
2958 else {
2959 PUSH_BYTE(utf8, cur, bits);
2960 bits = 0;
2961 }
2962 }
2963 l--;
2964 if (l & 1) {
2965 PUSH_BYTE(utf8, cur, bits);
2966 l++;
2967 }
2968 /* Determine how many chars are left in the requested field */
2969 l /= 2;
2970 if (howlen == e_star) field_len = 0;
2971 else field_len -= l;
2972 Zero(cur, field_len, char);
2973 cur += field_len;
2974 break;
2975 }
2976 case 'c':
2977 while (len-- > 0) {
2978 IV aiv;
2979 fromstr = NEXTFROM;
2980 aiv = SvIV(fromstr);
2981 if ((-128 > aiv || aiv > 127) &&
2982 ckWARN(WARN_PACK))
2983 Perl_warner(aTHX_ packWARN(WARN_PACK),
2984 "Character in 'c' format wrapped in pack");
2985 PUSH_BYTE(utf8, cur, aiv & 0xff);
2986 }
2987 break;
2988 case 'C':
2989 if (len == 0) {
2990 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2991 break;
2992 }
2993 GROWING(0, cat, start, cur, len);
2994 while (len-- > 0) {
2995 IV aiv;
2996 fromstr = NEXTFROM;
2997 aiv = SvIV(fromstr);
2998 if ((0 > aiv || aiv > 0xff) &&
2999 ckWARN(WARN_PACK))
3000 Perl_warner(aTHX_ packWARN(WARN_PACK),
3001 "Character in 'C' format wrapped in pack");
3002 *cur++ = aiv & 0xff;
3003 }
3004 break;
3005 case 'W': {
3006 char *end;
3007 U8 in_bytes = IN_BYTES;
3008
3009 end = start+SvLEN(cat)-1;
3010 if (utf8) end -= UTF8_MAXLEN-1;
3011 while (len-- > 0) {
3012 UV auv;
3013 fromstr = NEXTFROM;
3014 auv = SvUV(fromstr);
3015 if (in_bytes) auv = auv % 0x100;
3016 if (utf8) {
3017 W_utf8:
3018 if (cur > end) {
3019 *cur = '\0';
3020 SvCUR_set(cat, cur - start);
3021
3022 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3023 end = start+SvLEN(cat)-UTF8_MAXLEN;
3024 }
3025 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3026 NATIVE_TO_UNI(auv),
3027 warn_utf8 ?
3028 0 : UNICODE_ALLOW_ANY);
3029 } else {
3030 if (auv >= 0x100) {
3031 if (!SvUTF8(cat)) {
3032 *cur = '\0';
3033 SvCUR_set(cat, cur - start);
3034 marked_upgrade(aTHX_ cat, symptr);
3035 lookahead.flags |= FLAG_DO_UTF8;
3036 lookahead.strbeg = symptr->strbeg;
3037 utf8 = 1;
3038 start = SvPVX(cat);
3039 cur = start + SvCUR(cat);
3040 end = start+SvLEN(cat)-UTF8_MAXLEN;
3041 goto W_utf8;
3042 }
3043 if (ckWARN(WARN_PACK))
3044 Perl_warner(aTHX_ packWARN(WARN_PACK),
3045 "Character in 'W' format wrapped in pack");
3046 auv &= 0xff;
3047 }
3048 if (cur >= end) {
3049 *cur = '\0';
3050 SvCUR_set(cat, cur - start);
3051 GROWING(0, cat, start, cur, len+1);
3052 end = start+SvLEN(cat)-1;
3053 }
3054 *(U8 *) cur++ = (U8)auv;
3055 }
3056 }
3057 break;
3058 }
3059 case 'U': {
3060 char *end;
3061
3062 if (len == 0) {
3063 if (!(symptr->flags & FLAG_DO_UTF8)) {
3064 marked_upgrade(aTHX_ cat, symptr);
3065 lookahead.flags |= FLAG_DO_UTF8;
3066 lookahead.strbeg = symptr->strbeg;
3067 }
3068 utf8 = 0;
3069 goto no_change;
3070 }
3071
3072 end = start+SvLEN(cat);
3073 if (!utf8) end -= UTF8_MAXLEN;
3074 while (len-- > 0) {
3075 UV auv;
3076 fromstr = NEXTFROM;
3077 auv = SvUV(fromstr);
3078 if (utf8) {
3079 U8 buffer[UTF8_MAXLEN], *endb;
3080 endb = uvuni_to_utf8_flags(buffer, auv,
3081 warn_utf8 ?
3082 0 : UNICODE_ALLOW_ANY);
3083 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3084 *cur = '\0';
3085 SvCUR_set(cat, cur - start);
3086 GROWING(0, cat, start, cur,
3087 len+(endb-buffer)*UTF8_EXPAND);
3088 end = start+SvLEN(cat);
3089 }
3090 bytes_to_uni(aTHX_ buffer, endb-buffer, &cur);
3091 } else {
3092 if (cur >= end) {
3093 *cur = '\0';
3094 SvCUR_set(cat, cur - start);
3095 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3096 end = start+SvLEN(cat)-UTF8_MAXLEN;
3097 }
3098 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3099 warn_utf8 ?
3100 0 : UNICODE_ALLOW_ANY);
3101 }
3102 }
3103 break;
3104 }
3105 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3106 case 'f':
3107 while (len-- > 0) {
3108 float afloat;
3109 NV anv;
3110 fromstr = NEXTFROM;
3111 anv = SvNV(fromstr);
3112#ifdef __VOS__
3113 /* VOS does not automatically map a floating-point overflow
3114 during conversion from double to float into infinity, so we
3115 do it by hand. This code should either be generalized for
3116 any OS that needs it, or removed if and when VOS implements
3117 posix-976 (suggestion to support mapping to infinity).
3118 Paul.Green@stratus.com 02-04-02. */
3119 if (anv > FLT_MAX)
3120 afloat = _float_constants[0]; /* single prec. inf. */
3121 else if (anv < -FLT_MAX)
3122 afloat = _float_constants[0]; /* single prec. inf. */
3123 else afloat = (float) anv;
3124#else /* __VOS__ */
3125# if defined(VMS) && !defined(__IEEE_FP)
3126 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3127 * on Alpha; fake it if we don't have them.
3128 */
3129 if (anv > FLT_MAX)
3130 afloat = FLT_MAX;
3131 else if (anv < -FLT_MAX)
3132 afloat = -FLT_MAX;
3133 else afloat = (float)anv;
3134# else
3135 afloat = (float)anv;
3136# endif
3137#endif /* __VOS__ */
3138 DO_BO_PACK_N(afloat, float);
3139 PUSH_VAR(utf8, cur, afloat);
3140 }
3141 break;
3142 case 'd':
3143 while (len-- > 0) {
3144 double adouble;
3145 NV anv;
3146 fromstr = NEXTFROM;
3147 anv = SvNV(fromstr);
3148#ifdef __VOS__
3149 /* VOS does not automatically map a floating-point overflow
3150 during conversion from long double to double into infinity,
3151 so we do it by hand. This code should either be generalized
3152 for any OS that needs it, or removed if and when VOS
3153 implements posix-976 (suggestion to support mapping to
3154 infinity). Paul.Green@stratus.com 02-04-02. */
3155 if (anv > DBL_MAX)
3156 adouble = _double_constants[0]; /* double prec. inf. */
3157 else if (anv < -DBL_MAX)
3158 adouble = _double_constants[0]; /* double prec. inf. */
3159 else adouble = (double) anv;
3160#else /* __VOS__ */
3161# if defined(VMS) && !defined(__IEEE_FP)
3162 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3163 * on Alpha; fake it if we don't have them.
3164 */
3165 if (anv > DBL_MAX)
3166 adouble = DBL_MAX;
3167 else if (anv < -DBL_MAX)
3168 adouble = -DBL_MAX;
3169 else adouble = (double)anv;
3170# else
3171 adouble = (double)anv;
3172# endif
3173#endif /* __VOS__ */
3174 DO_BO_PACK_N(adouble, double);
3175 PUSH_VAR(utf8, cur, adouble);
3176 }
3177 break;
3178 case 'F': {
3179 NV anv;
3180 Zero(&anv, 1, NV); /* can be long double with unused bits */
3181 while (len-- > 0) {
3182 fromstr = NEXTFROM;
3183 anv = SvNV(fromstr);
3184 DO_BO_PACK_N(anv, NV);
3185 PUSH_VAR(utf8, cur, anv);
3186 }
3187 break;
3188 }
3189#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3190 case 'D': {
3191 long double aldouble;
3192 /* long doubles can have unused bits, which may be nonzero */
3193 Zero(&aldouble, 1, long double);
3194 while (len-- > 0) {
3195 fromstr = NEXTFROM;
3196 aldouble = (long double)SvNV(fromstr);
3197 DO_BO_PACK_N(aldouble, long double);
3198 PUSH_VAR(utf8, cur, aldouble);
3199 }
3200 break;
3201 }
3202#endif
3203#ifdef PERL_PACK_CAN_SHRIEKSIGN
3204 case 'n' | TYPE_IS_SHRIEKING:
3205#endif
3206 case 'n':
3207 while (len-- > 0) {
3208 I16 ai16;
3209 fromstr = NEXTFROM;
3210 ai16 = (I16)SvIV(fromstr);
3211#ifdef HAS_HTONS
3212 ai16 = PerlSock_htons(ai16);
3213#endif
3214 PUSH16(utf8, cur, &ai16);
3215 }
3216 break;
3217#ifdef PERL_PACK_CAN_SHRIEKSIGN
3218 case 'v' | TYPE_IS_SHRIEKING:
3219#endif
3220 case 'v':
3221 while (len-- > 0) {
3222 I16 ai16;
3223 fromstr = NEXTFROM;
3224 ai16 = (I16)SvIV(fromstr);
3225#ifdef HAS_HTOVS
3226 ai16 = htovs(ai16);
3227#endif
3228 PUSH16(utf8, cur, &ai16);
3229 }
3230 break;
3231 case 'S' | TYPE_IS_SHRIEKING:
3232#if SHORTSIZE != SIZE16
3233 while (len-- > 0) {
3234 unsigned short aushort;
3235 fromstr = NEXTFROM;
3236 aushort = SvUV(fromstr);
3237 DO_BO_PACK(aushort, s);
3238 PUSH_VAR(utf8, cur, aushort);
3239 }
3240 break;
3241#else
3242 /* Fall through! */
3243#endif
3244 case 'S':
3245 while (len-- > 0) {
3246 U16 au16;
3247 fromstr = NEXTFROM;
3248 au16 = (U16)SvUV(fromstr);
3249 DO_BO_PACK(au16, 16);
3250 PUSH16(utf8, cur, &au16);
3251 }
3252 break;
3253 case 's' | TYPE_IS_SHRIEKING:
3254#if SHORTSIZE != SIZE16
3255 while (len-- > 0) {
3256 short ashort;
3257 fromstr = NEXTFROM;
3258 ashort = SvIV(fromstr);
3259 DO_BO_PACK(ashort, s);
3260 PUSH_VAR(utf8, cur, ashort);
3261 }
3262 break;
3263#else
3264 /* Fall through! */
3265#endif
3266 case 's':
3267 while (len-- > 0) {
3268 I16 ai16;
3269 fromstr = NEXTFROM;
3270 ai16 = (I16)SvIV(fromstr);
3271 DO_BO_PACK(ai16, 16);
3272 PUSH16(utf8, cur, &ai16);
3273 }
3274 break;
3275 case 'I':
3276 case 'I' | TYPE_IS_SHRIEKING:
3277 while (len-- > 0) {
3278 unsigned int auint;
3279 fromstr = NEXTFROM;
3280 auint = SvUV(fromstr);
3281 DO_BO_PACK(auint, i);
3282 PUSH_VAR(utf8, cur, auint);
3283 }
3284 break;
3285 case 'j':
3286 while (len-- > 0) {
3287 IV aiv;
3288 fromstr = NEXTFROM;
3289 aiv = SvIV(fromstr);
3290#if IVSIZE == INTSIZE
3291 DO_BO_PACK(aiv, i);
3292#elif IVSIZE == LONGSIZE
3293 DO_BO_PACK(aiv, l);
3294#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3295 DO_BO_PACK(aiv, 64);
3296#else
3297 Perl_croak(aTHX_ "'j' not supported on this platform");
3298#endif
3299 PUSH_VAR(utf8, cur, aiv);
3300 }
3301 break;
3302 case 'J':
3303 while (len-- > 0) {
3304 UV auv;
3305 fromstr = NEXTFROM;
3306 auv = SvUV(fromstr);
3307#if UVSIZE == INTSIZE
3308 DO_BO_PACK(auv, i);
3309#elif UVSIZE == LONGSIZE
3310 DO_BO_PACK(auv, l);
3311#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3312 DO_BO_PACK(auv, 64);
3313#else
3314 Perl_croak(aTHX_ "'J' not supported on this platform");
3315#endif
3316 PUSH_VAR(utf8, cur, auv);
3317 }
3318 break;
3319 case 'w':
3320 while (len-- > 0) {
3321 NV anv;
3322 fromstr = NEXTFROM;
3323 anv = SvNV(fromstr);
3324
3325 if (anv < 0) {
3326 *cur = '\0';
3327 SvCUR_set(cat, cur - start);
3328 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3329 }
3330
3331 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3332 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3333 any negative IVs will have already been got by the croak()
3334 above. IOK is untrue for fractions, so we test them
3335 against UV_MAX_P1. */
3336 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3337 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3338 char *in = buf + sizeof(buf);
3339 UV auv = SvUV(fromstr);
3340
3341 do {
3342 *--in = (char)((auv & 0x7f) | 0x80);
3343 auv >>= 7;
3344 } while (auv);
3345 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3346 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3347 in, (buf + sizeof(buf)) - in);
3348 } else if (SvPOKp(fromstr))
3349 goto w_string;
3350 else if (SvNOKp(fromstr)) {
3351 /* 10**NV_MAX_10_EXP is the largest power of 10
3352 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3353 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3354 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3355 And with that many bytes only Inf can overflow.
3356 Some C compilers are strict about integral constant
3357 expressions so we conservatively divide by a slightly
3358 smaller integer instead of multiplying by the exact
3359 floating-point value.
3360 */
3361#ifdef NV_MAX_10_EXP
3362 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3363 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3364#else
3365 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3366 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3367#endif
3368 char *in = buf + sizeof(buf);
3369
3370 anv = Perl_floor(anv);
3371 do {
3372 const NV next = Perl_floor(anv / 128);
3373 if (in <= buf) /* this cannot happen ;-) */
3374 Perl_croak(aTHX_ "Cannot compress integer in pack");
3375 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3376 anv = next;
3377 } while (anv > 0);
3378 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3379 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3380 in, (buf + sizeof(buf)) - in);
3381 } else {
3382 const char *from;
3383 char *result, *in;
3384 SV *norm;
3385 STRLEN len;
3386 bool done;
3387
3388 w_string:
3389 /* Copy string and check for compliance */
3390 from = SvPV_const(fromstr, len);
3391 if ((norm = is_an_int(from, len)) == NULL)
3392 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3393
3394 Newx(result, len, char);
3395 in = result + len;
3396 done = FALSE;
3397 while (!done) *--in = div128(norm, &done) | 0x80;
3398 result[len - 1] &= 0x7F; /* clear continue bit */
3399 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3400 in, (result + len) - in);
3401 Safefree(result);
3402 SvREFCNT_dec(norm); /* free norm */
3403 }
3404 }
3405 break;
3406 case 'i':
3407 case 'i' | TYPE_IS_SHRIEKING:
3408 while (len-- > 0) {
3409 int aint;
3410 fromstr = NEXTFROM;
3411 aint = SvIV(fromstr);
3412 DO_BO_PACK(aint, i);
3413 PUSH_VAR(utf8, cur, aint);
3414 }
3415 break;
3416#ifdef PERL_PACK_CAN_SHRIEKSIGN
3417 case 'N' | TYPE_IS_SHRIEKING:
3418#endif
3419 case 'N':
3420 while (len-- > 0) {
3421 U32 au32;
3422 fromstr = NEXTFROM;
3423 au32 = SvUV(fromstr);
3424#ifdef HAS_HTONL
3425 au32 = PerlSock_htonl(au32);
3426#endif
3427 PUSH32(utf8, cur, &au32);
3428 }
3429 break;
3430#ifdef PERL_PACK_CAN_SHRIEKSIGN
3431 case 'V' | TYPE_IS_SHRIEKING:
3432#endif
3433 case 'V':
3434 while (len-- > 0) {
3435 U32 au32;
3436 fromstr = NEXTFROM;
3437 au32 = SvUV(fromstr);
3438#ifdef HAS_HTOVL
3439 au32 = htovl(au32);
3440#endif
3441 PUSH32(utf8, cur, &au32);
3442 }
3443 break;
3444 case 'L' | TYPE_IS_SHRIEKING:
3445#if LONGSIZE != SIZE32
3446 while (len-- > 0) {
3447 unsigned long aulong;
3448 fromstr = NEXTFROM;
3449 aulong = SvUV(fromstr);
3450 DO_BO_PACK(aulong, l);
3451 PUSH_VAR(utf8, cur, aulong);
3452 }
3453 break;
3454#else
3455 /* Fall though! */
3456#endif
3457 case 'L':
3458 while (len-- > 0) {
3459 U32 au32;
3460 fromstr = NEXTFROM;
3461 au32 = SvUV(fromstr);
3462 DO_BO_PACK(au32, 32);
3463 PUSH32(utf8, cur, &au32);
3464 }
3465 break;
3466 case 'l' | TYPE_IS_SHRIEKING:
3467#if LONGSIZE != SIZE32
3468 while (len-- > 0) {
3469 long along;
3470 fromstr = NEXTFROM;
3471 along = SvIV(fromstr);
3472 DO_BO_PACK(along, l);
3473 PUSH_VAR(utf8, cur, along);
3474 }
3475 break;
3476#else
3477 /* Fall though! */
3478#endif
3479 case 'l':
3480 while (len-- > 0) {
3481 I32 ai32;
3482 fromstr = NEXTFROM;
3483 ai32 = SvIV(fromstr);
3484 DO_BO_PACK(ai32, 32);
3485 PUSH32(utf8, cur, &ai32);
3486 }
3487 break;
3488#ifdef HAS_QUAD
3489 case 'Q':
3490 while (len-- > 0) {
3491 Uquad_t auquad;
3492 fromstr = NEXTFROM;
3493 auquad = (Uquad_t) SvUV(fromstr);
3494 DO_BO_PACK(auquad, 64);
3495 PUSH_VAR(utf8, cur, auquad);
3496 }
3497 break;
3498 case 'q':
3499 while (len-- > 0) {
3500 Quad_t aquad;
3501 fromstr = NEXTFROM;
3502 aquad = (Quad_t)SvIV(fromstr);
3503 DO_BO_PACK(aquad, 64);
3504 PUSH_VAR(utf8, cur, aquad);
3505 }
3506 break;
3507#endif /* HAS_QUAD */
3508 case 'P':
3509 len = 1; /* assume SV is correct length */
3510 GROWING(utf8, cat, start, cur, sizeof(char *));
3511 /* Fall through! */
3512 case 'p':
3513 while (len-- > 0) {
3514 const char *aptr;
3515
3516 fromstr = NEXTFROM;
3517 SvGETMAGIC(fromstr);
3518 if (!SvOK(fromstr)) aptr = NULL;
3519 else {
3520 STRLEN n_a;
3521 /* XXX better yet, could spirit away the string to
3522 * a safe spot and hang on to it until the result
3523 * of pack() (and all copies of the result) are
3524 * gone.
3525 */
3526 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3527 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
3528 Perl_warner(aTHX_ packWARN(WARN_PACK),
3529 "Attempt to pack pointer to temporary value");
3530 }
3531 if (SvPOK(fromstr) || SvNIOK(fromstr))
3532 aptr = SvPV_nomg_const(fromstr, n_a);
3533 else
3534 aptr = SvPV_force_flags(fromstr, n_a, 0);
3535 }
3536 DO_BO_PACK_PC(aptr);
3537 PUSH_VAR(utf8, cur, aptr);
3538 }
3539 break;
3540 case 'u': {
3541 const char *aptr, *aend;
3542 bool from_utf8;
3543
3544 fromstr = NEXTFROM;
3545 if (len <= 2) len = 45;
3546 else len = len / 3 * 3;
3547 if (len >= 64) {
3548 Perl_warner(aTHX_ packWARN(WARN_PACK),
3549 "Field too wide in 'u' format in pack");
3550 len = 63;
3551 }
3552 aptr = SvPV_const(fromstr, fromlen);
3553 from_utf8 = DO_UTF8(fromstr);
3554 if (from_utf8) {
3555 aend = aptr + fromlen;
3556 fromlen = sv_len_utf8(fromstr);
3557 } else aend = NULL; /* Unused, but keep compilers happy */
3558 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3559 while (fromlen > 0) {
3560 U8 *end;
3561 I32 todo;
3562 U8 hunk[1+63/3*4+1];
3563
3564 if ((I32)fromlen > len)
3565 todo = len;
3566 else
3567 todo = fromlen;
3568 if (from_utf8) {
3569 char buffer[64];
3570 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3571 'u' | TYPE_IS_PACK)) {
3572 *cur = '\0';
3573 SvCUR_set(cat, cur - start);
3574 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3575 }
3576 end = doencodes(hunk, buffer, todo);
3577 } else {
3578 end = doencodes(hunk, aptr, todo);
3579 aptr += todo;
3580 }
3581 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3582 fromlen -= todo;
3583 }
3584 break;
3585 }
3586 }
3587 *cur = '\0';
3588 SvCUR_set(cat, cur - start);
3589 no_change:
3590 *symptr = lookahead;
3591 }
3592 return beglist;
3593}
3594#undef NEXTFROM
3595
3596
3597PP(pp_pack)
3598{
3599 dSP; dMARK; dORIGMARK; dTARGET;
3600 register SV *cat = TARG;
3601 STRLEN fromlen;
3602 SV *pat_sv = *++MARK;
3603 register const char *pat = SvPV_const(pat_sv, fromlen);
3604 register const char *patend = pat + fromlen;
3605
3606 MARK++;
3607 sv_setpvn(cat, "", 0);
3608 SvUTF8_off(cat);
3609
3610 packlist(cat, pat, patend, MARK, SP + 1);
3611
3612 SvSETMAGIC(cat);
3613 SP = ORIGMARK;
3614 PUSHs(cat);
3615 RETURN;
3616}
3617
3618/*
3619 * Local variables:
3620 * c-indentation-style: bsd
3621 * c-basic-offset: 4
3622 * indent-tabs-mode: t
3623 * End:
3624 *
3625 * ex: set ts=8 sts=4 sw=4 noet:
3626 */