This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Deprecate assert.pl with a warning
[perl5.git] / pp_pack.c
... / ...
CommitLineData
1/* pp_pack.c
2 *
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
19 */
20
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
73#if PERL_VERSION >= 9
74# define PERL_PACK_CAN_BYTEORDER
75# define PERL_PACK_CAN_SHRIEKSIGN
76#endif
77
78#ifndef CHAR_BIT
79# define CHAR_BIT 8
80#endif
81/* Maximum number of bytes to which a byte can grow due to upgrade */
82#define UTF8_EXPAND 2
83
84/*
85 * Offset for integer pack/unpack.
86 *
87 * On architectures where I16 and I32 aren't really 16 and 32 bits,
88 * which for now are all Crays, pack and unpack have to play games.
89 */
90
91/*
92 * These values are required for portability of pack() output.
93 * If they're not right on your machine, then pack() and unpack()
94 * wouldn't work right anyway; you'll need to apply the Cray hack.
95 * (I'd like to check them with #if, but you can't use sizeof() in
96 * the preprocessor.) --???
97 */
98/*
99 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
100 defines are now in config.h. --Andy Dougherty April 1998
101 */
102#define SIZE16 2
103#define SIZE32 4
104
105/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
106 --jhi Feb 1999 */
107
108#if U16SIZE > SIZE16 || U32SIZE > SIZE32
109# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
110# define OFF16(p) ((char*)(p))
111# define OFF32(p) ((char*)(p))
112# else
113# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
114# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
115# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
116# else
117 ++++ bad cray byte order
118# endif
119# endif
120#else
121# define OFF16(p) ((char *) (p))
122# define OFF32(p) ((char *) (p))
123#endif
124
125/* Only to be used inside a loop (see the break) */
126#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
127 if (utf8) { \
128 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
129 } else { \
130 Copy(s, OFF16(p), SIZE16, char); \
131 (s) += SIZE16; \
132 } \
133} STMT_END
134
135/* Only to be used inside a loop (see the break) */
136#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
137 if (utf8) { \
138 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
139 } else { \
140 Copy(s, OFF32(p), SIZE32, char); \
141 (s) += SIZE32; \
142 } \
143} STMT_END
144
145#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
146#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
147
148/* Only to be used inside a loop (see the break) */
149#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
150STMT_START { \
151 if (utf8) { \
152 if (!uni_to_bytes(aTHX_ &s, strend, \
153 (char *) &var, sizeof(var), datumtype)) break;\
154 } else { \
155 Copy(s, (char *) &var, sizeof(var), char); \
156 s += sizeof(var); \
157 } \
158} STMT_END
159
160#define PUSH_VAR(utf8, aptr, var) \
161 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
162
163/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
164#define MAX_SUB_TEMPLATE_LEVEL 100
165
166/* flags (note that type modifiers can also be used as flags!) */
167#define FLAG_WAS_UTF8 0x40
168#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
169#define FLAG_UNPACK_ONLY_ONE 0x10
170#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
171#define FLAG_SLASH 0x04
172#define FLAG_COMMA 0x02
173#define FLAG_PACK 0x01
174
175STATIC SV *
176S_mul128(pTHX_ SV *sv, U8 m)
177{
178 STRLEN len;
179 char *s = SvPV(sv, len);
180 char *t;
181
182 PERL_ARGS_ASSERT_MUL128;
183
184 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
185 SV * const tmpNew = newSVpvs("0000000000");
186
187 sv_catsv(tmpNew, sv);
188 SvREFCNT_dec(sv); /* free old sv */
189 sv = tmpNew;
190 s = SvPV(sv, len);
191 }
192 t = s + len - 1;
193 while (!*t) /* trailing '\0'? */
194 t--;
195 while (t > s) {
196 const U32 i = ((*t - '0') << 7) + m;
197 *(t--) = '0' + (char)(i % 10);
198 m = (char)(i / 10);
199 }
200 return (sv);
201}
202
203/* Explosives and implosives. */
204
205#if 'I' == 73 && 'J' == 74
206/* On an ASCII/ISO kind of system */
207#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
208#else
209/*
210 Some other sort of character set - use memchr() so we don't match
211 the null byte.
212 */
213#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
214#endif
215
216/* type modifiers */
217#define TYPE_IS_SHRIEKING 0x100
218#define TYPE_IS_BIG_ENDIAN 0x200
219#define TYPE_IS_LITTLE_ENDIAN 0x400
220#define TYPE_IS_PACK 0x800
221#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
222#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
223#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
224
225#ifdef PERL_PACK_CAN_SHRIEKSIGN
226# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
227#else
228# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
229#endif
230
231#ifndef PERL_PACK_CAN_BYTEORDER
232/* Put "can't" first because it is shorter */
233# define TYPE_ENDIANNESS(t) 0
234# define TYPE_NO_ENDIANNESS(t) (t)
235
236# define ENDIANNESS_ALLOWED_TYPES ""
237
238# define DO_BO_UNPACK(var, type)
239# define DO_BO_PACK(var, type)
240# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
241# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
242# define DO_BO_UNPACK_N(var, type)
243# define DO_BO_PACK_N(var, type)
244# define DO_BO_UNPACK_P(var)
245# define DO_BO_PACK_P(var)
246# define DO_BO_UNPACK_PC(var)
247# define DO_BO_PACK_PC(var)
248
249#else /* PERL_PACK_CAN_BYTEORDER */
250
251# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
252# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
253
254# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
255
256# define DO_BO_UNPACK(var, type) \
257 STMT_START { \
258 switch (TYPE_ENDIANNESS(datumtype)) { \
259 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
260 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
261 default: break; \
262 } \
263 } STMT_END
264
265# define DO_BO_PACK(var, type) \
266 STMT_START { \
267 switch (TYPE_ENDIANNESS(datumtype)) { \
268 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
269 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
270 default: break; \
271 } \
272 } STMT_END
273
274# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
275 STMT_START { \
276 switch (TYPE_ENDIANNESS(datumtype)) { \
277 case TYPE_IS_BIG_ENDIAN: \
278 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
279 break; \
280 case TYPE_IS_LITTLE_ENDIAN: \
281 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
282 break; \
283 default: \
284 break; \
285 } \
286 } STMT_END
287
288# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
289 STMT_START { \
290 switch (TYPE_ENDIANNESS(datumtype)) { \
291 case TYPE_IS_BIG_ENDIAN: \
292 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
293 break; \
294 case TYPE_IS_LITTLE_ENDIAN: \
295 var = (post_cast *) my_htole ## type ((pre_cast) var); \
296 break; \
297 default: \
298 break; \
299 } \
300 } STMT_END
301
302# define BO_CANT_DOIT(action, type) \
303 STMT_START { \
304 switch (TYPE_ENDIANNESS(datumtype)) { \
305 case TYPE_IS_BIG_ENDIAN: \
306 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
307 "platform", #action, #type); \
308 break; \
309 case TYPE_IS_LITTLE_ENDIAN: \
310 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
311 "platform", #action, #type); \
312 break; \
313 default: \
314 break; \
315 } \
316 } STMT_END
317
318# if PTRSIZE == INTSIZE
319# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
320# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
321# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
322# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
323# elif PTRSIZE == LONGSIZE
324# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
325# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
326# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
327# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
328# elif PTRSIZE == IVSIZE
329# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
330# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
331# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
332# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
333# else
334# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
335# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
336# define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
337# define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
338# endif
339
340# if defined(my_htolen) && defined(my_letohn) && \
341 defined(my_htoben) && defined(my_betohn)
342# define DO_BO_UNPACK_N(var, type) \
343 STMT_START { \
344 switch (TYPE_ENDIANNESS(datumtype)) { \
345 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
346 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
347 default: break; \
348 } \
349 } STMT_END
350
351# define DO_BO_PACK_N(var, type) \
352 STMT_START { \
353 switch (TYPE_ENDIANNESS(datumtype)) { \
354 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
355 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
356 default: break; \
357 } \
358 } STMT_END
359# else
360# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
361# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
362# endif
363
364#endif /* PERL_PACK_CAN_BYTEORDER */
365
366#define PACK_SIZE_CANNOT_CSUM 0x80
367#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
368#define PACK_SIZE_MASK 0x3F
369
370/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
371 in). You're unlikely ever to need to regenerate them. */
372
373#if TYPE_IS_SHRIEKING != 0x100
374 ++++shriek offset should be 256
375#endif
376
377typedef U8 packprops_t;
378#if 'J'-'I' == 1
379/* ASCII */
380STATIC const packprops_t packprops[512] = {
381 /* normal */
382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
383 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
384 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
385 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
386 0, 0, 0,
387 /* C */ sizeof(unsigned char),
388#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
389 /* D */ LONG_DOUBLESIZE,
390#else
391 0,
392#endif
393 0,
394 /* F */ NVSIZE,
395 0, 0,
396 /* I */ sizeof(unsigned int),
397 /* J */ UVSIZE,
398 0,
399 /* L */ SIZE32,
400 0,
401 /* N */ SIZE32,
402 0, 0,
403#if defined(HAS_QUAD)
404 /* Q */ sizeof(Uquad_t),
405#else
406 0,
407#endif
408 0,
409 /* S */ SIZE16,
410 0,
411 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
412 /* V */ SIZE32,
413 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
414 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
415 /* c */ sizeof(char),
416 /* d */ sizeof(double),
417 0,
418 /* f */ sizeof(float),
419 0, 0,
420 /* i */ sizeof(int),
421 /* j */ IVSIZE,
422 0,
423 /* l */ SIZE32,
424 0,
425 /* n */ SIZE16,
426 0,
427 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
428#if defined(HAS_QUAD)
429 /* q */ sizeof(Quad_t),
430#else
431 0,
432#endif
433 0,
434 /* s */ SIZE16,
435 0, 0,
436 /* v */ SIZE16,
437 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
444 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446 0, 0, 0, 0, 0, 0, 0, 0,
447 /* shrieking */
448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
450 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
451 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
452 0, 0, 0, 0, 0, 0, 0, 0, 0,
453 /* I */ sizeof(unsigned int),
454 0, 0,
455 /* L */ sizeof(unsigned long),
456 0,
457#if defined(PERL_PACK_CAN_SHRIEKSIGN)
458 /* N */ SIZE32,
459#else
460 0,
461#endif
462 0, 0, 0, 0,
463 /* S */ sizeof(unsigned short),
464 0, 0,
465#if defined(PERL_PACK_CAN_SHRIEKSIGN)
466 /* V */ SIZE32,
467#else
468 0,
469#endif
470 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
471 0, 0,
472 /* i */ sizeof(int),
473 0, 0,
474 /* l */ sizeof(long),
475 0,
476#if defined(PERL_PACK_CAN_SHRIEKSIGN)
477 /* n */ SIZE16,
478#else
479 0,
480#endif
481 0, 0, 0, 0,
482 /* s */ sizeof(short),
483 0, 0,
484#if defined(PERL_PACK_CAN_SHRIEKSIGN)
485 /* v */ SIZE16,
486#else
487 0,
488#endif
489 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
490 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
495 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
496 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
497 0, 0, 0, 0, 0, 0, 0, 0, 0
498};
499#else
500/* EBCDIC (or bust) */
501STATIC const packprops_t packprops[512] = {
502 /* normal */
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
506 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
507 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
508 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
509 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
510 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
511 0, 0, 0,
512 /* c */ sizeof(char),
513 /* d */ sizeof(double),
514 0,
515 /* f */ sizeof(float),
516 0, 0,
517 /* i */ sizeof(int),
518 0, 0, 0, 0, 0, 0, 0,
519 /* j */ IVSIZE,
520 0,
521 /* l */ SIZE32,
522 0,
523 /* n */ SIZE16,
524 0,
525 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
526#if defined(HAS_QUAD)
527 /* q */ sizeof(Quad_t),
528#else
529 0,
530#endif
531 0, 0, 0, 0, 0, 0, 0, 0, 0,
532 /* s */ SIZE16,
533 0, 0,
534 /* v */ SIZE16,
535 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
536 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
537 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
538 /* C */ sizeof(unsigned char),
539#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
540 /* D */ LONG_DOUBLESIZE,
541#else
542 0,
543#endif
544 0,
545 /* F */ NVSIZE,
546 0, 0,
547 /* I */ sizeof(unsigned int),
548 0, 0, 0, 0, 0, 0, 0,
549 /* J */ UVSIZE,
550 0,
551 /* L */ SIZE32,
552 0,
553 /* N */ SIZE32,
554 0, 0,
555#if defined(HAS_QUAD)
556 /* Q */ sizeof(Uquad_t),
557#else
558 0,
559#endif
560 0, 0, 0, 0, 0, 0, 0, 0, 0,
561 /* S */ SIZE16,
562 0,
563 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
564 /* V */ SIZE32,
565 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 /* shrieking */
569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
572 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
573 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
574 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
575 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
576 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
577 0, 0, 0, 0, 0, 0, 0, 0, 0,
578 /* i */ sizeof(int),
579 0, 0, 0, 0, 0, 0, 0, 0, 0,
580 /* l */ sizeof(long),
581 0,
582#if defined(PERL_PACK_CAN_SHRIEKSIGN)
583 /* n */ SIZE16,
584#else
585 0,
586#endif
587 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
588 /* s */ sizeof(short),
589 0, 0,
590#if defined(PERL_PACK_CAN_SHRIEKSIGN)
591 /* v */ SIZE16,
592#else
593 0,
594#endif
595 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
596 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
597 0, 0, 0,
598 /* I */ sizeof(unsigned int),
599 0, 0, 0, 0, 0, 0, 0, 0, 0,
600 /* L */ sizeof(unsigned long),
601 0,
602#if defined(PERL_PACK_CAN_SHRIEKSIGN)
603 /* N */ SIZE32,
604#else
605 0,
606#endif
607 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
608 /* S */ sizeof(unsigned short),
609 0, 0,
610#if defined(PERL_PACK_CAN_SHRIEKSIGN)
611 /* V */ SIZE32,
612#else
613 0,
614#endif
615 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
616 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
617};
618#endif
619
620STATIC U8
621uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
622{
623 STRLEN retlen;
624 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
625 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
626 /* We try to process malformed UTF-8 as much as possible (preferrably with
627 warnings), but these two mean we make no progress in the string and
628 might enter an infinite loop */
629 if (retlen == (STRLEN) -1 || retlen == 0)
630 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
631 (int) TYPE_NO_MODIFIERS(datumtype));
632 if (val >= 0x100) {
633 if (ckWARN(WARN_UNPACK))
634 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
635 "Character in '%c' format wrapped in unpack",
636 (int) TYPE_NO_MODIFIERS(datumtype));
637 val &= 0xff;
638 }
639 *s += retlen;
640 return (U8)val;
641}
642
643#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
644 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
645 *(U8 *)(s)++)
646
647STATIC bool
648uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
649{
650 UV val;
651 STRLEN retlen;
652 const char *from = *s;
653 int bad = 0;
654 const U32 flags = ckWARN(WARN_UTF8) ?
655 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
656 for (;buf_len > 0; buf_len--) {
657 if (from >= end) return FALSE;
658 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
659 if (retlen == (STRLEN) -1 || retlen == 0) {
660 from += UTF8SKIP(from);
661 bad |= 1;
662 } else from += retlen;
663 if (val >= 0x100) {
664 bad |= 2;
665 val &= 0xff;
666 }
667 *(U8 *)buf++ = (U8)val;
668 }
669 /* We have enough characters for the buffer. Did we have problems ? */
670 if (bad) {
671 if (bad & 1) {
672 /* Rewalk the string fragment while warning */
673 const char *ptr;
674 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
675 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
676 if (ptr >= end) break;
677 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
678 }
679 if (from > end) from = end;
680 }
681 if ((bad & 2) && ckWARN(WARN_UNPACK))
682 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
683 WARN_PACK : WARN_UNPACK),
684 "Character(s) in '%c' format wrapped in %s",
685 (int) TYPE_NO_MODIFIERS(datumtype),
686 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
687 }
688 *s = from;
689 return TRUE;
690}
691
692STATIC bool
693next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
694{
695 dVAR;
696 STRLEN retlen;
697 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
698 if (val >= 0x100 || !ISUUCHAR(val) ||
699 retlen == (STRLEN) -1 || retlen == 0) {
700 *out = 0;
701 return FALSE;
702 }
703 *out = PL_uudmap[val] & 077;
704 *s += retlen;
705 return TRUE;
706}
707
708STATIC char *
709S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
710 const U8 * const end = start + len;
711
712 PERL_ARGS_ASSERT_BYTES_TO_UNI;
713
714 while (start < end) {
715 const UV uv = NATIVE_TO_ASCII(*start);
716 if (UNI_IS_INVARIANT(uv))
717 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
718 else {
719 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
720 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
721 }
722 start++;
723 }
724 return dest;
725}
726
727#define PUSH_BYTES(utf8, cur, buf, len) \
728STMT_START { \
729 if (utf8) \
730 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
731 else { \
732 Copy(buf, cur, len, char); \
733 (cur) += (len); \
734 } \
735} STMT_END
736
737#define GROWING(utf8, cat, start, cur, in_len) \
738STMT_START { \
739 STRLEN glen = (in_len); \
740 if (utf8) glen *= UTF8_EXPAND; \
741 if ((cur) + glen >= (start) + SvLEN(cat)) { \
742 (start) = sv_exp_grow(cat, glen); \
743 (cur) = (start) + SvCUR(cat); \
744 } \
745} STMT_END
746
747#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
748STMT_START { \
749 const STRLEN glen = (in_len); \
750 STRLEN gl = glen; \
751 if (utf8) gl *= UTF8_EXPAND; \
752 if ((cur) + gl >= (start) + SvLEN(cat)) { \
753 *cur = '\0'; \
754 SvCUR_set((cat), (cur) - (start)); \
755 (start) = sv_exp_grow(cat, gl); \
756 (cur) = (start) + SvCUR(cat); \
757 } \
758 PUSH_BYTES(utf8, cur, buf, glen); \
759} STMT_END
760
761#define PUSH_BYTE(utf8, s, byte) \
762STMT_START { \
763 if (utf8) { \
764 const U8 au8 = (byte); \
765 (s) = bytes_to_uni(&au8, 1, (s)); \
766 } else *(U8 *)(s)++ = (byte); \
767} STMT_END
768
769/* Only to be used inside a loop (see the break) */
770#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
771STMT_START { \
772 STRLEN retlen; \
773 if (str >= end) break; \
774 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
775 if (retlen == (STRLEN) -1 || retlen == 0) { \
776 *cur = '\0'; \
777 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
778 } \
779 str += retlen; \
780} STMT_END
781
782static const char *_action( const tempsym_t* symptr )
783{
784 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
785}
786
787/* Returns the sizeof() struct described by pat */
788STATIC I32
789S_measure_struct(pTHX_ tempsym_t* symptr)
790{
791 I32 total = 0;
792
793 PERL_ARGS_ASSERT_MEASURE_STRUCT;
794
795 while (next_symbol(symptr)) {
796 I32 len;
797 int size;
798
799 switch (symptr->howlen) {
800 case e_star:
801 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
802 _action( symptr ) );
803 break;
804 default:
805 /* e_no_len and e_number */
806 len = symptr->length;
807 break;
808 }
809
810 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
811 if (!size) {
812 int star;
813 /* endianness doesn't influence the size of a type */
814 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
815 default:
816 Perl_croak(aTHX_ "Invalid type '%c' in %s",
817 (int)TYPE_NO_MODIFIERS(symptr->code),
818 _action( symptr ) );
819#ifdef PERL_PACK_CAN_SHRIEKSIGN
820 case '.' | TYPE_IS_SHRIEKING:
821 case '@' | TYPE_IS_SHRIEKING:
822#endif
823 case '@':
824 case '.':
825 case '/':
826 case 'U': /* XXXX Is it correct? */
827 case 'w':
828 case 'u':
829 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
830 (int) TYPE_NO_MODIFIERS(symptr->code),
831 _action( symptr ) );
832 case '%':
833 size = 0;
834 break;
835 case '(':
836 {
837 tempsym_t savsym = *symptr;
838 symptr->patptr = savsym.grpbeg;
839 symptr->patend = savsym.grpend;
840 /* XXXX Theoretically, we need to measure many times at
841 different positions, since the subexpression may contain
842 alignment commands, but be not of aligned length.
843 Need to detect this and croak(). */
844 size = measure_struct(symptr);
845 *symptr = savsym;
846 break;
847 }
848 case 'X' | TYPE_IS_SHRIEKING:
849 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
850 */
851 if (!len) /* Avoid division by 0 */
852 len = 1;
853 len = total % len; /* Assumed: the start is aligned. */
854 /* FALL THROUGH */
855 case 'X':
856 size = -1;
857 if (total < len)
858 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
859 break;
860 case 'x' | TYPE_IS_SHRIEKING:
861 if (!len) /* Avoid division by 0 */
862 len = 1;
863 star = total % len; /* Assumed: the start is aligned. */
864 if (star) /* Other portable ways? */
865 len = len - star;
866 else
867 len = 0;
868 /* FALL THROUGH */
869 case 'x':
870 case 'A':
871 case 'Z':
872 case 'a':
873 size = 1;
874 break;
875 case 'B':
876 case 'b':
877 len = (len + 7)/8;
878 size = 1;
879 break;
880 case 'H':
881 case 'h':
882 len = (len + 1)/2;
883 size = 1;
884 break;
885
886 case 'P':
887 len = 1;
888 size = sizeof(char*);
889 break;
890 }
891 }
892 total += len * size;
893 }
894 return total;
895}
896
897
898/* locate matching closing parenthesis or bracket
899 * returns char pointer to char after match, or NULL
900 */
901STATIC const char *
902S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
903{
904 PERL_ARGS_ASSERT_GROUP_END;
905
906 while (patptr < patend) {
907 const char c = *patptr++;
908
909 if (isSPACE(c))
910 continue;
911 else if (c == ender)
912 return patptr-1;
913 else if (c == '#') {
914 while (patptr < patend && *patptr != '\n')
915 patptr++;
916 continue;
917 } else if (c == '(')
918 patptr = group_end(patptr, patend, ')') + 1;
919 else if (c == '[')
920 patptr = group_end(patptr, patend, ']') + 1;
921 }
922 Perl_croak(aTHX_ "No group ending character '%c' found in template",
923 ender);
924 return 0;
925}
926
927
928/* Convert unsigned decimal number to binary.
929 * Expects a pointer to the first digit and address of length variable
930 * Advances char pointer to 1st non-digit char and returns number
931 */
932STATIC const char *
933S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
934{
935 I32 len = *patptr++ - '0';
936
937 PERL_ARGS_ASSERT_GET_NUM;
938
939 while (isDIGIT(*patptr)) {
940 if (len >= 0x7FFFFFFF/10)
941 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
942 len = (len * 10) + (*patptr++ - '0');
943 }
944 *lenptr = len;
945 return patptr;
946}
947
948/* The marvellous template parsing routine: Using state stored in *symptr,
949 * locates next template code and count
950 */
951STATIC bool
952S_next_symbol(pTHX_ tempsym_t* symptr )
953{
954 const char* patptr = symptr->patptr;
955 const char* const patend = symptr->patend;
956
957 PERL_ARGS_ASSERT_NEXT_SYMBOL;
958
959 symptr->flags &= ~FLAG_SLASH;
960
961 while (patptr < patend) {
962 if (isSPACE(*patptr))
963 patptr++;
964 else if (*patptr == '#') {
965 patptr++;
966 while (patptr < patend && *patptr != '\n')
967 patptr++;
968 if (patptr < patend)
969 patptr++;
970 } else {
971 /* We should have found a template code */
972 I32 code = *patptr++ & 0xFF;
973 U32 inherited_modifiers = 0;
974
975 if (code == ','){ /* grandfather in commas but with a warning */
976 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
977 symptr->flags |= FLAG_COMMA;
978 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
979 "Invalid type ',' in %s", _action( symptr ) );
980 }
981 continue;
982 }
983
984 /* for '(', skip to ')' */
985 if (code == '(') {
986 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
987 Perl_croak(aTHX_ "()-group starts with a count in %s",
988 _action( symptr ) );
989 symptr->grpbeg = patptr;
990 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
991 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
992 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
993 _action( symptr ) );
994 }
995
996 /* look for group modifiers to inherit */
997 if (TYPE_ENDIANNESS(symptr->flags)) {
998 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
999 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
1000 }
1001
1002 /* look for modifiers */
1003 while (patptr < patend) {
1004 const char *allowed;
1005 I32 modifier;
1006 switch (*patptr) {
1007 case '!':
1008 modifier = TYPE_IS_SHRIEKING;
1009 allowed = SHRIEKING_ALLOWED_TYPES;
1010 break;
1011#ifdef PERL_PACK_CAN_BYTEORDER
1012 case '>':
1013 modifier = TYPE_IS_BIG_ENDIAN;
1014 allowed = ENDIANNESS_ALLOWED_TYPES;
1015 break;
1016 case '<':
1017 modifier = TYPE_IS_LITTLE_ENDIAN;
1018 allowed = ENDIANNESS_ALLOWED_TYPES;
1019 break;
1020#endif /* PERL_PACK_CAN_BYTEORDER */
1021 default:
1022 allowed = "";
1023 modifier = 0;
1024 break;
1025 }
1026
1027 if (modifier == 0)
1028 break;
1029
1030 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1031 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
1032 allowed, _action( symptr ) );
1033
1034 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1035 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
1036 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
1037 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1038 TYPE_ENDIANNESS_MASK)
1039 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
1040 *patptr, _action( symptr ) );
1041
1042 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1043 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1044 "Duplicate modifier '%c' after '%c' in %s",
1045 *patptr, (int) TYPE_NO_MODIFIERS(code),
1046 _action( symptr ) );
1047 }
1048
1049 code |= modifier;
1050 patptr++;
1051 }
1052
1053 /* inherit modifiers */
1054 code |= inherited_modifiers;
1055
1056 /* look for count and/or / */
1057 if (patptr < patend) {
1058 if (isDIGIT(*patptr)) {
1059 patptr = get_num( patptr, &symptr->length );
1060 symptr->howlen = e_number;
1061
1062 } else if (*patptr == '*') {
1063 patptr++;
1064 symptr->howlen = e_star;
1065
1066 } else if (*patptr == '[') {
1067 const char* lenptr = ++patptr;
1068 symptr->howlen = e_number;
1069 patptr = group_end( patptr, patend, ']' ) + 1;
1070 /* what kind of [] is it? */
1071 if (isDIGIT(*lenptr)) {
1072 lenptr = get_num( lenptr, &symptr->length );
1073 if( *lenptr != ']' )
1074 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1075 _action( symptr ) );
1076 } else {
1077 tempsym_t savsym = *symptr;
1078 symptr->patend = patptr-1;
1079 symptr->patptr = lenptr;
1080 savsym.length = measure_struct(symptr);
1081 *symptr = savsym;
1082 }
1083 } else {
1084 symptr->howlen = e_no_len;
1085 symptr->length = 1;
1086 }
1087
1088 /* try to find / */
1089 while (patptr < patend) {
1090 if (isSPACE(*patptr))
1091 patptr++;
1092 else if (*patptr == '#') {
1093 patptr++;
1094 while (patptr < patend && *patptr != '\n')
1095 patptr++;
1096 if (patptr < patend)
1097 patptr++;
1098 } else {
1099 if (*patptr == '/') {
1100 symptr->flags |= FLAG_SLASH;
1101 patptr++;
1102 if (patptr < patend &&
1103 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1104 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1105 _action( symptr ) );
1106 }
1107 break;
1108 }
1109 }
1110 } else {
1111 /* at end - no count, no / */
1112 symptr->howlen = e_no_len;
1113 symptr->length = 1;
1114 }
1115
1116 symptr->code = code;
1117 symptr->patptr = patptr;
1118 return TRUE;
1119 }
1120 }
1121 symptr->patptr = patptr;
1122 return FALSE;
1123}
1124
1125/*
1126 There is no way to cleanly handle the case where we should process the
1127 string per byte in its upgraded form while it's really in downgraded form
1128 (e.g. estimates like strend-s as an upper bound for the number of
1129 characters left wouldn't work). So if we foresee the need of this
1130 (pattern starts with U or contains U0), we want to work on the encoded
1131 version of the string. Users are advised to upgrade their pack string
1132 themselves if they need to do a lot of unpacks like this on it
1133*/
1134STATIC bool
1135need_utf8(const char *pat, const char *patend)
1136{
1137 bool first = TRUE;
1138
1139 PERL_ARGS_ASSERT_NEED_UTF8;
1140
1141 while (pat < patend) {
1142 if (pat[0] == '#') {
1143 pat++;
1144 pat = (const char *) memchr(pat, '\n', patend-pat);
1145 if (!pat) return FALSE;
1146 } else if (pat[0] == 'U') {
1147 if (first || pat[1] == '0') return TRUE;
1148 } else first = FALSE;
1149 pat++;
1150 }
1151 return FALSE;
1152}
1153
1154STATIC char
1155first_symbol(const char *pat, const char *patend) {
1156 PERL_ARGS_ASSERT_FIRST_SYMBOL;
1157
1158 while (pat < patend) {
1159 if (pat[0] != '#') return pat[0];
1160 pat++;
1161 pat = (const char *) memchr(pat, '\n', patend-pat);
1162 if (!pat) return 0;
1163 pat++;
1164 }
1165 return 0;
1166}
1167
1168/*
1169=for apidoc unpackstring
1170
1171The engine implementing unpack() Perl function. C<unpackstring> puts the
1172extracted list items on the stack and returns the number of elements.
1173Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1174
1175=cut */
1176
1177I32
1178Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
1179{
1180 tempsym_t sym;
1181
1182 PERL_ARGS_ASSERT_UNPACKSTRING;
1183
1184 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1185 else if (need_utf8(pat, patend)) {
1186 /* We probably should try to avoid this in case a scalar context call
1187 wouldn't get to the "U0" */
1188 STRLEN len = strend - s;
1189 s = (char *) bytes_to_utf8((U8 *) s, &len);
1190 SAVEFREEPV(s);
1191 strend = s + len;
1192 flags |= FLAG_DO_UTF8;
1193 }
1194
1195 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1196 flags |= FLAG_PARSE_UTF8;
1197
1198 TEMPSYM_INIT(&sym, pat, patend, flags);
1199
1200 return unpack_rec(&sym, s, s, strend, NULL );
1201}
1202
1203STATIC I32
1204S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
1205{
1206 dVAR; dSP;
1207 SV *sv;
1208 const I32 start_sp_offset = SP - PL_stack_base;
1209 howlen_t howlen;
1210 I32 checksum = 0;
1211 UV cuv = 0;
1212 NV cdouble = 0.0;
1213 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1214 bool beyond = FALSE;
1215 bool explicit_length;
1216 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1217 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1218
1219 PERL_ARGS_ASSERT_UNPACK_REC;
1220
1221 symptr->strbeg = s - strbeg;
1222
1223 while (next_symbol(symptr)) {
1224 packprops_t props;
1225 I32 len;
1226 I32 datumtype = symptr->code;
1227 /* do first one only unless in list context
1228 / is implemented by unpacking the count, then popping it from the
1229 stack, so must check that we're not in the middle of a / */
1230 if ( unpack_only_one
1231 && (SP - PL_stack_base == start_sp_offset + 1)
1232 && (datumtype != '/') ) /* XXX can this be omitted */
1233 break;
1234
1235 switch (howlen = symptr->howlen) {
1236 case e_star:
1237 len = strend - strbeg; /* long enough */
1238 break;
1239 default:
1240 /* e_no_len and e_number */
1241 len = symptr->length;
1242 break;
1243 }
1244
1245 explicit_length = TRUE;
1246 redo_switch:
1247 beyond = s >= strend;
1248
1249 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1250 if (props) {
1251 /* props nonzero means we can process this letter. */
1252 const long size = props & PACK_SIZE_MASK;
1253 const long howmany = (strend - s) / size;
1254 if (len > howmany)
1255 len = howmany;
1256
1257 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1258 if (len && unpack_only_one) len = 1;
1259 EXTEND(SP, len);
1260 EXTEND_MORTAL(len);
1261 }
1262 }
1263
1264 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1265 default:
1266 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1267
1268 case '%':
1269 if (howlen == e_no_len)
1270 len = 16; /* len is not specified */
1271 checksum = len;
1272 cuv = 0;
1273 cdouble = 0;
1274 continue;
1275 break;
1276 case '(':
1277 {
1278 tempsym_t savsym = *symptr;
1279 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1280 symptr->flags |= group_modifiers;
1281 symptr->patend = savsym.grpend;
1282 symptr->previous = &savsym;
1283 symptr->level++;
1284 PUTBACK;
1285 if (len && unpack_only_one) len = 1;
1286 while (len--) {
1287 symptr->patptr = savsym.grpbeg;
1288 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1289 else symptr->flags &= ~FLAG_PARSE_UTF8;
1290 unpack_rec(symptr, s, strbeg, strend, &s);
1291 if (s == strend && savsym.howlen == e_star)
1292 break; /* No way to continue */
1293 }
1294 SPAGAIN;
1295 savsym.flags = symptr->flags & ~group_modifiers;
1296 *symptr = savsym;
1297 break;
1298 }
1299#ifdef PERL_PACK_CAN_SHRIEKSIGN
1300 case '.' | TYPE_IS_SHRIEKING:
1301#endif
1302 case '.': {
1303 const char *from;
1304 SV *sv;
1305#ifdef PERL_PACK_CAN_SHRIEKSIGN
1306 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1307#else /* PERL_PACK_CAN_SHRIEKSIGN */
1308 const bool u8 = utf8;
1309#endif
1310 if (howlen == e_star) from = strbeg;
1311 else if (len <= 0) from = s;
1312 else {
1313 tempsym_t *group = symptr;
1314
1315 while (--len && group) group = group->previous;
1316 from = group ? strbeg + group->strbeg : strbeg;
1317 }
1318 sv = from <= s ?
1319 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1320 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1321 mXPUSHs(sv);
1322 break;
1323 }
1324#ifdef PERL_PACK_CAN_SHRIEKSIGN
1325 case '@' | TYPE_IS_SHRIEKING:
1326#endif
1327 case '@':
1328 s = strbeg + symptr->strbeg;
1329#ifdef PERL_PACK_CAN_SHRIEKSIGN
1330 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1331#else /* PERL_PACK_CAN_SHRIEKSIGN */
1332 if (utf8)
1333#endif
1334 {
1335 while (len > 0) {
1336 if (s >= strend)
1337 Perl_croak(aTHX_ "'@' outside of string in unpack");
1338 s += UTF8SKIP(s);
1339 len--;
1340 }
1341 if (s > strend)
1342 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1343 } else {
1344 if (strend-s < len)
1345 Perl_croak(aTHX_ "'@' outside of string in unpack");
1346 s += len;
1347 }
1348 break;
1349 case 'X' | TYPE_IS_SHRIEKING:
1350 if (!len) /* Avoid division by 0 */
1351 len = 1;
1352 if (utf8) {
1353 const char *hop, *last;
1354 I32 l = len;
1355 hop = last = strbeg;
1356 while (hop < s) {
1357 hop += UTF8SKIP(hop);
1358 if (--l == 0) {
1359 last = hop;
1360 l = len;
1361 }
1362 }
1363 if (last > s)
1364 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1365 s = last;
1366 break;
1367 }
1368 len = (s - strbeg) % len;
1369 /* FALL THROUGH */
1370 case 'X':
1371 if (utf8) {
1372 while (len > 0) {
1373 if (s <= strbeg)
1374 Perl_croak(aTHX_ "'X' outside of string in unpack");
1375 while (--s, UTF8_IS_CONTINUATION(*s)) {
1376 if (s <= strbeg)
1377 Perl_croak(aTHX_ "'X' outside of string in unpack");
1378 }
1379 len--;
1380 }
1381 } else {
1382 if (len > s - strbeg)
1383 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1384 s -= len;
1385 }
1386 break;
1387 case 'x' | TYPE_IS_SHRIEKING: {
1388 I32 ai32;
1389 if (!len) /* Avoid division by 0 */
1390 len = 1;
1391 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1392 else ai32 = (s - strbeg) % len;
1393 if (ai32 == 0) break;
1394 len -= ai32;
1395 }
1396 /* FALL THROUGH */
1397 case 'x':
1398 if (utf8) {
1399 while (len>0) {
1400 if (s >= strend)
1401 Perl_croak(aTHX_ "'x' outside of string in unpack");
1402 s += UTF8SKIP(s);
1403 len--;
1404 }
1405 } else {
1406 if (len > strend - s)
1407 Perl_croak(aTHX_ "'x' outside of string in unpack");
1408 s += len;
1409 }
1410 break;
1411 case '/':
1412 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1413 break;
1414 case 'A':
1415 case 'Z':
1416 case 'a':
1417 if (checksum) {
1418 /* Preliminary length estimate is assumed done in 'W' */
1419 if (len > strend - s) len = strend - s;
1420 goto W_checksum;
1421 }
1422 if (utf8) {
1423 I32 l;
1424 const char *hop;
1425 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1426 if (hop >= strend) {
1427 if (hop > strend)
1428 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1429 break;
1430 }
1431 }
1432 if (hop > strend)
1433 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1434 len = hop - s;
1435 } else if (len > strend - s)
1436 len = strend - s;
1437
1438 if (datumtype == 'Z') {
1439 /* 'Z' strips stuff after first null */
1440 const char *ptr, *end;
1441 end = s + len;
1442 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1443 sv = newSVpvn(s, ptr-s);
1444 if (howlen == e_star) /* exact for 'Z*' */
1445 len = ptr-s + (ptr != strend ? 1 : 0);
1446 } else if (datumtype == 'A') {
1447 /* 'A' strips both nulls and spaces */
1448 const char *ptr;
1449 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1450 for (ptr = s+len-1; ptr >= s; ptr--)
1451 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1452 !is_utf8_space((U8 *) ptr)) break;
1453 if (ptr >= s) ptr += UTF8SKIP(ptr);
1454 else ptr++;
1455 if (ptr > s+len)
1456 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1457 } else {
1458 for (ptr = s+len-1; ptr >= s; ptr--)
1459 if (*ptr != 0 && !isSPACE(*ptr)) break;
1460 ptr++;
1461 }
1462 sv = newSVpvn(s, ptr-s);
1463 } else sv = newSVpvn(s, len);
1464
1465 if (utf8) {
1466 SvUTF8_on(sv);
1467 /* Undo any upgrade done due to need_utf8() */
1468 if (!(symptr->flags & FLAG_WAS_UTF8))
1469 sv_utf8_downgrade(sv, 0);
1470 }
1471 mXPUSHs(sv);
1472 s += len;
1473 break;
1474 case 'B':
1475 case 'b': {
1476 char *str;
1477 if (howlen == e_star || len > (strend - s) * 8)
1478 len = (strend - s) * 8;
1479 if (checksum) {
1480 if (utf8)
1481 while (len >= 8 && s < strend) {
1482 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1483 len -= 8;
1484 }
1485 else
1486 while (len >= 8) {
1487 cuv += PL_bitcount[*(U8 *)s++];
1488 len -= 8;
1489 }
1490 if (len && s < strend) {
1491 U8 bits;
1492 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1493 if (datumtype == 'b')
1494 while (len-- > 0) {
1495 if (bits & 1) cuv++;
1496 bits >>= 1;
1497 }
1498 else
1499 while (len-- > 0) {
1500 if (bits & 0x80) cuv++;
1501 bits <<= 1;
1502 }
1503 }
1504 break;
1505 }
1506
1507 sv = sv_2mortal(newSV(len ? len : 1));
1508 SvPOK_on(sv);
1509 str = SvPVX(sv);
1510 if (datumtype == 'b') {
1511 U8 bits = 0;
1512 const I32 ai32 = len;
1513 for (len = 0; len < ai32; len++) {
1514 if (len & 7) bits >>= 1;
1515 else if (utf8) {
1516 if (s >= strend) break;
1517 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1518 } else bits = *(U8 *) s++;
1519 *str++ = bits & 1 ? '1' : '0';
1520 }
1521 } else {
1522 U8 bits = 0;
1523 const I32 ai32 = len;
1524 for (len = 0; len < ai32; len++) {
1525 if (len & 7) bits <<= 1;
1526 else if (utf8) {
1527 if (s >= strend) break;
1528 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1529 } else bits = *(U8 *) s++;
1530 *str++ = bits & 0x80 ? '1' : '0';
1531 }
1532 }
1533 *str = '\0';
1534 SvCUR_set(sv, str - SvPVX_const(sv));
1535 XPUSHs(sv);
1536 break;
1537 }
1538 case 'H':
1539 case 'h': {
1540 char *str;
1541 /* Preliminary length estimate, acceptable for utf8 too */
1542 if (howlen == e_star || len > (strend - s) * 2)
1543 len = (strend - s) * 2;
1544 sv = sv_2mortal(newSV(len ? len : 1));
1545 SvPOK_on(sv);
1546 str = SvPVX(sv);
1547 if (datumtype == 'h') {
1548 U8 bits = 0;
1549 I32 ai32 = len;
1550 for (len = 0; len < ai32; len++) {
1551 if (len & 1) bits >>= 4;
1552 else if (utf8) {
1553 if (s >= strend) break;
1554 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1555 } else bits = * (U8 *) s++;
1556 *str++ = PL_hexdigit[bits & 15];
1557 }
1558 } else {
1559 U8 bits = 0;
1560 const I32 ai32 = len;
1561 for (len = 0; len < ai32; len++) {
1562 if (len & 1) bits <<= 4;
1563 else if (utf8) {
1564 if (s >= strend) break;
1565 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1566 } else bits = *(U8 *) s++;
1567 *str++ = PL_hexdigit[(bits >> 4) & 15];
1568 }
1569 }
1570 *str = '\0';
1571 SvCUR_set(sv, str - SvPVX_const(sv));
1572 XPUSHs(sv);
1573 break;
1574 }
1575 case 'C':
1576 if (len == 0) {
1577 if (explicit_length)
1578 /* Switch to "character" mode */
1579 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1580 break;
1581 }
1582 /* FALL THROUGH */
1583 case 'c':
1584 while (len-- > 0 && s < strend) {
1585 int aint;
1586 if (utf8)
1587 {
1588 STRLEN retlen;
1589 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1590 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1591 if (retlen == (STRLEN) -1 || retlen == 0)
1592 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1593 s += retlen;
1594 }
1595 else
1596 aint = *(U8 *)(s)++;
1597 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1598 aint -= 256;
1599 if (!checksum)
1600 mPUSHi(aint);
1601 else if (checksum > bits_in_uv)
1602 cdouble += (NV)aint;
1603 else
1604 cuv += aint;
1605 }
1606 break;
1607 case 'W':
1608 W_checksum:
1609 if (utf8) {
1610 while (len-- > 0 && s < strend) {
1611 STRLEN retlen;
1612 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1613 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1614 if (retlen == (STRLEN) -1 || retlen == 0)
1615 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1616 s += retlen;
1617 if (!checksum)
1618 mPUSHu(val);
1619 else if (checksum > bits_in_uv)
1620 cdouble += (NV) val;
1621 else
1622 cuv += val;
1623 }
1624 } else if (!checksum)
1625 while (len-- > 0) {
1626 const U8 ch = *(U8 *) s++;
1627 mPUSHu(ch);
1628 }
1629 else if (checksum > bits_in_uv)
1630 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1631 else
1632 while (len-- > 0) cuv += *(U8 *) s++;
1633 break;
1634 case 'U':
1635 if (len == 0) {
1636 if (explicit_length) {
1637 /* Switch to "bytes in UTF-8" mode */
1638 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1639 else
1640 /* Should be impossible due to the need_utf8() test */
1641 Perl_croak(aTHX_ "U0 mode on a byte string");
1642 }
1643 break;
1644 }
1645 if (len > strend - s) len = strend - s;
1646 if (!checksum) {
1647 if (len && unpack_only_one) len = 1;
1648 EXTEND(SP, len);
1649 EXTEND_MORTAL(len);
1650 }
1651 while (len-- > 0 && s < strend) {
1652 STRLEN retlen;
1653 UV auv;
1654 if (utf8) {
1655 U8 result[UTF8_MAXLEN];
1656 const char *ptr = s;
1657 STRLEN len;
1658 /* Bug: warns about bad utf8 even if we are short on bytes
1659 and will break out of the loop */
1660 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1661 'U'))
1662 break;
1663 len = UTF8SKIP(result);
1664 if (!uni_to_bytes(aTHX_ &ptr, strend,
1665 (char *) &result[1], len-1, 'U')) break;
1666 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1667 s = ptr;
1668 } else {
1669 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1670 if (retlen == (STRLEN) -1 || retlen == 0)
1671 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1672 s += retlen;
1673 }
1674 if (!checksum)
1675 mPUSHu(auv);
1676 else if (checksum > bits_in_uv)
1677 cdouble += (NV) auv;
1678 else
1679 cuv += auv;
1680 }
1681 break;
1682 case 's' | TYPE_IS_SHRIEKING:
1683#if SHORTSIZE != SIZE16
1684 while (len-- > 0) {
1685 short ashort;
1686 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1687 DO_BO_UNPACK(ashort, s);
1688 if (!checksum)
1689 mPUSHi(ashort);
1690 else if (checksum > bits_in_uv)
1691 cdouble += (NV)ashort;
1692 else
1693 cuv += ashort;
1694 }
1695 break;
1696#else
1697 /* Fallthrough! */
1698#endif
1699 case 's':
1700 while (len-- > 0) {
1701 I16 ai16;
1702
1703#if U16SIZE > SIZE16
1704 ai16 = 0;
1705#endif
1706 SHIFT16(utf8, s, strend, &ai16, datumtype);
1707 DO_BO_UNPACK(ai16, 16);
1708#if U16SIZE > SIZE16
1709 if (ai16 > 32767)
1710 ai16 -= 65536;
1711#endif
1712 if (!checksum)
1713 mPUSHi(ai16);
1714 else if (checksum > bits_in_uv)
1715 cdouble += (NV)ai16;
1716 else
1717 cuv += ai16;
1718 }
1719 break;
1720 case 'S' | TYPE_IS_SHRIEKING:
1721#if SHORTSIZE != SIZE16
1722 while (len-- > 0) {
1723 unsigned short aushort;
1724 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1725 DO_BO_UNPACK(aushort, s);
1726 if (!checksum)
1727 mPUSHu(aushort);
1728 else if (checksum > bits_in_uv)
1729 cdouble += (NV)aushort;
1730 else
1731 cuv += aushort;
1732 }
1733 break;
1734#else
1735 /* Fallhrough! */
1736#endif
1737 case 'v':
1738 case 'n':
1739 case 'S':
1740 while (len-- > 0) {
1741 U16 au16;
1742#if U16SIZE > SIZE16
1743 au16 = 0;
1744#endif
1745 SHIFT16(utf8, s, strend, &au16, datumtype);
1746 DO_BO_UNPACK(au16, 16);
1747#ifdef HAS_NTOHS
1748 if (datumtype == 'n')
1749 au16 = PerlSock_ntohs(au16);
1750#endif
1751#ifdef HAS_VTOHS
1752 if (datumtype == 'v')
1753 au16 = vtohs(au16);
1754#endif
1755 if (!checksum)
1756 mPUSHu(au16);
1757 else if (checksum > bits_in_uv)
1758 cdouble += (NV) au16;
1759 else
1760 cuv += au16;
1761 }
1762 break;
1763#ifdef PERL_PACK_CAN_SHRIEKSIGN
1764 case 'v' | TYPE_IS_SHRIEKING:
1765 case 'n' | TYPE_IS_SHRIEKING:
1766 while (len-- > 0) {
1767 I16 ai16;
1768# if U16SIZE > SIZE16
1769 ai16 = 0;
1770# endif
1771 SHIFT16(utf8, s, strend, &ai16, datumtype);
1772# ifdef HAS_NTOHS
1773 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1774 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1775# endif /* HAS_NTOHS */
1776# ifdef HAS_VTOHS
1777 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1778 ai16 = (I16) vtohs((U16) ai16);
1779# endif /* HAS_VTOHS */
1780 if (!checksum)
1781 mPUSHi(ai16);
1782 else if (checksum > bits_in_uv)
1783 cdouble += (NV) ai16;
1784 else
1785 cuv += ai16;
1786 }
1787 break;
1788#endif /* PERL_PACK_CAN_SHRIEKSIGN */
1789 case 'i':
1790 case 'i' | TYPE_IS_SHRIEKING:
1791 while (len-- > 0) {
1792 int aint;
1793 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1794 DO_BO_UNPACK(aint, i);
1795 if (!checksum)
1796 mPUSHi(aint);
1797 else if (checksum > bits_in_uv)
1798 cdouble += (NV)aint;
1799 else
1800 cuv += aint;
1801 }
1802 break;
1803 case 'I':
1804 case 'I' | TYPE_IS_SHRIEKING:
1805 while (len-- > 0) {
1806 unsigned int auint;
1807 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1808 DO_BO_UNPACK(auint, i);
1809 if (!checksum)
1810 mPUSHu(auint);
1811 else if (checksum > bits_in_uv)
1812 cdouble += (NV)auint;
1813 else
1814 cuv += auint;
1815 }
1816 break;
1817 case 'j':
1818 while (len-- > 0) {
1819 IV aiv;
1820 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1821#if IVSIZE == INTSIZE
1822 DO_BO_UNPACK(aiv, i);
1823#elif IVSIZE == LONGSIZE
1824 DO_BO_UNPACK(aiv, l);
1825#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1826 DO_BO_UNPACK(aiv, 64);
1827#else
1828 Perl_croak(aTHX_ "'j' not supported on this platform");
1829#endif
1830 if (!checksum)
1831 mPUSHi(aiv);
1832 else if (checksum > bits_in_uv)
1833 cdouble += (NV)aiv;
1834 else
1835 cuv += aiv;
1836 }
1837 break;
1838 case 'J':
1839 while (len-- > 0) {
1840 UV auv;
1841 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1842#if IVSIZE == INTSIZE
1843 DO_BO_UNPACK(auv, i);
1844#elif IVSIZE == LONGSIZE
1845 DO_BO_UNPACK(auv, l);
1846#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1847 DO_BO_UNPACK(auv, 64);
1848#else
1849 Perl_croak(aTHX_ "'J' not supported on this platform");
1850#endif
1851 if (!checksum)
1852 mPUSHu(auv);
1853 else if (checksum > bits_in_uv)
1854 cdouble += (NV)auv;
1855 else
1856 cuv += auv;
1857 }
1858 break;
1859 case 'l' | TYPE_IS_SHRIEKING:
1860#if LONGSIZE != SIZE32
1861 while (len-- > 0) {
1862 long along;
1863 SHIFT_VAR(utf8, s, strend, along, datumtype);
1864 DO_BO_UNPACK(along, l);
1865 if (!checksum)
1866 mPUSHi(along);
1867 else if (checksum > bits_in_uv)
1868 cdouble += (NV)along;
1869 else
1870 cuv += along;
1871 }
1872 break;
1873#else
1874 /* Fallthrough! */
1875#endif
1876 case 'l':
1877 while (len-- > 0) {
1878 I32 ai32;
1879#if U32SIZE > SIZE32
1880 ai32 = 0;
1881#endif
1882 SHIFT32(utf8, s, strend, &ai32, datumtype);
1883 DO_BO_UNPACK(ai32, 32);
1884#if U32SIZE > SIZE32
1885 if (ai32 > 2147483647) ai32 -= 4294967296;
1886#endif
1887 if (!checksum)
1888 mPUSHi(ai32);
1889 else if (checksum > bits_in_uv)
1890 cdouble += (NV)ai32;
1891 else
1892 cuv += ai32;
1893 }
1894 break;
1895 case 'L' | TYPE_IS_SHRIEKING:
1896#if LONGSIZE != SIZE32
1897 while (len-- > 0) {
1898 unsigned long aulong;
1899 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1900 DO_BO_UNPACK(aulong, l);
1901 if (!checksum)
1902 mPUSHu(aulong);
1903 else if (checksum > bits_in_uv)
1904 cdouble += (NV)aulong;
1905 else
1906 cuv += aulong;
1907 }
1908 break;
1909#else
1910 /* Fall through! */
1911#endif
1912 case 'V':
1913 case 'N':
1914 case 'L':
1915 while (len-- > 0) {
1916 U32 au32;
1917#if U32SIZE > SIZE32
1918 au32 = 0;
1919#endif
1920 SHIFT32(utf8, s, strend, &au32, datumtype);
1921 DO_BO_UNPACK(au32, 32);
1922#ifdef HAS_NTOHL
1923 if (datumtype == 'N')
1924 au32 = PerlSock_ntohl(au32);
1925#endif
1926#ifdef HAS_VTOHL
1927 if (datumtype == 'V')
1928 au32 = vtohl(au32);
1929#endif
1930 if (!checksum)
1931 mPUSHu(au32);
1932 else if (checksum > bits_in_uv)
1933 cdouble += (NV)au32;
1934 else
1935 cuv += au32;
1936 }
1937 break;
1938#ifdef PERL_PACK_CAN_SHRIEKSIGN
1939 case 'V' | TYPE_IS_SHRIEKING:
1940 case 'N' | TYPE_IS_SHRIEKING:
1941 while (len-- > 0) {
1942 I32 ai32;
1943# if U32SIZE > SIZE32
1944 ai32 = 0;
1945# endif
1946 SHIFT32(utf8, s, strend, &ai32, datumtype);
1947# ifdef HAS_NTOHL
1948 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1949 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1950# endif
1951# ifdef HAS_VTOHL
1952 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1953 ai32 = (I32)vtohl((U32)ai32);
1954# endif
1955 if (!checksum)
1956 mPUSHi(ai32);
1957 else if (checksum > bits_in_uv)
1958 cdouble += (NV)ai32;
1959 else
1960 cuv += ai32;
1961 }
1962 break;
1963#endif /* PERL_PACK_CAN_SHRIEKSIGN */
1964 case 'p':
1965 while (len-- > 0) {
1966 const char *aptr;
1967 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1968 DO_BO_UNPACK_PC(aptr);
1969 /* newSVpv generates undef if aptr is NULL */
1970 mPUSHs(newSVpv(aptr, 0));
1971 }
1972 break;
1973 case 'w':
1974 {
1975 UV auv = 0;
1976 U32 bytes = 0;
1977
1978 while (len > 0 && s < strend) {
1979 U8 ch;
1980 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1981 auv = (auv << 7) | (ch & 0x7f);
1982 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1983 if (ch < 0x80) {
1984 bytes = 0;
1985 mPUSHu(auv);
1986 len--;
1987 auv = 0;
1988 continue;
1989 }
1990 if (++bytes >= sizeof(UV)) { /* promote to string */
1991 const char *t;
1992
1993 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
1994 while (s < strend) {
1995 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1996 sv = mul128(sv, (U8)(ch & 0x7f));
1997 if (!(ch & 0x80)) {
1998 bytes = 0;
1999 break;
2000 }
2001 }
2002 t = SvPV_nolen_const(sv);
2003 while (*t == '0')
2004 t++;
2005 sv_chop(sv, t);
2006 mPUSHs(sv);
2007 len--;
2008 auv = 0;
2009 }
2010 }
2011 if ((s >= strend) && bytes)
2012 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
2013 }
2014 break;
2015 case 'P':
2016 if (symptr->howlen == e_star)
2017 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
2018 EXTEND(SP, 1);
2019 if (s + sizeof(char*) <= strend) {
2020 char *aptr;
2021 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2022 DO_BO_UNPACK_PC(aptr);
2023 /* newSVpvn generates undef if aptr is NULL */
2024 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
2025 }
2026 break;
2027#ifdef HAS_QUAD
2028 case 'q':
2029 while (len-- > 0) {
2030 Quad_t aquad;
2031 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2032 DO_BO_UNPACK(aquad, 64);
2033 if (!checksum)
2034 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
2035 newSViv((IV)aquad) : newSVnv((NV)aquad));
2036 else if (checksum > bits_in_uv)
2037 cdouble += (NV)aquad;
2038 else
2039 cuv += aquad;
2040 }
2041 break;
2042 case 'Q':
2043 while (len-- > 0) {
2044 Uquad_t auquad;
2045 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2046 DO_BO_UNPACK(auquad, 64);
2047 if (!checksum)
2048 mPUSHs(auquad <= UV_MAX ?
2049 newSVuv((UV)auquad) : newSVnv((NV)auquad));
2050 else if (checksum > bits_in_uv)
2051 cdouble += (NV)auquad;
2052 else
2053 cuv += auquad;
2054 }
2055 break;
2056#endif /* HAS_QUAD */
2057 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2058 case 'f':
2059 while (len-- > 0) {
2060 float afloat;
2061 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
2062 DO_BO_UNPACK_N(afloat, float);
2063 if (!checksum)
2064 mPUSHn(afloat);
2065 else
2066 cdouble += afloat;
2067 }
2068 break;
2069 case 'd':
2070 while (len-- > 0) {
2071 double adouble;
2072 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
2073 DO_BO_UNPACK_N(adouble, double);
2074 if (!checksum)
2075 mPUSHn(adouble);
2076 else
2077 cdouble += adouble;
2078 }
2079 break;
2080 case 'F':
2081 while (len-- > 0) {
2082 NV anv;
2083 SHIFT_VAR(utf8, s, strend, anv, datumtype);
2084 DO_BO_UNPACK_N(anv, NV);
2085 if (!checksum)
2086 mPUSHn(anv);
2087 else
2088 cdouble += anv;
2089 }
2090 break;
2091#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2092 case 'D':
2093 while (len-- > 0) {
2094 long double aldouble;
2095 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
2096 DO_BO_UNPACK_N(aldouble, long double);
2097 if (!checksum)
2098 mPUSHn(aldouble);
2099 else
2100 cdouble += aldouble;
2101 }
2102 break;
2103#endif
2104 case 'u':
2105 {
2106 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2107 sv = sv_2mortal(newSV(l));
2108 if (l) SvPOK_on(sv);
2109 }
2110 if (utf8) {
2111 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2112 I32 a, b, c, d;
2113 char hunk[3];
2114
2115 while (len > 0) {
2116 next_uni_uu(aTHX_ &s, strend, &a);
2117 next_uni_uu(aTHX_ &s, strend, &b);
2118 next_uni_uu(aTHX_ &s, strend, &c);
2119 next_uni_uu(aTHX_ &s, strend, &d);
2120 hunk[0] = (char)((a << 2) | (b >> 4));
2121 hunk[1] = (char)((b << 4) | (c >> 2));
2122 hunk[2] = (char)((c << 6) | d);
2123 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2124 len -= 3;
2125 }
2126 if (s < strend) {
2127 if (*s == '\n') {
2128 s++;
2129 }
2130 else {
2131 /* possible checksum byte */
2132 const char *skip = s+UTF8SKIP(s);
2133 if (skip < strend && *skip == '\n')
2134 s = skip+1;
2135 }
2136 }
2137 }
2138 } else {
2139 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2140 I32 a, b, c, d;
2141 char hunk[3];
2142
2143 len = PL_uudmap[*(U8*)s++] & 077;
2144 while (len > 0) {
2145 if (s < strend && ISUUCHAR(*s))
2146 a = PL_uudmap[*(U8*)s++] & 077;
2147 else
2148 a = 0;
2149 if (s < strend && ISUUCHAR(*s))
2150 b = PL_uudmap[*(U8*)s++] & 077;
2151 else
2152 b = 0;
2153 if (s < strend && ISUUCHAR(*s))
2154 c = PL_uudmap[*(U8*)s++] & 077;
2155 else
2156 c = 0;
2157 if (s < strend && ISUUCHAR(*s))
2158 d = PL_uudmap[*(U8*)s++] & 077;
2159 else
2160 d = 0;
2161 hunk[0] = (char)((a << 2) | (b >> 4));
2162 hunk[1] = (char)((b << 4) | (c >> 2));
2163 hunk[2] = (char)((c << 6) | d);
2164 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2165 len -= 3;
2166 }
2167 if (*s == '\n')
2168 s++;
2169 else /* possible checksum byte */
2170 if (s + 1 < strend && s[1] == '\n')
2171 s += 2;
2172 }
2173 }
2174 XPUSHs(sv);
2175 break;
2176 }
2177
2178 if (checksum) {
2179 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2180 (checksum > bits_in_uv &&
2181 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2182 NV trouble, anv;
2183
2184 anv = (NV) (1 << (checksum & 15));
2185 while (checksum >= 16) {
2186 checksum -= 16;
2187 anv *= 65536.0;
2188 }
2189 while (cdouble < 0.0)
2190 cdouble += anv;
2191 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2192 sv = newSVnv(cdouble);
2193 }
2194 else {
2195 if (checksum < bits_in_uv) {
2196 UV mask = ((UV)1 << checksum) - 1;
2197 cuv &= mask;
2198 }
2199 sv = newSVuv(cuv);
2200 }
2201 mXPUSHs(sv);
2202 checksum = 0;
2203 }
2204
2205 if (symptr->flags & FLAG_SLASH){
2206 if (SP - PL_stack_base - start_sp_offset <= 0)
2207 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2208 if( next_symbol(symptr) ){
2209 if( symptr->howlen == e_number )
2210 Perl_croak(aTHX_ "Count after length/code in unpack" );
2211 if( beyond ){
2212 /* ...end of char buffer then no decent length available */
2213 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2214 } else {
2215 /* take top of stack (hope it's numeric) */
2216 len = POPi;
2217 if( len < 0 )
2218 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2219 }
2220 } else {
2221 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2222 }
2223 datumtype = symptr->code;
2224 explicit_length = FALSE;
2225 goto redo_switch;
2226 }
2227 }
2228
2229 if (new_s)
2230 *new_s = s;
2231 PUTBACK;
2232 return SP - PL_stack_base - start_sp_offset;
2233}
2234
2235PP(pp_unpack)
2236{
2237 dVAR;
2238 dSP;
2239 dPOPPOPssrl;
2240 I32 gimme = GIMME_V;
2241 STRLEN llen;
2242 STRLEN rlen;
2243 const char *pat = SvPV_const(left, llen);
2244 const char *s = SvPV_const(right, rlen);
2245 const char *strend = s + rlen;
2246 const char *patend = pat + llen;
2247 I32 cnt;
2248
2249 PUTBACK;
2250 cnt = unpackstring(pat, patend, s, strend,
2251 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2252 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2253
2254 SPAGAIN;
2255 if ( !cnt && gimme == G_SCALAR )
2256 PUSHs(&PL_sv_undef);
2257 RETURN;
2258}
2259
2260STATIC U8 *
2261doencodes(U8 *h, const char *s, I32 len)
2262{
2263 *h++ = PL_uuemap[len];
2264 while (len > 2) {
2265 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2266 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2267 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2268 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2269 s += 3;
2270 len -= 3;
2271 }
2272 if (len > 0) {
2273 const char r = (len > 1 ? s[1] : '\0');
2274 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2275 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2276 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2277 *h++ = PL_uuemap[0];
2278 }
2279 *h++ = '\n';
2280 return h;
2281}
2282
2283STATIC SV *
2284S_is_an_int(pTHX_ const char *s, STRLEN l)
2285{
2286 SV *result = newSVpvn(s, l);
2287 char *const result_c = SvPV_nolen(result); /* convenience */
2288 char *out = result_c;
2289 bool skip = 1;
2290 bool ignore = 0;
2291
2292 PERL_ARGS_ASSERT_IS_AN_INT;
2293
2294 while (*s) {
2295 switch (*s) {
2296 case ' ':
2297 break;
2298 case '+':
2299 if (!skip) {
2300 SvREFCNT_dec(result);
2301 return (NULL);
2302 }
2303 break;
2304 case '0':
2305 case '1':
2306 case '2':
2307 case '3':
2308 case '4':
2309 case '5':
2310 case '6':
2311 case '7':
2312 case '8':
2313 case '9':
2314 skip = 0;
2315 if (!ignore) {
2316 *(out++) = *s;
2317 }
2318 break;
2319 case '.':
2320 ignore = 1;
2321 break;
2322 default:
2323 SvREFCNT_dec(result);
2324 return (NULL);
2325 }
2326 s++;
2327 }
2328 *(out++) = '\0';
2329 SvCUR_set(result, out - result_c);
2330 return (result);
2331}
2332
2333/* pnum must be '\0' terminated */
2334STATIC int
2335S_div128(pTHX_ SV *pnum, bool *done)
2336{
2337 STRLEN len;
2338 char * const s = SvPV(pnum, len);
2339 char *t = s;
2340 int m = 0;
2341
2342 PERL_ARGS_ASSERT_DIV128;
2343
2344 *done = 1;
2345 while (*t) {
2346 const int i = m * 10 + (*t - '0');
2347 const int r = (i >> 7); /* r < 10 */
2348 m = i & 0x7F;
2349 if (r) {
2350 *done = 0;
2351 }
2352 *(t++) = '0' + r;
2353 }
2354 *(t++) = '\0';
2355 SvCUR_set(pnum, (STRLEN) (t - s));
2356 return (m);
2357}
2358
2359/*
2360=for apidoc packlist
2361
2362The engine implementing pack() Perl function.
2363
2364=cut
2365*/
2366
2367void
2368Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
2369{
2370 dVAR;
2371 tempsym_t sym;
2372
2373 PERL_ARGS_ASSERT_PACKLIST;
2374
2375 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2376
2377 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2378 Also make sure any UTF8 flag is loaded */
2379 SvPV_force_nolen(cat);
2380 if (DO_UTF8(cat))
2381 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2382
2383 (void)pack_rec( cat, &sym, beglist, endlist );
2384}
2385
2386/* like sv_utf8_upgrade, but also repoint the group start markers */
2387STATIC void
2388marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2389 STRLEN len;
2390 tempsym_t *group;
2391 const char *from_ptr, *from_start, *from_end, **marks, **m;
2392 char *to_start, *to_ptr;
2393
2394 if (SvUTF8(sv)) return;
2395
2396 from_start = SvPVX_const(sv);
2397 from_end = from_start + SvCUR(sv);
2398 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2399 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2400 if (from_ptr == from_end) {
2401 /* Simple case: no character needs to be changed */
2402 SvUTF8_on(sv);
2403 return;
2404 }
2405
2406 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2407 Newx(to_start, len, char);
2408 Copy(from_start, to_start, from_ptr-from_start, char);
2409 to_ptr = to_start + (from_ptr-from_start);
2410
2411 Newx(marks, sym_ptr->level+2, const char *);
2412 for (group=sym_ptr; group; group = group->previous)
2413 marks[group->level] = from_start + group->strbeg;
2414 marks[sym_ptr->level+1] = from_end+1;
2415 for (m = marks; *m < from_ptr; m++)
2416 *m = to_start + (*m-from_start);
2417
2418 for (;from_ptr < from_end; from_ptr++) {
2419 while (*m == from_ptr) *m++ = to_ptr;
2420 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2421 }
2422 *to_ptr = 0;
2423
2424 while (*m == from_ptr) *m++ = to_ptr;
2425 if (m != marks + sym_ptr->level+1) {
2426 Safefree(marks);
2427 Safefree(to_start);
2428 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2429 }
2430 for (group=sym_ptr; group; group = group->previous)
2431 group->strbeg = marks[group->level] - to_start;
2432 Safefree(marks);
2433
2434 if (SvOOK(sv)) {
2435 if (SvIVX(sv)) {
2436 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2437 from_start -= SvIVX(sv);
2438 SvIV_set(sv, 0);
2439 }
2440 SvFLAGS(sv) &= ~SVf_OOK;
2441 }
2442 if (SvLEN(sv) != 0)
2443 Safefree(from_start);
2444 SvPV_set(sv, to_start);
2445 SvCUR_set(sv, to_ptr - to_start);
2446 SvLEN_set(sv, len);
2447 SvUTF8_on(sv);
2448}
2449
2450/* Exponential string grower. Makes string extension effectively O(n)
2451 needed says how many extra bytes we need (not counting the final '\0')
2452 Only grows the string if there is an actual lack of space
2453*/
2454STATIC char *
2455S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2456 const STRLEN cur = SvCUR(sv);
2457 const STRLEN len = SvLEN(sv);
2458 STRLEN extend;
2459
2460 PERL_ARGS_ASSERT_SV_EXP_GROW;
2461
2462 if (len - cur > needed) return SvPVX(sv);
2463 extend = needed > len ? needed : len;
2464 return SvGROW(sv, len+extend+1);
2465}
2466
2467STATIC
2468SV **
2469S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2470{
2471 dVAR;
2472 tempsym_t lookahead;
2473 I32 items = endlist - beglist;
2474 bool found = next_symbol(symptr);
2475 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2476 bool warn_utf8 = ckWARN(WARN_UTF8);
2477
2478 PERL_ARGS_ASSERT_PACK_REC;
2479
2480 if (symptr->level == 0 && found && symptr->code == 'U') {
2481 marked_upgrade(aTHX_ cat, symptr);
2482 symptr->flags |= FLAG_DO_UTF8;
2483 utf8 = 0;
2484 }
2485 symptr->strbeg = SvCUR(cat);
2486
2487 while (found) {
2488 SV *fromstr;
2489 STRLEN fromlen;
2490 I32 len;
2491 SV *lengthcode = NULL;
2492 I32 datumtype = symptr->code;
2493 howlen_t howlen = symptr->howlen;
2494 char *start = SvPVX(cat);
2495 char *cur = start + SvCUR(cat);
2496
2497#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2498
2499 switch (howlen) {
2500 case e_star:
2501 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2502 0 : items;
2503 break;
2504 default:
2505 /* e_no_len and e_number */
2506 len = symptr->length;
2507 break;
2508 }
2509
2510 if (len) {
2511 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2512
2513 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2514 /* We can process this letter. */
2515 STRLEN size = props & PACK_SIZE_MASK;
2516 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2517 }
2518 }
2519
2520 /* Look ahead for next symbol. Do we have code/code? */
2521 lookahead = *symptr;
2522 found = next_symbol(&lookahead);
2523 if (symptr->flags & FLAG_SLASH) {
2524 IV count;
2525 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2526 if (strchr("aAZ", lookahead.code)) {
2527 if (lookahead.howlen == e_number) count = lookahead.length;
2528 else {
2529 if (items > 0) {
2530 if (SvGAMAGIC(*beglist)) {
2531 /* Avoid reading the active data more than once
2532 by copying it to a temporary. */
2533 STRLEN len;
2534 const char *const pv = SvPV_const(*beglist, len);
2535 SV *const temp
2536 = newSVpvn_flags(pv, len,
2537 SVs_TEMP | SvUTF8(*beglist));
2538 *beglist = temp;
2539 }
2540 count = DO_UTF8(*beglist) ?
2541 sv_len_utf8(*beglist) : sv_len(*beglist);
2542 }
2543 else count = 0;
2544 if (lookahead.code == 'Z') count++;
2545 }
2546 } else {
2547 if (lookahead.howlen == e_number && lookahead.length < items)
2548 count = lookahead.length;
2549 else count = items;
2550 }
2551 lookahead.howlen = e_number;
2552 lookahead.length = count;
2553 lengthcode = sv_2mortal(newSViv(count));
2554 }
2555
2556 /* Code inside the switch must take care to properly update
2557 cat (CUR length and '\0' termination) if it updated *cur and
2558 doesn't simply leave using break */
2559 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2560 default:
2561 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2562 (int) TYPE_NO_MODIFIERS(datumtype));
2563 case '%':
2564 Perl_croak(aTHX_ "'%%' may not be used in pack");
2565 {
2566 char *from;
2567#ifdef PERL_PACK_CAN_SHRIEKSIGN
2568 case '.' | TYPE_IS_SHRIEKING:
2569#endif
2570 case '.':
2571 if (howlen == e_star) from = start;
2572 else if (len == 0) from = cur;
2573 else {
2574 tempsym_t *group = symptr;
2575
2576 while (--len && group) group = group->previous;
2577 from = group ? start + group->strbeg : start;
2578 }
2579 fromstr = NEXTFROM;
2580 len = SvIV(fromstr);
2581 goto resize;
2582#ifdef PERL_PACK_CAN_SHRIEKSIGN
2583 case '@' | TYPE_IS_SHRIEKING:
2584#endif
2585 case '@':
2586 from = start + symptr->strbeg;
2587 resize:
2588#ifdef PERL_PACK_CAN_SHRIEKSIGN
2589 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2590#else /* PERL_PACK_CAN_SHRIEKSIGN */
2591 if (utf8)
2592#endif
2593 if (len >= 0) {
2594 while (len && from < cur) {
2595 from += UTF8SKIP(from);
2596 len--;
2597 }
2598 if (from > cur)
2599 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2600 if (len) {
2601 /* Here we know from == cur */
2602 grow:
2603 GROWING(0, cat, start, cur, len);
2604 Zero(cur, len, char);
2605 cur += len;
2606 } else if (from < cur) {
2607 len = cur - from;
2608 goto shrink;
2609 } else goto no_change;
2610 } else {
2611 cur = from;
2612 len = -len;
2613 goto utf8_shrink;
2614 }
2615 else {
2616 len -= cur - from;
2617 if (len > 0) goto grow;
2618 if (len == 0) goto no_change;
2619 len = -len;
2620 goto shrink;
2621 }
2622 break;
2623 }
2624 case '(': {
2625 tempsym_t savsym = *symptr;
2626 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2627 symptr->flags |= group_modifiers;
2628 symptr->patend = savsym.grpend;
2629 symptr->level++;
2630 symptr->previous = &lookahead;
2631 while (len--) {
2632 U32 was_utf8;
2633 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2634 else symptr->flags &= ~FLAG_PARSE_UTF8;
2635 was_utf8 = SvUTF8(cat);
2636 symptr->patptr = savsym.grpbeg;
2637 beglist = pack_rec(cat, symptr, beglist, endlist);
2638 if (SvUTF8(cat) != was_utf8)
2639 /* This had better be an upgrade while in utf8==0 mode */
2640 utf8 = 1;
2641
2642 if (savsym.howlen == e_star && beglist == endlist)
2643 break; /* No way to continue */
2644 }
2645 items = endlist - beglist;
2646 lookahead.flags = symptr->flags & ~group_modifiers;
2647 goto no_change;
2648 }
2649 case 'X' | TYPE_IS_SHRIEKING:
2650 if (!len) /* Avoid division by 0 */
2651 len = 1;
2652 if (utf8) {
2653 char *hop, *last;
2654 I32 l = len;
2655 hop = last = start;
2656 while (hop < cur) {
2657 hop += UTF8SKIP(hop);
2658 if (--l == 0) {
2659 last = hop;
2660 l = len;
2661 }
2662 }
2663 if (last > cur)
2664 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2665 cur = last;
2666 break;
2667 }
2668 len = (cur-start) % len;
2669 /* FALL THROUGH */
2670 case 'X':
2671 if (utf8) {
2672 if (len < 1) goto no_change;
2673 utf8_shrink:
2674 while (len > 0) {
2675 if (cur <= start)
2676 Perl_croak(aTHX_ "'%c' outside of string in pack",
2677 (int) TYPE_NO_MODIFIERS(datumtype));
2678 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2679 if (cur <= start)
2680 Perl_croak(aTHX_ "'%c' outside of string in pack",
2681 (int) TYPE_NO_MODIFIERS(datumtype));
2682 }
2683 len--;
2684 }
2685 } else {
2686 shrink:
2687 if (cur - start < len)
2688 Perl_croak(aTHX_ "'%c' outside of string in pack",
2689 (int) TYPE_NO_MODIFIERS(datumtype));
2690 cur -= len;
2691 }
2692 if (cur < start+symptr->strbeg) {
2693 /* Make sure group starts don't point into the void */
2694 tempsym_t *group;
2695 const STRLEN length = cur-start;
2696 for (group = symptr;
2697 group && length < group->strbeg;
2698 group = group->previous) group->strbeg = length;
2699 lookahead.strbeg = length;
2700 }
2701 break;
2702 case 'x' | TYPE_IS_SHRIEKING: {
2703 I32 ai32;
2704 if (!len) /* Avoid division by 0 */
2705 len = 1;
2706 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2707 else ai32 = (cur - start) % len;
2708 if (ai32 == 0) goto no_change;
2709 len -= ai32;
2710 }
2711 /* FALL THROUGH */
2712 case 'x':
2713 goto grow;
2714 case 'A':
2715 case 'Z':
2716 case 'a': {
2717 const char *aptr;
2718
2719 fromstr = NEXTFROM;
2720 aptr = SvPV_const(fromstr, fromlen);
2721 if (DO_UTF8(fromstr)) {
2722 const char *end, *s;
2723
2724 if (!utf8 && !SvUTF8(cat)) {
2725 marked_upgrade(aTHX_ cat, symptr);
2726 lookahead.flags |= FLAG_DO_UTF8;
2727 lookahead.strbeg = symptr->strbeg;
2728 utf8 = 1;
2729 start = SvPVX(cat);
2730 cur = start + SvCUR(cat);
2731 }
2732 if (howlen == e_star) {
2733 if (utf8) goto string_copy;
2734 len = fromlen+1;
2735 }
2736 s = aptr;
2737 end = aptr + fromlen;
2738 fromlen = datumtype == 'Z' ? len-1 : len;
2739 while ((I32) fromlen > 0 && s < end) {
2740 s += UTF8SKIP(s);
2741 fromlen--;
2742 }
2743 if (s > end)
2744 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2745 if (utf8) {
2746 len = fromlen;
2747 if (datumtype == 'Z') len++;
2748 fromlen = s-aptr;
2749 len += fromlen;
2750
2751 goto string_copy;
2752 }
2753 fromlen = len - fromlen;
2754 if (datumtype == 'Z') fromlen--;
2755 if (howlen == e_star) {
2756 len = fromlen;
2757 if (datumtype == 'Z') len++;
2758 }
2759 GROWING(0, cat, start, cur, len);
2760 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2761 datumtype | TYPE_IS_PACK))
2762 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2763 cur += fromlen;
2764 len -= fromlen;
2765 } else if (utf8) {
2766 if (howlen == e_star) {
2767 len = fromlen;
2768 if (datumtype == 'Z') len++;
2769 }
2770 if (len <= (I32) fromlen) {
2771 fromlen = len;
2772 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2773 }
2774 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2775 upgrade, so:
2776 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2777 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2778 len -= fromlen;
2779 while (fromlen > 0) {
2780 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2781 aptr++;
2782 fromlen--;
2783 }
2784 } else {
2785 string_copy:
2786 if (howlen == e_star) {
2787 len = fromlen;
2788 if (datumtype == 'Z') len++;
2789 }
2790 if (len <= (I32) fromlen) {
2791 fromlen = len;
2792 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2793 }
2794 GROWING(0, cat, start, cur, len);
2795 Copy(aptr, cur, fromlen, char);
2796 cur += fromlen;
2797 len -= fromlen;
2798 }
2799 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2800 cur += len;
2801 SvTAINT(cat);
2802 break;
2803 }
2804 case 'B':
2805 case 'b': {
2806 const char *str, *end;
2807 I32 l, field_len;
2808 U8 bits;
2809 bool utf8_source;
2810 U32 utf8_flags;
2811
2812 fromstr = NEXTFROM;
2813 str = SvPV_const(fromstr, fromlen);
2814 end = str + fromlen;
2815 if (DO_UTF8(fromstr)) {
2816 utf8_source = TRUE;
2817 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2818 } else {
2819 utf8_source = FALSE;
2820 utf8_flags = 0; /* Unused, but keep compilers happy */
2821 }
2822 if (howlen == e_star) len = fromlen;
2823 field_len = (len+7)/8;
2824 GROWING(utf8, cat, start, cur, field_len);
2825 if (len > (I32)fromlen) len = fromlen;
2826 bits = 0;
2827 l = 0;
2828 if (datumtype == 'B')
2829 while (l++ < len) {
2830 if (utf8_source) {
2831 UV val = 0;
2832 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2833 bits |= val & 1;
2834 } else bits |= *str++ & 1;
2835 if (l & 7) bits <<= 1;
2836 else {
2837 PUSH_BYTE(utf8, cur, bits);
2838 bits = 0;
2839 }
2840 }
2841 else
2842 /* datumtype == 'b' */
2843 while (l++ < len) {
2844 if (utf8_source) {
2845 UV val = 0;
2846 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2847 if (val & 1) bits |= 0x80;
2848 } else if (*str++ & 1)
2849 bits |= 0x80;
2850 if (l & 7) bits >>= 1;
2851 else {
2852 PUSH_BYTE(utf8, cur, bits);
2853 bits = 0;
2854 }
2855 }
2856 l--;
2857 if (l & 7) {
2858 if (datumtype == 'B')
2859 bits <<= 7 - (l & 7);
2860 else
2861 bits >>= 7 - (l & 7);
2862 PUSH_BYTE(utf8, cur, bits);
2863 l += 7;
2864 }
2865 /* Determine how many chars are left in the requested field */
2866 l /= 8;
2867 if (howlen == e_star) field_len = 0;
2868 else field_len -= l;
2869 Zero(cur, field_len, char);
2870 cur += field_len;
2871 break;
2872 }
2873 case 'H':
2874 case 'h': {
2875 const char *str, *end;
2876 I32 l, field_len;
2877 U8 bits;
2878 bool utf8_source;
2879 U32 utf8_flags;
2880
2881 fromstr = NEXTFROM;
2882 str = SvPV_const(fromstr, fromlen);
2883 end = str + fromlen;
2884 if (DO_UTF8(fromstr)) {
2885 utf8_source = TRUE;
2886 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2887 } else {
2888 utf8_source = FALSE;
2889 utf8_flags = 0; /* Unused, but keep compilers happy */
2890 }
2891 if (howlen == e_star) len = fromlen;
2892 field_len = (len+1)/2;
2893 GROWING(utf8, cat, start, cur, field_len);
2894 if (!utf8 && len > (I32)fromlen) len = fromlen;
2895 bits = 0;
2896 l = 0;
2897 if (datumtype == 'H')
2898 while (l++ < len) {
2899 if (utf8_source) {
2900 UV val = 0;
2901 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2902 if (val < 256 && isALPHA(val))
2903 bits |= (val + 9) & 0xf;
2904 else
2905 bits |= val & 0xf;
2906 } else if (isALPHA(*str))
2907 bits |= (*str++ + 9) & 0xf;
2908 else
2909 bits |= *str++ & 0xf;
2910 if (l & 1) bits <<= 4;
2911 else {
2912 PUSH_BYTE(utf8, cur, bits);
2913 bits = 0;
2914 }
2915 }
2916 else
2917 while (l++ < len) {
2918 if (utf8_source) {
2919 UV val = 0;
2920 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2921 if (val < 256 && isALPHA(val))
2922 bits |= ((val + 9) & 0xf) << 4;
2923 else
2924 bits |= (val & 0xf) << 4;
2925 } else if (isALPHA(*str))
2926 bits |= ((*str++ + 9) & 0xf) << 4;
2927 else
2928 bits |= (*str++ & 0xf) << 4;
2929 if (l & 1) bits >>= 4;
2930 else {
2931 PUSH_BYTE(utf8, cur, bits);
2932 bits = 0;
2933 }
2934 }
2935 l--;
2936 if (l & 1) {
2937 PUSH_BYTE(utf8, cur, bits);
2938 l++;
2939 }
2940 /* Determine how many chars are left in the requested field */
2941 l /= 2;
2942 if (howlen == e_star) field_len = 0;
2943 else field_len -= l;
2944 Zero(cur, field_len, char);
2945 cur += field_len;
2946 break;
2947 }
2948 case 'c':
2949 while (len-- > 0) {
2950 IV aiv;
2951 fromstr = NEXTFROM;
2952 aiv = SvIV(fromstr);
2953 if ((-128 > aiv || aiv > 127) &&
2954 ckWARN(WARN_PACK))
2955 Perl_warner(aTHX_ packWARN(WARN_PACK),
2956 "Character in 'c' format wrapped in pack");
2957 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2958 }
2959 break;
2960 case 'C':
2961 if (len == 0) {
2962 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2963 break;
2964 }
2965 while (len-- > 0) {
2966 IV aiv;
2967 fromstr = NEXTFROM;
2968 aiv = SvIV(fromstr);
2969 if ((0 > aiv || aiv > 0xff) &&
2970 ckWARN(WARN_PACK))
2971 Perl_warner(aTHX_ packWARN(WARN_PACK),
2972 "Character in 'C' format wrapped in pack");
2973 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2974 }
2975 break;
2976 case 'W': {
2977 char *end;
2978 U8 in_bytes = (U8)IN_BYTES;
2979
2980 end = start+SvLEN(cat)-1;
2981 if (utf8) end -= UTF8_MAXLEN-1;
2982 while (len-- > 0) {
2983 UV auv;
2984 fromstr = NEXTFROM;
2985 auv = SvUV(fromstr);
2986 if (in_bytes) auv = auv % 0x100;
2987 if (utf8) {
2988 W_utf8:
2989 if (cur > end) {
2990 *cur = '\0';
2991 SvCUR_set(cat, cur - start);
2992
2993 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2994 end = start+SvLEN(cat)-UTF8_MAXLEN;
2995 }
2996 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2997 NATIVE_TO_UNI(auv),
2998 warn_utf8 ?
2999 0 : UNICODE_ALLOW_ANY);
3000 } else {
3001 if (auv >= 0x100) {
3002 if (!SvUTF8(cat)) {
3003 *cur = '\0';
3004 SvCUR_set(cat, cur - start);
3005 marked_upgrade(aTHX_ cat, symptr);
3006 lookahead.flags |= FLAG_DO_UTF8;
3007 lookahead.strbeg = symptr->strbeg;
3008 utf8 = 1;
3009 start = SvPVX(cat);
3010 cur = start + SvCUR(cat);
3011 end = start+SvLEN(cat)-UTF8_MAXLEN;
3012 goto W_utf8;
3013 }
3014 if (ckWARN(WARN_PACK))
3015 Perl_warner(aTHX_ packWARN(WARN_PACK),
3016 "Character in 'W' format wrapped in pack");
3017 auv &= 0xff;
3018 }
3019 if (cur >= end) {
3020 *cur = '\0';
3021 SvCUR_set(cat, cur - start);
3022 GROWING(0, cat, start, cur, len+1);
3023 end = start+SvLEN(cat)-1;
3024 }
3025 *(U8 *) cur++ = (U8)auv;
3026 }
3027 }
3028 break;
3029 }
3030 case 'U': {
3031 char *end;
3032
3033 if (len == 0) {
3034 if (!(symptr->flags & FLAG_DO_UTF8)) {
3035 marked_upgrade(aTHX_ cat, symptr);
3036 lookahead.flags |= FLAG_DO_UTF8;
3037 lookahead.strbeg = symptr->strbeg;
3038 }
3039 utf8 = 0;
3040 goto no_change;
3041 }
3042
3043 end = start+SvLEN(cat);
3044 if (!utf8) end -= UTF8_MAXLEN;
3045 while (len-- > 0) {
3046 UV auv;
3047 fromstr = NEXTFROM;
3048 auv = SvUV(fromstr);
3049 if (utf8) {
3050 U8 buffer[UTF8_MAXLEN], *endb;
3051 endb = uvuni_to_utf8_flags(buffer, auv,
3052 warn_utf8 ?
3053 0 : UNICODE_ALLOW_ANY);
3054 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3055 *cur = '\0';
3056 SvCUR_set(cat, cur - start);
3057 GROWING(0, cat, start, cur,
3058 len+(endb-buffer)*UTF8_EXPAND);
3059 end = start+SvLEN(cat);
3060 }
3061 cur = bytes_to_uni(buffer, endb-buffer, cur);
3062 } else {
3063 if (cur >= end) {
3064 *cur = '\0';
3065 SvCUR_set(cat, cur - start);
3066 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3067 end = start+SvLEN(cat)-UTF8_MAXLEN;
3068 }
3069 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3070 warn_utf8 ?
3071 0 : UNICODE_ALLOW_ANY);
3072 }
3073 }
3074 break;
3075 }
3076 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3077 case 'f':
3078 while (len-- > 0) {
3079 float afloat;
3080 NV anv;
3081 fromstr = NEXTFROM;
3082 anv = SvNV(fromstr);
3083#ifdef __VOS__
3084 /* VOS does not automatically map a floating-point overflow
3085 during conversion from double to float into infinity, so we
3086 do it by hand. This code should either be generalized for
3087 any OS that needs it, or removed if and when VOS implements
3088 posix-976 (suggestion to support mapping to infinity).
3089 Paul.Green@stratus.com 02-04-02. */
3090{
3091extern const float _float_constants[];
3092 if (anv > FLT_MAX)
3093 afloat = _float_constants[0]; /* single prec. inf. */
3094 else if (anv < -FLT_MAX)
3095 afloat = _float_constants[0]; /* single prec. inf. */
3096 else afloat = (float) anv;
3097}
3098#else /* __VOS__ */
3099# if defined(VMS) && !defined(__IEEE_FP)
3100 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3101 * on Alpha; fake it if we don't have them.
3102 */
3103 if (anv > FLT_MAX)
3104 afloat = FLT_MAX;
3105 else if (anv < -FLT_MAX)
3106 afloat = -FLT_MAX;
3107 else afloat = (float)anv;
3108# else
3109 afloat = (float)anv;
3110# endif
3111#endif /* __VOS__ */
3112 DO_BO_PACK_N(afloat, float);
3113 PUSH_VAR(utf8, cur, afloat);
3114 }
3115 break;
3116 case 'd':
3117 while (len-- > 0) {
3118 double adouble;
3119 NV anv;
3120 fromstr = NEXTFROM;
3121 anv = SvNV(fromstr);
3122#ifdef __VOS__
3123 /* VOS does not automatically map a floating-point overflow
3124 during conversion from long double to double into infinity,
3125 so we do it by hand. This code should either be generalized
3126 for any OS that needs it, or removed if and when VOS
3127 implements posix-976 (suggestion to support mapping to
3128 infinity). Paul.Green@stratus.com 02-04-02. */
3129{
3130extern const double _double_constants[];
3131 if (anv > DBL_MAX)
3132 adouble = _double_constants[0]; /* double prec. inf. */
3133 else if (anv < -DBL_MAX)
3134 adouble = _double_constants[0]; /* double prec. inf. */
3135 else adouble = (double) anv;
3136}
3137#else /* __VOS__ */
3138# if defined(VMS) && !defined(__IEEE_FP)
3139 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3140 * on Alpha; fake it if we don't have them.
3141 */
3142 if (anv > DBL_MAX)
3143 adouble = DBL_MAX;
3144 else if (anv < -DBL_MAX)
3145 adouble = -DBL_MAX;
3146 else adouble = (double)anv;
3147# else
3148 adouble = (double)anv;
3149# endif
3150#endif /* __VOS__ */
3151 DO_BO_PACK_N(adouble, double);
3152 PUSH_VAR(utf8, cur, adouble);
3153 }
3154 break;
3155 case 'F': {
3156 NV anv;
3157 Zero(&anv, 1, NV); /* can be long double with unused bits */
3158 while (len-- > 0) {
3159 fromstr = NEXTFROM;
3160 anv = SvNV(fromstr);
3161 DO_BO_PACK_N(anv, NV);
3162 PUSH_VAR(utf8, cur, anv);
3163 }
3164 break;
3165 }
3166#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3167 case 'D': {
3168 long double aldouble;
3169 /* long doubles can have unused bits, which may be nonzero */
3170 Zero(&aldouble, 1, long double);
3171 while (len-- > 0) {
3172 fromstr = NEXTFROM;
3173 aldouble = (long double)SvNV(fromstr);
3174 DO_BO_PACK_N(aldouble, long double);
3175 PUSH_VAR(utf8, cur, aldouble);
3176 }
3177 break;
3178 }
3179#endif
3180#ifdef PERL_PACK_CAN_SHRIEKSIGN
3181 case 'n' | TYPE_IS_SHRIEKING:
3182#endif
3183 case 'n':
3184 while (len-- > 0) {
3185 I16 ai16;
3186 fromstr = NEXTFROM;
3187 ai16 = (I16)SvIV(fromstr);
3188#ifdef HAS_HTONS
3189 ai16 = PerlSock_htons(ai16);
3190#endif
3191 PUSH16(utf8, cur, &ai16);
3192 }
3193 break;
3194#ifdef PERL_PACK_CAN_SHRIEKSIGN
3195 case 'v' | TYPE_IS_SHRIEKING:
3196#endif
3197 case 'v':
3198 while (len-- > 0) {
3199 I16 ai16;
3200 fromstr = NEXTFROM;
3201 ai16 = (I16)SvIV(fromstr);
3202#ifdef HAS_HTOVS
3203 ai16 = htovs(ai16);
3204#endif
3205 PUSH16(utf8, cur, &ai16);
3206 }
3207 break;
3208 case 'S' | TYPE_IS_SHRIEKING:
3209#if SHORTSIZE != SIZE16
3210 while (len-- > 0) {
3211 unsigned short aushort;
3212 fromstr = NEXTFROM;
3213 aushort = SvUV(fromstr);
3214 DO_BO_PACK(aushort, s);
3215 PUSH_VAR(utf8, cur, aushort);
3216 }
3217 break;
3218#else
3219 /* Fall through! */
3220#endif
3221 case 'S':
3222 while (len-- > 0) {
3223 U16 au16;
3224 fromstr = NEXTFROM;
3225 au16 = (U16)SvUV(fromstr);
3226 DO_BO_PACK(au16, 16);
3227 PUSH16(utf8, cur, &au16);
3228 }
3229 break;
3230 case 's' | TYPE_IS_SHRIEKING:
3231#if SHORTSIZE != SIZE16
3232 while (len-- > 0) {
3233 short ashort;
3234 fromstr = NEXTFROM;
3235 ashort = SvIV(fromstr);
3236 DO_BO_PACK(ashort, s);
3237 PUSH_VAR(utf8, cur, ashort);
3238 }
3239 break;
3240#else
3241 /* Fall through! */
3242#endif
3243 case 's':
3244 while (len-- > 0) {
3245 I16 ai16;
3246 fromstr = NEXTFROM;
3247 ai16 = (I16)SvIV(fromstr);
3248 DO_BO_PACK(ai16, 16);
3249 PUSH16(utf8, cur, &ai16);
3250 }
3251 break;
3252 case 'I':
3253 case 'I' | TYPE_IS_SHRIEKING:
3254 while (len-- > 0) {
3255 unsigned int auint;
3256 fromstr = NEXTFROM;
3257 auint = SvUV(fromstr);
3258 DO_BO_PACK(auint, i);
3259 PUSH_VAR(utf8, cur, auint);
3260 }
3261 break;
3262 case 'j':
3263 while (len-- > 0) {
3264 IV aiv;
3265 fromstr = NEXTFROM;
3266 aiv = SvIV(fromstr);
3267#if IVSIZE == INTSIZE
3268 DO_BO_PACK(aiv, i);
3269#elif IVSIZE == LONGSIZE
3270 DO_BO_PACK(aiv, l);
3271#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3272 DO_BO_PACK(aiv, 64);
3273#else
3274 Perl_croak(aTHX_ "'j' not supported on this platform");
3275#endif
3276 PUSH_VAR(utf8, cur, aiv);
3277 }
3278 break;
3279 case 'J':
3280 while (len-- > 0) {
3281 UV auv;
3282 fromstr = NEXTFROM;
3283 auv = SvUV(fromstr);
3284#if UVSIZE == INTSIZE
3285 DO_BO_PACK(auv, i);
3286#elif UVSIZE == LONGSIZE
3287 DO_BO_PACK(auv, l);
3288#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3289 DO_BO_PACK(auv, 64);
3290#else
3291 Perl_croak(aTHX_ "'J' not supported on this platform");
3292#endif
3293 PUSH_VAR(utf8, cur, auv);
3294 }
3295 break;
3296 case 'w':
3297 while (len-- > 0) {
3298 NV anv;
3299 fromstr = NEXTFROM;
3300 anv = SvNV(fromstr);
3301
3302 if (anv < 0) {
3303 *cur = '\0';
3304 SvCUR_set(cat, cur - start);
3305 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3306 }
3307
3308 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3309 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3310 any negative IVs will have already been got by the croak()
3311 above. IOK is untrue for fractions, so we test them
3312 against UV_MAX_P1. */
3313 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3314 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3315 char *in = buf + sizeof(buf);
3316 UV auv = SvUV(fromstr);
3317
3318 do {
3319 *--in = (char)((auv & 0x7f) | 0x80);
3320 auv >>= 7;
3321 } while (auv);
3322 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3323 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3324 in, (buf + sizeof(buf)) - in);
3325 } else if (SvPOKp(fromstr))
3326 goto w_string;
3327 else if (SvNOKp(fromstr)) {
3328 /* 10**NV_MAX_10_EXP is the largest power of 10
3329 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3330 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3331 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3332 And with that many bytes only Inf can overflow.
3333 Some C compilers are strict about integral constant
3334 expressions so we conservatively divide by a slightly
3335 smaller integer instead of multiplying by the exact
3336 floating-point value.
3337 */
3338#ifdef NV_MAX_10_EXP
3339 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3340 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3341#else
3342 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3343 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3344#endif
3345 char *in = buf + sizeof(buf);
3346
3347 anv = Perl_floor(anv);
3348 do {
3349 const NV next = Perl_floor(anv / 128);
3350 if (in <= buf) /* this cannot happen ;-) */
3351 Perl_croak(aTHX_ "Cannot compress integer in pack");
3352 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3353 anv = next;
3354 } while (anv > 0);
3355 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3356 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3357 in, (buf + sizeof(buf)) - in);
3358 } else {
3359 const char *from;
3360 char *result, *in;
3361 SV *norm;
3362 STRLEN len;
3363 bool done;
3364
3365 w_string:
3366 /* Copy string and check for compliance */
3367 from = SvPV_const(fromstr, len);
3368 if ((norm = is_an_int(from, len)) == NULL)
3369 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3370
3371 Newx(result, len, char);
3372 in = result + len;
3373 done = FALSE;
3374 while (!done) *--in = div128(norm, &done) | 0x80;
3375 result[len - 1] &= 0x7F; /* clear continue bit */
3376 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3377 in, (result + len) - in);
3378 Safefree(result);
3379 SvREFCNT_dec(norm); /* free norm */
3380 }
3381 }
3382 break;
3383 case 'i':
3384 case 'i' | TYPE_IS_SHRIEKING:
3385 while (len-- > 0) {
3386 int aint;
3387 fromstr = NEXTFROM;
3388 aint = SvIV(fromstr);
3389 DO_BO_PACK(aint, i);
3390 PUSH_VAR(utf8, cur, aint);
3391 }
3392 break;
3393#ifdef PERL_PACK_CAN_SHRIEKSIGN
3394 case 'N' | TYPE_IS_SHRIEKING:
3395#endif
3396 case 'N':
3397 while (len-- > 0) {
3398 U32 au32;
3399 fromstr = NEXTFROM;
3400 au32 = SvUV(fromstr);
3401#ifdef HAS_HTONL
3402 au32 = PerlSock_htonl(au32);
3403#endif
3404 PUSH32(utf8, cur, &au32);
3405 }
3406 break;
3407#ifdef PERL_PACK_CAN_SHRIEKSIGN
3408 case 'V' | TYPE_IS_SHRIEKING:
3409#endif
3410 case 'V':
3411 while (len-- > 0) {
3412 U32 au32;
3413 fromstr = NEXTFROM;
3414 au32 = SvUV(fromstr);
3415#ifdef HAS_HTOVL
3416 au32 = htovl(au32);
3417#endif
3418 PUSH32(utf8, cur, &au32);
3419 }
3420 break;
3421 case 'L' | TYPE_IS_SHRIEKING:
3422#if LONGSIZE != SIZE32
3423 while (len-- > 0) {
3424 unsigned long aulong;
3425 fromstr = NEXTFROM;
3426 aulong = SvUV(fromstr);
3427 DO_BO_PACK(aulong, l);
3428 PUSH_VAR(utf8, cur, aulong);
3429 }
3430 break;
3431#else
3432 /* Fall though! */
3433#endif
3434 case 'L':
3435 while (len-- > 0) {
3436 U32 au32;
3437 fromstr = NEXTFROM;
3438 au32 = SvUV(fromstr);
3439 DO_BO_PACK(au32, 32);
3440 PUSH32(utf8, cur, &au32);
3441 }
3442 break;
3443 case 'l' | TYPE_IS_SHRIEKING:
3444#if LONGSIZE != SIZE32
3445 while (len-- > 0) {
3446 long along;
3447 fromstr = NEXTFROM;
3448 along = SvIV(fromstr);
3449 DO_BO_PACK(along, l);
3450 PUSH_VAR(utf8, cur, along);
3451 }
3452 break;
3453#else
3454 /* Fall though! */
3455#endif
3456 case 'l':
3457 while (len-- > 0) {
3458 I32 ai32;
3459 fromstr = NEXTFROM;
3460 ai32 = SvIV(fromstr);
3461 DO_BO_PACK(ai32, 32);
3462 PUSH32(utf8, cur, &ai32);
3463 }
3464 break;
3465#ifdef HAS_QUAD
3466 case 'Q':
3467 while (len-- > 0) {
3468 Uquad_t auquad;
3469 fromstr = NEXTFROM;
3470 auquad = (Uquad_t) SvUV(fromstr);
3471 DO_BO_PACK(auquad, 64);
3472 PUSH_VAR(utf8, cur, auquad);
3473 }
3474 break;
3475 case 'q':
3476 while (len-- > 0) {
3477 Quad_t aquad;
3478 fromstr = NEXTFROM;
3479 aquad = (Quad_t)SvIV(fromstr);
3480 DO_BO_PACK(aquad, 64);
3481 PUSH_VAR(utf8, cur, aquad);
3482 }
3483 break;
3484#endif /* HAS_QUAD */
3485 case 'P':
3486 len = 1; /* assume SV is correct length */
3487 GROWING(utf8, cat, start, cur, sizeof(char *));
3488 /* Fall through! */
3489 case 'p':
3490 while (len-- > 0) {
3491 const char *aptr;
3492
3493 fromstr = NEXTFROM;
3494 SvGETMAGIC(fromstr);
3495 if (!SvOK(fromstr)) aptr = NULL;
3496 else {
3497 /* XXX better yet, could spirit away the string to
3498 * a safe spot and hang on to it until the result
3499 * of pack() (and all copies of the result) are
3500 * gone.
3501 */
3502 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3503 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
3504 Perl_warner(aTHX_ packWARN(WARN_PACK),
3505 "Attempt to pack pointer to temporary value");
3506 }
3507 if (SvPOK(fromstr) || SvNIOK(fromstr))
3508 aptr = SvPV_nomg_const_nolen(fromstr);
3509 else
3510 aptr = SvPV_force_flags_nolen(fromstr, 0);
3511 }
3512 DO_BO_PACK_PC(aptr);
3513 PUSH_VAR(utf8, cur, aptr);
3514 }
3515 break;
3516 case 'u': {
3517 const char *aptr, *aend;
3518 bool from_utf8;
3519
3520 fromstr = NEXTFROM;
3521 if (len <= 2) len = 45;
3522 else len = len / 3 * 3;
3523 if (len >= 64) {
3524 if (ckWARN(WARN_PACK))
3525 Perl_warner(aTHX_ packWARN(WARN_PACK),
3526 "Field too wide in 'u' format in pack");
3527 len = 63;
3528 }
3529 aptr = SvPV_const(fromstr, fromlen);
3530 from_utf8 = DO_UTF8(fromstr);
3531 if (from_utf8) {
3532 aend = aptr + fromlen;
3533 fromlen = sv_len_utf8(fromstr);
3534 } else aend = NULL; /* Unused, but keep compilers happy */
3535 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3536 while (fromlen > 0) {
3537 U8 *end;
3538 I32 todo;
3539 U8 hunk[1+63/3*4+1];
3540
3541 if ((I32)fromlen > len)
3542 todo = len;
3543 else
3544 todo = fromlen;
3545 if (from_utf8) {
3546 char buffer[64];
3547 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3548 'u' | TYPE_IS_PACK)) {
3549 *cur = '\0';
3550 SvCUR_set(cat, cur - start);
3551 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3552 }
3553 end = doencodes(hunk, buffer, todo);
3554 } else {
3555 end = doencodes(hunk, aptr, todo);
3556 aptr += todo;
3557 }
3558 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3559 fromlen -= todo;
3560 }
3561 break;
3562 }
3563 }
3564 *cur = '\0';
3565 SvCUR_set(cat, cur - start);
3566 no_change:
3567 *symptr = lookahead;
3568 }
3569 return beglist;
3570}
3571#undef NEXTFROM
3572
3573
3574PP(pp_pack)
3575{
3576 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3577 register SV *cat = TARG;
3578 STRLEN fromlen;
3579 SV *pat_sv = *++MARK;
3580 register const char *pat = SvPV_const(pat_sv, fromlen);
3581 register const char *patend = pat + fromlen;
3582
3583 MARK++;
3584 sv_setpvs(cat, "");
3585 SvUTF8_off(cat);
3586
3587 packlist(cat, pat, patend, MARK, SP + 1);
3588
3589 SvSETMAGIC(cat);
3590 SP = ORIGMARK;
3591 PUSHs(cat);
3592 RETURN;
3593}
3594
3595/*
3596 * Local variables:
3597 * c-indentation-style: bsd
3598 * c-basic-offset: 4
3599 * indent-tabs-mode: t
3600 * End:
3601 *
3602 * ex: set ts=8 sts=4 sw=4 noet:
3603 */