This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Don't test the core XS code yet with PERL_DEBUG_COW > 1
[perl5.git] / pp_pack.c
... / ...
CommitLineData
1/* pp_pack.c
2 *
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 */
18
19/* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
24 *
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
27 */
28
29
30#include "EXTERN.h"
31#define PERL_IN_PP_PACK_C
32#include "perl.h"
33
34/* Types used by pack/unpack */
35typedef enum {
36 e_no_len, /* no length */
37 e_number, /* number, [] */
38 e_star /* asterisk */
39} howlen_t;
40
41typedef struct tempsym {
42 const char* patptr; /* current template char */
43 const char* patend; /* one after last char */
44 const char* grpbeg; /* 1st char of ()-group */
45 const char* grpend; /* end of ()-group */
46 I32 code; /* template code (!<>) */
47 I32 length; /* length/repeat count */
48 howlen_t howlen; /* how length is given */
49 int level; /* () nesting level */
50 U32 flags; /* /=4, comma=2, pack=1 */
51 /* and group modifiers */
52 STRLEN strbeg; /* offset of group start */
53 struct tempsym *previous; /* previous group */
54} tempsym_t;
55
56#define TEMPSYM_INIT(symptr, p, e, f) \
57 STMT_START { \
58 (symptr)->patptr = (p); \
59 (symptr)->patend = (e); \
60 (symptr)->grpbeg = NULL; \
61 (symptr)->grpend = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->code = 0; \
64 (symptr)->length = 0; \
65 (symptr)->howlen = 0; \
66 (symptr)->level = 0; \
67 (symptr)->flags = (f); \
68 (symptr)->strbeg = 0; \
69 (symptr)->previous = NULL; \
70 } STMT_END
71
72#if PERL_VERSION >= 9
73# define PERL_PACK_CAN_BYTEORDER
74# define PERL_PACK_CAN_SHRIEKSIGN
75#endif
76
77#ifndef CHAR_BIT
78# define CHAR_BIT 8
79#endif
80/* Maximum number of bytes to which a byte can grow due to upgrade */
81#define UTF8_EXPAND 2
82
83/*
84 * Offset for integer pack/unpack.
85 *
86 * On architectures where I16 and I32 aren't really 16 and 32 bits,
87 * which for now are all Crays, pack and unpack have to play games.
88 */
89
90/*
91 * These values are required for portability of pack() output.
92 * If they're not right on your machine, then pack() and unpack()
93 * wouldn't work right anyway; you'll need to apply the Cray hack.
94 * (I'd like to check them with #if, but you can't use sizeof() in
95 * the preprocessor.) --???
96 */
97/*
98 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
99 defines are now in config.h. --Andy Dougherty April 1998
100 */
101#define SIZE16 2
102#define SIZE32 4
103
104/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
105 --jhi Feb 1999 */
106
107#if U16SIZE > SIZE16 || U32SIZE > SIZE32
108# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
109# define OFF16(p) ((char*)(p))
110# define OFF32(p) ((char*)(p))
111# else
112# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
113# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
114# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
115# else
116 ++++ bad cray byte order
117# endif
118# endif
119#else
120# define OFF16(p) ((char *) (p))
121# define OFF32(p) ((char *) (p))
122#endif
123
124/* Only to be used inside a loop (see the break) */
125#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
126 if (utf8) { \
127 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
128 } else { \
129 Copy(s, OFF16(p), SIZE16, char); \
130 (s) += SIZE16; \
131 } \
132} STMT_END
133
134/* Only to be used inside a loop (see the break) */
135#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
136 if (utf8) { \
137 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
138 } else { \
139 Copy(s, OFF32(p), SIZE32, char); \
140 (s) += SIZE32; \
141 } \
142} STMT_END
143
144#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
145#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
146
147/* Only to be used inside a loop (see the break) */
148#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
149STMT_START { \
150 if (utf8) { \
151 if (!uni_to_bytes(aTHX_ &s, strend, \
152 (char *) &var, sizeof(var), datumtype)) break;\
153 } else { \
154 Copy(s, (char *) &var, sizeof(var), char); \
155 s += sizeof(var); \
156 } \
157} STMT_END
158
159#define PUSH_VAR(utf8, aptr, var) \
160 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
161
162/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
163#define MAX_SUB_TEMPLATE_LEVEL 100
164
165/* flags (note that type modifiers can also be used as flags!) */
166#define FLAG_WAS_UTF8 0x40
167#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
168#define FLAG_UNPACK_ONLY_ONE 0x10
169#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
170#define FLAG_SLASH 0x04
171#define FLAG_COMMA 0x02
172#define FLAG_PACK 0x01
173
174STATIC SV *
175S_mul128(pTHX_ SV *sv, U8 m)
176{
177 STRLEN len;
178 char *s = SvPV(sv, len);
179 char *t;
180
181 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
182 SV *tmpNew = newSVpvn("0000000000", 10);
183
184 sv_catsv(tmpNew, sv);
185 SvREFCNT_dec(sv); /* free old sv */
186 sv = tmpNew;
187 s = SvPV(sv, len);
188 }
189 t = s + len - 1;
190 while (!*t) /* trailing '\0'? */
191 t--;
192 while (t > s) {
193 const U32 i = ((*t - '0') << 7) + m;
194 *(t--) = '0' + (char)(i % 10);
195 m = (char)(i / 10);
196 }
197 return (sv);
198}
199
200/* Explosives and implosives. */
201
202#if 'I' == 73 && 'J' == 74
203/* On an ASCII/ISO kind of system */
204#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
205#else
206/*
207 Some other sort of character set - use memchr() so we don't match
208 the null byte.
209 */
210#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
211#endif
212
213/* type modifiers */
214#define TYPE_IS_SHRIEKING 0x100
215#define TYPE_IS_BIG_ENDIAN 0x200
216#define TYPE_IS_LITTLE_ENDIAN 0x400
217#define TYPE_IS_PACK 0x800
218#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
219#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
220#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
221
222#ifdef PERL_PACK_CAN_SHRIEKSIGN
223# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
224#else
225# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
226#endif
227
228#ifndef PERL_PACK_CAN_BYTEORDER
229/* Put "can't" first because it is shorter */
230# define TYPE_ENDIANNESS(t) 0
231# define TYPE_NO_ENDIANNESS(t) (t)
232
233# define ENDIANNESS_ALLOWED_TYPES ""
234
235# define DO_BO_UNPACK(var, type)
236# define DO_BO_PACK(var, type)
237# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
238# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
239# define DO_BO_UNPACK_N(var, type)
240# define DO_BO_PACK_N(var, type)
241# define DO_BO_UNPACK_P(var)
242# define DO_BO_PACK_P(var)
243
244#else /* PERL_PACK_CAN_BYTEORDER */
245
246# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
247# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
248
249# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
250
251# define DO_BO_UNPACK(var, type) \
252 STMT_START { \
253 switch (TYPE_ENDIANNESS(datumtype)) { \
254 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
255 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
256 default: break; \
257 } \
258 } STMT_END
259
260# define DO_BO_PACK(var, type) \
261 STMT_START { \
262 switch (TYPE_ENDIANNESS(datumtype)) { \
263 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
264 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
265 default: break; \
266 } \
267 } STMT_END
268
269# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
270 STMT_START { \
271 switch (TYPE_ENDIANNESS(datumtype)) { \
272 case TYPE_IS_BIG_ENDIAN: \
273 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
274 break; \
275 case TYPE_IS_LITTLE_ENDIAN: \
276 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
277 break; \
278 default: \
279 break; \
280 } \
281 } STMT_END
282
283# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
284 STMT_START { \
285 switch (TYPE_ENDIANNESS(datumtype)) { \
286 case TYPE_IS_BIG_ENDIAN: \
287 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
288 break; \
289 case TYPE_IS_LITTLE_ENDIAN: \
290 var = (post_cast *) my_htole ## type ((pre_cast) var); \
291 break; \
292 default: \
293 break; \
294 } \
295 } STMT_END
296
297# define BO_CANT_DOIT(action, type) \
298 STMT_START { \
299 switch (TYPE_ENDIANNESS(datumtype)) { \
300 case TYPE_IS_BIG_ENDIAN: \
301 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
302 "platform", #action, #type); \
303 break; \
304 case TYPE_IS_LITTLE_ENDIAN: \
305 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
306 "platform", #action, #type); \
307 break; \
308 default: \
309 break; \
310 } \
311 } STMT_END
312
313# if PTRSIZE == INTSIZE
314# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
315# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
316# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
317# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
318# elif PTRSIZE == LONGSIZE
319# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
320# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
321# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
322# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
323# else
324# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
325# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
326# endif
327
328# if defined(my_htolen) && defined(my_letohn) && \
329 defined(my_htoben) && defined(my_betohn)
330# define DO_BO_UNPACK_N(var, type) \
331 STMT_START { \
332 switch (TYPE_ENDIANNESS(datumtype)) { \
333 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
334 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
335 default: break; \
336 } \
337 } STMT_END
338
339# define DO_BO_PACK_N(var, type) \
340 STMT_START { \
341 switch (TYPE_ENDIANNESS(datumtype)) { \
342 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
343 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
344 default: break; \
345 } \
346 } STMT_END
347# else
348# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
349# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
350# endif
351
352#endif /* PERL_PACK_CAN_BYTEORDER */
353
354#define PACK_SIZE_CANNOT_CSUM 0x80
355#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
356#define PACK_SIZE_MASK 0x3F
357
358/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
359 in). You're unlikely ever to need to regenerate them. */
360
361#if TYPE_IS_SHRIEKING != 0x100
362 ++++shriek offset should be 256
363#endif
364
365typedef U8 packprops_t;
366#if 'J'-'I' == 1
367/* ASCII */
368const packprops_t packprops[512] = {
369 /* normal */
370 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
371 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
372 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
373 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
374 0, 0, 0,
375 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
376#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
377 /* D */ LONG_DOUBLESIZE,
378#else
379 0,
380#endif
381 0,
382 /* F */ NVSIZE,
383 0, 0,
384 /* I */ sizeof(unsigned int),
385 /* J */ UVSIZE,
386 0,
387 /* L */ SIZE32,
388 0,
389 /* N */ SIZE32,
390 0, 0,
391#if defined(HAS_QUAD)
392 /* Q */ sizeof(Uquad_t),
393#else
394 0,
395#endif
396 0,
397 /* S */ SIZE16,
398 0,
399 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
400 /* V */ SIZE32,
401 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
402 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
403 /* c */ sizeof(char),
404 /* d */ sizeof(double),
405 0,
406 /* f */ sizeof(float),
407 0, 0,
408 /* i */ sizeof(int),
409 /* j */ IVSIZE,
410 0,
411 /* l */ SIZE32,
412 0,
413 /* n */ SIZE16,
414 0,
415 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
416#if defined(HAS_QUAD)
417 /* q */ sizeof(Quad_t),
418#else
419 0,
420#endif
421 0,
422 /* s */ SIZE16,
423 0, 0,
424 /* v */ SIZE16,
425 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
426 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
427 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
428 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
429 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
430 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
431 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
432 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
433 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
434 0, 0, 0, 0, 0, 0, 0, 0,
435 /* shrieking */
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 /* I */ sizeof(unsigned int),
442 0, 0,
443 /* L */ sizeof(unsigned long),
444 0,
445#if defined(PERL_PACK_CAN_SHRIEKSIGN)
446 /* N */ SIZE32,
447#else
448 0,
449#endif
450 0, 0, 0, 0,
451 /* S */ sizeof(unsigned short),
452 0, 0,
453#if defined(PERL_PACK_CAN_SHRIEKSIGN)
454 /* V */ SIZE32,
455#else
456 0,
457#endif
458 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
459 0, 0,
460 /* i */ sizeof(int),
461 0, 0,
462 /* l */ sizeof(long),
463 0,
464#if defined(PERL_PACK_CAN_SHRIEKSIGN)
465 /* n */ SIZE16,
466#else
467 0,
468#endif
469 0, 0, 0, 0,
470 /* s */ sizeof(short),
471 0, 0,
472#if defined(PERL_PACK_CAN_SHRIEKSIGN)
473 /* v */ SIZE16,
474#else
475 0,
476#endif
477 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
478 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
479 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
480 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
481 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
482 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
483 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
484 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
485 0, 0, 0, 0, 0, 0, 0, 0, 0
486};
487#else
488/* EBCDIC (or bust) */
489const packprops_t packprops[512] = {
490 /* normal */
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
495 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
496 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
497 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
498 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
499 0, 0, 0,
500 /* c */ sizeof(char),
501 /* d */ sizeof(double),
502 0,
503 /* f */ sizeof(float),
504 0, 0,
505 /* i */ sizeof(int),
506 0, 0, 0, 0, 0, 0, 0,
507 /* j */ IVSIZE,
508 0,
509 /* l */ SIZE32,
510 0,
511 /* n */ SIZE16,
512 0,
513 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
514#if defined(HAS_QUAD)
515 /* q */ sizeof(Quad_t),
516#else
517 0,
518#endif
519 0, 0, 0, 0, 0, 0, 0, 0, 0,
520 /* s */ SIZE16,
521 0, 0,
522 /* v */ SIZE16,
523 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
524 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
525 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
526 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
527#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
528 /* D */ LONG_DOUBLESIZE,
529#else
530 0,
531#endif
532 0,
533 /* F */ NVSIZE,
534 0, 0,
535 /* I */ sizeof(unsigned int),
536 0, 0, 0, 0, 0, 0, 0,
537 /* J */ UVSIZE,
538 0,
539 /* L */ SIZE32,
540 0,
541 /* N */ SIZE32,
542 0, 0,
543#if defined(HAS_QUAD)
544 /* Q */ sizeof(Uquad_t),
545#else
546 0,
547#endif
548 0, 0, 0, 0, 0, 0, 0, 0, 0,
549 /* S */ SIZE16,
550 0,
551 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
552 /* V */ SIZE32,
553 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
554 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
555 0, 0, 0, 0, 0, 0, 0, 0, 0,
556 /* shrieking */
557 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
558 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
559 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
560 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
561 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
562 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
564 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
565 0, 0, 0, 0, 0, 0, 0, 0, 0,
566 /* i */ sizeof(int),
567 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 /* l */ sizeof(long),
569 0,
570#if defined(PERL_PACK_CAN_SHRIEKSIGN)
571 /* n */ SIZE16,
572#else
573 0,
574#endif
575 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
576 /* s */ sizeof(short),
577 0, 0,
578#if defined(PERL_PACK_CAN_SHRIEKSIGN)
579 /* v */ SIZE16,
580#else
581 0,
582#endif
583 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
584 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
585 0, 0, 0,
586 /* I */ sizeof(unsigned int),
587 0, 0, 0, 0, 0, 0, 0, 0, 0,
588 /* L */ sizeof(unsigned long),
589 0,
590#if defined(PERL_PACK_CAN_SHRIEKSIGN)
591 /* N */ SIZE32,
592#else
593 0,
594#endif
595 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
596 /* S */ sizeof(unsigned short),
597 0, 0,
598#if defined(PERL_PACK_CAN_SHRIEKSIGN)
599 /* V */ SIZE32,
600#else
601 0,
602#endif
603 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
604 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
605};
606#endif
607
608STATIC U8
609uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
610{
611 UV val;
612 STRLEN retlen;
613 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
614 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
615 /* We try to process malformed UTF-8 as much as possible (preferrably with
616 warnings), but these two mean we make no progress in the string and
617 might enter an infinite loop */
618 if (retlen == (STRLEN) -1 || retlen == 0)
619 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
620 (int) TYPE_NO_MODIFIERS(datumtype));
621 if (val >= 0x100) {
622 if (ckWARN(WARN_UNPACK))
623 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
624 "Character in '%c' format wrapped in unpack",
625 (int) TYPE_NO_MODIFIERS(datumtype));
626 val &= 0xff;
627 }
628 *s += retlen;
629 return (U8)val;
630}
631
632#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
633 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
634 *(U8 *)(s)++)
635
636STATIC bool
637uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
638{
639 UV val;
640 STRLEN retlen;
641 const char *from = *s;
642 int bad = 0;
643 const U32 flags = ckWARN(WARN_UTF8) ?
644 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
645 for (;buf_len > 0; buf_len--) {
646 if (from >= end) return FALSE;
647 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
648 if (retlen == (STRLEN) -1 || retlen == 0) {
649 from += UTF8SKIP(from);
650 bad |= 1;
651 } else from += retlen;
652 if (val >= 0x100) {
653 bad |= 2;
654 val &= 0xff;
655 }
656 *(U8 *)buf++ = (U8)val;
657 }
658 /* We have enough characters for the buffer. Did we have problems ? */
659 if (bad) {
660 if (bad & 1) {
661 /* Rewalk the string fragment while warning */
662 const char *ptr;
663 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
664 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
665 if (ptr >= end) break;
666 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
667 }
668 if (from > end) from = end;
669 }
670 if ((bad & 2) && ckWARN(WARN_UNPACK))
671 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
672 WARN_PACK : WARN_UNPACK),
673 "Character(s) in '%c' format wrapped in %s",
674 (int) TYPE_NO_MODIFIERS(datumtype),
675 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
676 }
677 *s = from;
678 return TRUE;
679}
680
681STATIC bool
682next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
683{
684 UV val;
685 STRLEN retlen;
686 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
687 if (val >= 0x100 || !ISUUCHAR(val) ||
688 retlen == (STRLEN) -1 || retlen == 0) {
689 *out = 0;
690 return FALSE;
691 }
692 *out = PL_uudmap[val] & 077;
693 *s += retlen;
694 return TRUE;
695}
696
697STATIC void
698bytes_to_uni(pTHX_ const U8 *start, STRLEN len, char **dest) {
699 U8 buffer[UTF8_MAXLEN];
700 const U8 *end = start + len;
701 char *d = *dest;
702 while (start < end) {
703 const int length =
704 uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
705 switch(length) {
706 case 1:
707 *d++ = buffer[0];
708 break;
709 case 2:
710 *d++ = buffer[0];
711 *d++ = buffer[1];
712 break;
713 default:
714 Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
715 *start, length);
716 }
717 start++;
718 }
719 *dest = d;
720}
721
722#define PUSH_BYTES(utf8, cur, buf, len) \
723STMT_START { \
724 if (utf8) bytes_to_uni(aTHX_ (U8 *) buf, len, &(cur)); \
725 else { \
726 Copy(buf, cur, len, char); \
727 (cur) += (len); \
728 } \
729} STMT_END
730
731#define GROWING(utf8, cat, start, cur, in_len) \
732STMT_START { \
733 STRLEN glen = (in_len); \
734 if (utf8) glen *= UTF8_EXPAND; \
735 if ((cur) + glen >= (start) + SvLEN(cat)) { \
736 (start) = sv_exp_grow(aTHX_ cat, glen); \
737 (cur) = (start) + SvCUR(cat); \
738 } \
739} STMT_END
740
741#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
742STMT_START { \
743 const STRLEN glen = (in_len); \
744 STRLEN gl = glen; \
745 if (utf8) gl *= UTF8_EXPAND; \
746 if ((cur) + gl >= (start) + SvLEN(cat)) { \
747 *cur = '\0'; \
748 SvCUR_set((cat), (cur) - (start)); \
749 (start) = sv_exp_grow(aTHX_ cat, gl); \
750 (cur) = (start) + SvCUR(cat); \
751 } \
752 PUSH_BYTES(utf8, cur, buf, glen); \
753} STMT_END
754
755#define PUSH_BYTE(utf8, s, byte) \
756STMT_START { \
757 if (utf8) { \
758 const U8 au8 = (byte); \
759 bytes_to_uni(aTHX_ &au8, 1, &(s)); \
760 } else *(U8 *)(s)++ = (byte); \
761} STMT_END
762
763/* Only to be used inside a loop (see the break) */
764#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
765STMT_START { \
766 STRLEN retlen; \
767 if (str >= end) break; \
768 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
769 if (retlen == (STRLEN) -1 || retlen == 0) { \
770 *cur = '\0'; \
771 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
772 } \
773 str += retlen; \
774} STMT_END
775
776static const char *_action( const tempsym_t* symptr )
777{
778 return ( symptr->flags & FLAG_PACK ) ? "pack" : "unpack";
779}
780
781/* Returns the sizeof() struct described by pat */
782STATIC I32
783S_measure_struct(pTHX_ tempsym_t* symptr)
784{
785 I32 total = 0;
786
787 while (next_symbol(symptr)) {
788 I32 len;
789 int size;
790
791 switch (symptr->howlen) {
792 case e_star:
793 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
794 _action( symptr ) );
795 break;
796 default:
797 /* e_no_len and e_number */
798 len = symptr->length;
799 break;
800 }
801
802 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
803 if (!size) {
804 int star;
805 /* endianness doesn't influence the size of a type */
806 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
807 default:
808 Perl_croak(aTHX_ "Invalid type '%c' in %s",
809 (int)TYPE_NO_MODIFIERS(symptr->code),
810 _action( symptr ) );
811#ifdef PERL_PACK_CAN_SHRIEKSIGN
812 case '.' | TYPE_IS_SHRIEKING:
813 case '@' | TYPE_IS_SHRIEKING:
814#endif
815 case '@':
816 case '.':
817 case '/':
818 case 'U': /* XXXX Is it correct? */
819 case 'w':
820 case 'u':
821 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
822 (int) TYPE_NO_MODIFIERS(symptr->code),
823 _action( symptr ) );
824 case '%':
825 size = 0;
826 break;
827 case '(':
828 {
829 tempsym_t savsym = *symptr;
830 symptr->patptr = savsym.grpbeg;
831 symptr->patend = savsym.grpend;
832 /* XXXX Theoretically, we need to measure many times at
833 different positions, since the subexpression may contain
834 alignment commands, but be not of aligned length.
835 Need to detect this and croak(). */
836 size = measure_struct(symptr);
837 *symptr = savsym;
838 break;
839 }
840 case 'X' | TYPE_IS_SHRIEKING:
841 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
842 */
843 if (!len) /* Avoid division by 0 */
844 len = 1;
845 len = total % len; /* Assumed: the start is aligned. */
846 /* FALL THROUGH */
847 case 'X':
848 size = -1;
849 if (total < len)
850 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
851 break;
852 case 'x' | TYPE_IS_SHRIEKING:
853 if (!len) /* Avoid division by 0 */
854 len = 1;
855 star = total % len; /* Assumed: the start is aligned. */
856 if (star) /* Other portable ways? */
857 len = len - star;
858 else
859 len = 0;
860 /* FALL THROUGH */
861 case 'x':
862 case 'A':
863 case 'Z':
864 case 'a':
865 size = 1;
866 break;
867 case 'B':
868 case 'b':
869 len = (len + 7)/8;
870 size = 1;
871 break;
872 case 'H':
873 case 'h':
874 len = (len + 1)/2;
875 size = 1;
876 break;
877
878 case 'P':
879 len = 1;
880 size = sizeof(char*);
881 break;
882 }
883 }
884 total += len * size;
885 }
886 return total;
887}
888
889
890/* locate matching closing parenthesis or bracket
891 * returns char pointer to char after match, or NULL
892 */
893STATIC const char *
894S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
895{
896 while (patptr < patend) {
897 const char c = *patptr++;
898
899 if (isSPACE(c))
900 continue;
901 else if (c == ender)
902 return patptr-1;
903 else if (c == '#') {
904 while (patptr < patend && *patptr != '\n')
905 patptr++;
906 continue;
907 } else if (c == '(')
908 patptr = group_end(patptr, patend, ')') + 1;
909 else if (c == '[')
910 patptr = group_end(patptr, patend, ']') + 1;
911 }
912 Perl_croak(aTHX_ "No group ending character '%c' found in template",
913 ender);
914 return 0;
915}
916
917
918/* Convert unsigned decimal number to binary.
919 * Expects a pointer to the first digit and address of length variable
920 * Advances char pointer to 1st non-digit char and returns number
921 */
922STATIC const char *
923S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
924{
925 I32 len = *patptr++ - '0';
926 while (isDIGIT(*patptr)) {
927 if (len >= 0x7FFFFFFF/10)
928 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
929 len = (len * 10) + (*patptr++ - '0');
930 }
931 *lenptr = len;
932 return patptr;
933}
934
935/* The marvellous template parsing routine: Using state stored in *symptr,
936 * locates next template code and count
937 */
938STATIC bool
939S_next_symbol(pTHX_ tempsym_t* symptr )
940{
941 const char* patptr = symptr->patptr;
942 const char* patend = symptr->patend;
943
944 symptr->flags &= ~FLAG_SLASH;
945
946 while (patptr < patend) {
947 if (isSPACE(*patptr))
948 patptr++;
949 else if (*patptr == '#') {
950 patptr++;
951 while (patptr < patend && *patptr != '\n')
952 patptr++;
953 if (patptr < patend)
954 patptr++;
955 } else {
956 /* We should have found a template code */
957 I32 code = *patptr++ & 0xFF;
958 U32 inherited_modifiers = 0;
959
960 if (code == ','){ /* grandfather in commas but with a warning */
961 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
962 symptr->flags |= FLAG_COMMA;
963 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
964 "Invalid type ',' in %s", _action( symptr ) );
965 }
966 continue;
967 }
968
969 /* for '(', skip to ')' */
970 if (code == '(') {
971 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
972 Perl_croak(aTHX_ "()-group starts with a count in %s",
973 _action( symptr ) );
974 symptr->grpbeg = patptr;
975 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
976 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
977 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
978 _action( symptr ) );
979 }
980
981 /* look for group modifiers to inherit */
982 if (TYPE_ENDIANNESS(symptr->flags)) {
983 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
984 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
985 }
986
987 /* look for modifiers */
988 while (patptr < patend) {
989 const char *allowed;
990 I32 modifier;
991 switch (*patptr) {
992 case '!':
993 modifier = TYPE_IS_SHRIEKING;
994 allowed = SHRIEKING_ALLOWED_TYPES;
995 break;
996#ifdef PERL_PACK_CAN_BYTEORDER
997 case '>':
998 modifier = TYPE_IS_BIG_ENDIAN;
999 allowed = ENDIANNESS_ALLOWED_TYPES;
1000 break;
1001 case '<':
1002 modifier = TYPE_IS_LITTLE_ENDIAN;
1003 allowed = ENDIANNESS_ALLOWED_TYPES;
1004 break;
1005#endif /* PERL_PACK_CAN_BYTEORDER */
1006 default:
1007 allowed = "";
1008 modifier = 0;
1009 break;
1010 }
1011
1012 if (modifier == 0)
1013 break;
1014
1015 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1016 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
1017 allowed, _action( symptr ) );
1018
1019 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1020 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
1021 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
1022 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1023 TYPE_ENDIANNESS_MASK)
1024 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
1025 *patptr, _action( symptr ) );
1026
1027 if (ckWARN(WARN_UNPACK)) {
1028 if (code & modifier)
1029 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1030 "Duplicate modifier '%c' after '%c' in %s",
1031 *patptr, (int) TYPE_NO_MODIFIERS(code),
1032 _action( symptr ) );
1033 }
1034
1035 code |= modifier;
1036 patptr++;
1037 }
1038
1039 /* inherit modifiers */
1040 code |= inherited_modifiers;
1041
1042 /* look for count and/or / */
1043 if (patptr < patend) {
1044 if (isDIGIT(*patptr)) {
1045 patptr = get_num( patptr, &symptr->length );
1046 symptr->howlen = e_number;
1047
1048 } else if (*patptr == '*') {
1049 patptr++;
1050 symptr->howlen = e_star;
1051
1052 } else if (*patptr == '[') {
1053 const char* lenptr = ++patptr;
1054 symptr->howlen = e_number;
1055 patptr = group_end( patptr, patend, ']' ) + 1;
1056 /* what kind of [] is it? */
1057 if (isDIGIT(*lenptr)) {
1058 lenptr = get_num( lenptr, &symptr->length );
1059 if( *lenptr != ']' )
1060 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1061 _action( symptr ) );
1062 } else {
1063 tempsym_t savsym = *symptr;
1064 symptr->patend = patptr-1;
1065 symptr->patptr = lenptr;
1066 savsym.length = measure_struct(symptr);
1067 *symptr = savsym;
1068 }
1069 } else {
1070 symptr->howlen = e_no_len;
1071 symptr->length = 1;
1072 }
1073
1074 /* try to find / */
1075 while (patptr < patend) {
1076 if (isSPACE(*patptr))
1077 patptr++;
1078 else if (*patptr == '#') {
1079 patptr++;
1080 while (patptr < patend && *patptr != '\n')
1081 patptr++;
1082 if (patptr < patend)
1083 patptr++;
1084 } else {
1085 if (*patptr == '/') {
1086 symptr->flags |= FLAG_SLASH;
1087 patptr++;
1088 if (patptr < patend &&
1089 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1090 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1091 _action( symptr ) );
1092 }
1093 break;
1094 }
1095 }
1096 } else {
1097 /* at end - no count, no / */
1098 symptr->howlen = e_no_len;
1099 symptr->length = 1;
1100 }
1101
1102 symptr->code = code;
1103 symptr->patptr = patptr;
1104 return TRUE;
1105 }
1106 }
1107 symptr->patptr = patptr;
1108 return FALSE;
1109}
1110
1111/*
1112 There is no way to cleanly handle the case where we should process the
1113 string per byte in its upgraded form while it's really in downgraded form
1114 (e.g. estimates like strend-s as an upper bound for the number of
1115 characters left wouldn't work). So if we foresee the need of this
1116 (pattern starts with U or contains U0), we want to work on the encoded
1117 version of the string. Users are advised to upgrade their pack string
1118 themselves if they need to do a lot of unpacks like this on it
1119*/
1120/* XXX These can be const */
1121STATIC bool
1122need_utf8(const char *pat, const char *patend)
1123{
1124 bool first = TRUE;
1125 while (pat < patend) {
1126 if (pat[0] == '#') {
1127 pat++;
1128 pat = (const char *) memchr(pat, '\n', patend-pat);
1129 if (!pat) return FALSE;
1130 } else if (pat[0] == 'U') {
1131 if (first || pat[1] == '0') return TRUE;
1132 } else first = FALSE;
1133 pat++;
1134 }
1135 return FALSE;
1136}
1137
1138STATIC char
1139first_symbol(const char *pat, const char *patend) {
1140 while (pat < patend) {
1141 if (pat[0] != '#') return pat[0];
1142 pat++;
1143 pat = (const char *) memchr(pat, '\n', patend-pat);
1144 if (!pat) return 0;
1145 pat++;
1146 }
1147 return 0;
1148}
1149
1150/*
1151=for apidoc unpack_str
1152
1153The engine implementing unpack() Perl function. Note: parameters strbeg, new_s
1154and ocnt are not used. This call should not be used, use unpackstring instead.
1155
1156=cut */
1157
1158I32
1159Perl_unpack_str(pTHX_ const char *pat, const char *patend, const char *s, const char *strbeg, const char *strend, char **new_s, I32 ocnt, U32 flags)
1160{
1161 tempsym_t sym;
1162 (void)strbeg;
1163 (void)new_s;
1164 (void)ocnt;
1165
1166 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1167 else if (need_utf8(pat, patend)) {
1168 /* We probably should try to avoid this in case a scalar context call
1169 wouldn't get to the "U0" */
1170 STRLEN len = strend - s;
1171 s = (char *) bytes_to_utf8((U8 *) s, &len);
1172 SAVEFREEPV(s);
1173 strend = s + len;
1174 flags |= FLAG_DO_UTF8;
1175 }
1176
1177 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1178 flags |= FLAG_PARSE_UTF8;
1179
1180 TEMPSYM_INIT(&sym, pat, patend, flags);
1181
1182 return unpack_rec(&sym, s, s, strend, NULL );
1183}
1184
1185/*
1186=for apidoc unpackstring
1187
1188The engine implementing unpack() Perl function. C<unpackstring> puts the
1189extracted list items on the stack and returns the number of elements.
1190Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1191
1192=cut */
1193
1194I32
1195Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
1196{
1197 tempsym_t sym;
1198
1199 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1200 else if (need_utf8(pat, patend)) {
1201 /* We probably should try to avoid this in case a scalar context call
1202 wouldn't get to the "U0" */
1203 STRLEN len = strend - s;
1204 s = (char *) bytes_to_utf8((U8 *) s, &len);
1205 SAVEFREEPV(s);
1206 strend = s + len;
1207 flags |= FLAG_DO_UTF8;
1208 }
1209
1210 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1211 flags |= FLAG_PARSE_UTF8;
1212
1213 TEMPSYM_INIT(&sym, pat, patend, flags);
1214
1215 return unpack_rec(&sym, s, s, strend, NULL );
1216}
1217
1218STATIC
1219I32
1220S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
1221{
1222 dVAR; dSP;
1223 SV *sv;
1224 const I32 start_sp_offset = SP - PL_stack_base;
1225 howlen_t howlen;
1226
1227 I32 checksum = 0;
1228 UV cuv = 0;
1229 NV cdouble = 0.0;
1230 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1231 bool beyond = FALSE;
1232 bool explicit_length;
1233 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1234 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1235 symptr->strbeg = s - strbeg;
1236
1237 while (next_symbol(symptr)) {
1238 packprops_t props;
1239 I32 len;
1240 I32 datumtype = symptr->code;
1241 /* do first one only unless in list context
1242 / is implemented by unpacking the count, then popping it from the
1243 stack, so must check that we're not in the middle of a / */
1244 if ( unpack_only_one
1245 && (SP - PL_stack_base == start_sp_offset + 1)
1246 && (datumtype != '/') ) /* XXX can this be omitted */
1247 break;
1248
1249 switch (howlen = symptr->howlen) {
1250 case e_star:
1251 len = strend - strbeg; /* long enough */
1252 break;
1253 default:
1254 /* e_no_len and e_number */
1255 len = symptr->length;
1256 break;
1257 }
1258
1259 explicit_length = TRUE;
1260 redo_switch:
1261 beyond = s >= strend;
1262
1263 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1264 if (props) {
1265 /* props nonzero means we can process this letter. */
1266 const long size = props & PACK_SIZE_MASK;
1267 const long howmany = (strend - s) / size;
1268 if (len > howmany)
1269 len = howmany;
1270
1271 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1272 if (len && unpack_only_one) len = 1;
1273 EXTEND(SP, len);
1274 EXTEND_MORTAL(len);
1275 }
1276 }
1277
1278 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1279 default:
1280 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1281
1282 case '%':
1283 if (howlen == e_no_len)
1284 len = 16; /* len is not specified */
1285 checksum = len;
1286 cuv = 0;
1287 cdouble = 0;
1288 continue;
1289 break;
1290 case '(':
1291 {
1292 tempsym_t savsym = *symptr;
1293 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1294 symptr->flags |= group_modifiers;
1295 symptr->patend = savsym.grpend;
1296 symptr->previous = &savsym;
1297 symptr->level++;
1298 PUTBACK;
1299 while (len--) {
1300 symptr->patptr = savsym.grpbeg;
1301 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1302 else symptr->flags &= ~FLAG_PARSE_UTF8;
1303 unpack_rec(symptr, s, strbeg, strend, &s);
1304 if (s == strend && savsym.howlen == e_star)
1305 break; /* No way to continue */
1306 }
1307 SPAGAIN;
1308 savsym.flags = symptr->flags & ~group_modifiers;
1309 *symptr = savsym;
1310 break;
1311 }
1312#ifdef PERL_PACK_CAN_SHRIEKSIGN
1313 case '.' | TYPE_IS_SHRIEKING:
1314#endif
1315 case '.': {
1316 const char *from;
1317 SV *sv;
1318#ifdef PERL_PACK_CAN_SHRIEKSIGN
1319 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1320#else /* PERL_PACK_CAN_SHRIEKSIGN */
1321 const bool u8 = utf8;
1322#endif
1323 if (howlen == e_star) from = strbeg;
1324 else if (len <= 0) from = s;
1325 else {
1326 tempsym_t *group = symptr;
1327
1328 while (--len && group) group = group->previous;
1329 from = group ? strbeg + group->strbeg : strbeg;
1330 }
1331 sv = from <= s ?
1332 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1333 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1334 XPUSHs(sv_2mortal(sv));
1335 break;
1336 }
1337#ifdef PERL_PACK_CAN_SHRIEKSIGN
1338 case '@' | TYPE_IS_SHRIEKING:
1339#endif
1340 case '@':
1341 s = strbeg + symptr->strbeg;
1342#ifdef PERL_PACK_CAN_SHRIEKSIGN
1343 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1344#else /* PERL_PACK_CAN_SHRIEKSIGN */
1345 if (utf8)
1346#endif
1347 {
1348 while (len > 0) {
1349 if (s >= strend)
1350 Perl_croak(aTHX_ "'@' outside of string in unpack");
1351 s += UTF8SKIP(s);
1352 len--;
1353 }
1354 if (s > strend)
1355 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1356 } else {
1357 if (strend-s < len)
1358 Perl_croak(aTHX_ "'@' outside of string in unpack");
1359 s += len;
1360 }
1361 break;
1362 case 'X' | TYPE_IS_SHRIEKING:
1363 if (!len) /* Avoid division by 0 */
1364 len = 1;
1365 if (utf8) {
1366 const char *hop, *last;
1367 I32 l = len;
1368 hop = last = strbeg;
1369 while (hop < s) {
1370 hop += UTF8SKIP(hop);
1371 if (--l == 0) {
1372 last = hop;
1373 l = len;
1374 }
1375 }
1376 if (last > s)
1377 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1378 s = last;
1379 break;
1380 }
1381 len = (s - strbeg) % len;
1382 /* FALL THROUGH */
1383 case 'X':
1384 if (utf8) {
1385 while (len > 0) {
1386 if (s <= strbeg)
1387 Perl_croak(aTHX_ "'X' outside of string in unpack");
1388 while (--s, UTF8_IS_CONTINUATION(*s)) {
1389 if (s <= strbeg)
1390 Perl_croak(aTHX_ "'X' outside of string in unpack");
1391 }
1392 len--;
1393 }
1394 } else {
1395 if (len > s - strbeg)
1396 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1397 s -= len;
1398 }
1399 break;
1400 case 'x' | TYPE_IS_SHRIEKING: {
1401 I32 ai32;
1402 if (!len) /* Avoid division by 0 */
1403 len = 1;
1404 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1405 else ai32 = (s - strbeg) % len;
1406 if (ai32 == 0) break;
1407 len -= ai32;
1408 }
1409 /* FALL THROUGH */
1410 case 'x':
1411 if (utf8) {
1412 while (len>0) {
1413 if (s >= strend)
1414 Perl_croak(aTHX_ "'x' outside of string in unpack");
1415 s += UTF8SKIP(s);
1416 len--;
1417 }
1418 } else {
1419 if (len > strend - s)
1420 Perl_croak(aTHX_ "'x' outside of string in unpack");
1421 s += len;
1422 }
1423 break;
1424 case '/':
1425 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1426 break;
1427 case 'A':
1428 case 'Z':
1429 case 'a':
1430 if (checksum) {
1431 /* Preliminary length estimate is assumed done in 'W' */
1432 if (len > strend - s) len = strend - s;
1433 goto W_checksum;
1434 }
1435 if (utf8) {
1436 I32 l;
1437 const char *hop;
1438 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1439 if (hop >= strend) {
1440 if (hop > strend)
1441 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1442 break;
1443 }
1444 }
1445 if (hop > strend)
1446 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1447 len = hop - s;
1448 } else if (len > strend - s)
1449 len = strend - s;
1450
1451 if (datumtype == 'Z') {
1452 /* 'Z' strips stuff after first null */
1453 const char *ptr, *end;
1454 end = s + len;
1455 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1456 sv = newSVpvn(s, ptr-s);
1457 if (howlen == e_star) /* exact for 'Z*' */
1458 len = ptr-s + (ptr != strend ? 1 : 0);
1459 } else if (datumtype == 'A') {
1460 /* 'A' strips both nulls and spaces */
1461 const char *ptr;
1462 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1463 for (ptr = s+len-1; ptr >= s; ptr--)
1464 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1465 !is_utf8_space((U8 *) ptr)) break;
1466 if (ptr >= s) ptr += UTF8SKIP(ptr);
1467 else ptr++;
1468 if (ptr > s+len)
1469 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1470 } else {
1471 for (ptr = s+len-1; ptr >= s; ptr--)
1472 if (*ptr != 0 && !isSPACE(*ptr)) break;
1473 ptr++;
1474 }
1475 sv = newSVpvn(s, ptr-s);
1476 } else sv = newSVpvn(s, len);
1477
1478 if (utf8) {
1479 SvUTF8_on(sv);
1480 /* Undo any upgrade done due to need_utf8() */
1481 if (!(symptr->flags & FLAG_WAS_UTF8))
1482 sv_utf8_downgrade(sv, 0);
1483 }
1484 XPUSHs(sv_2mortal(sv));
1485 s += len;
1486 break;
1487 case 'B':
1488 case 'b': {
1489 char *str;
1490 if (howlen == e_star || len > (strend - s) * 8)
1491 len = (strend - s) * 8;
1492 if (checksum) {
1493 if (!PL_bitcount) {
1494 int bits;
1495 Newz(601, PL_bitcount, 256, char);
1496 for (bits = 1; bits < 256; bits++) {
1497 if (bits & 1) PL_bitcount[bits]++;
1498 if (bits & 2) PL_bitcount[bits]++;
1499 if (bits & 4) PL_bitcount[bits]++;
1500 if (bits & 8) PL_bitcount[bits]++;
1501 if (bits & 16) PL_bitcount[bits]++;
1502 if (bits & 32) PL_bitcount[bits]++;
1503 if (bits & 64) PL_bitcount[bits]++;
1504 if (bits & 128) PL_bitcount[bits]++;
1505 }
1506 }
1507 if (utf8)
1508 while (len >= 8 && s < strend) {
1509 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1510 len -= 8;
1511 }
1512 else
1513 while (len >= 8) {
1514 cuv += PL_bitcount[*(U8 *)s++];
1515 len -= 8;
1516 }
1517 if (len && s < strend) {
1518 U8 bits;
1519 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1520 if (datumtype == 'b')
1521 while (len-- > 0) {
1522 if (bits & 1) cuv++;
1523 bits >>= 1;
1524 }
1525 else
1526 while (len-- > 0) {
1527 if (bits & 0x80) cuv++;
1528 bits <<= 1;
1529 }
1530 }
1531 break;
1532 }
1533
1534 sv = sv_2mortal(NEWSV(35, len ? len : 1));
1535 SvPOK_on(sv);
1536 str = SvPVX(sv);
1537 if (datumtype == 'b') {
1538 U8 bits = 0;
1539 const I32 ai32 = len;
1540 for (len = 0; len < ai32; len++) {
1541 if (len & 7) bits >>= 1;
1542 else if (utf8) {
1543 if (s >= strend) break;
1544 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1545 } else bits = *(U8 *) s++;
1546 *str++ = bits & 1 ? '1' : '0';
1547 }
1548 } else {
1549 U8 bits = 0;
1550 const I32 ai32 = len;
1551 for (len = 0; len < ai32; len++) {
1552 if (len & 7) bits <<= 1;
1553 else if (utf8) {
1554 if (s >= strend) break;
1555 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1556 } else bits = *(U8 *) s++;
1557 *str++ = bits & 0x80 ? '1' : '0';
1558 }
1559 }
1560 *str = '\0';
1561 SvCUR_set(sv, str - SvPVX_const(sv));
1562 XPUSHs(sv);
1563 break;
1564 }
1565 case 'H':
1566 case 'h': {
1567 char *str;
1568 /* Preliminary length estimate, acceptable for utf8 too */
1569 if (howlen == e_star || len > (strend - s) * 2)
1570 len = (strend - s) * 2;
1571 sv = sv_2mortal(NEWSV(35, len ? len : 1));
1572 SvPOK_on(sv);
1573 str = SvPVX(sv);
1574 if (datumtype == 'h') {
1575 U8 bits = 0;
1576 I32 ai32 = len;
1577 for (len = 0; len < ai32; len++) {
1578 if (len & 1) bits >>= 4;
1579 else if (utf8) {
1580 if (s >= strend) break;
1581 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1582 } else bits = * (U8 *) s++;
1583 *str++ = PL_hexdigit[bits & 15];
1584 }
1585 } else {
1586 U8 bits = 0;
1587 const I32 ai32 = len;
1588 for (len = 0; len < ai32; len++) {
1589 if (len & 1) bits <<= 4;
1590 else if (utf8) {
1591 if (s >= strend) break;
1592 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1593 } else bits = *(U8 *) s++;
1594 *str++ = PL_hexdigit[(bits >> 4) & 15];
1595 }
1596 }
1597 *str = '\0';
1598 SvCUR_set(sv, str - SvPVX_const(sv));
1599 XPUSHs(sv);
1600 break;
1601 }
1602 case 'c':
1603 while (len-- > 0) {
1604 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
1605 if (aint >= 128) /* fake up signed chars */
1606 aint -= 256;
1607 if (!checksum)
1608 PUSHs(sv_2mortal(newSViv((IV)aint)));
1609 else if (checksum > bits_in_uv)
1610 cdouble += (NV)aint;
1611 else
1612 cuv += aint;
1613 }
1614 break;
1615 case 'C':
1616 case 'W':
1617 W_checksum:
1618 if (len == 0) {
1619 if (explicit_length && datumtype == 'C')
1620 /* Switch to "character" mode */
1621 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1622 break;
1623 }
1624 if (datumtype == 'C' ?
1625 (symptr->flags & FLAG_DO_UTF8) &&
1626 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
1627 while (len-- > 0 && s < strend) {
1628 STRLEN retlen;
1629 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1630 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1631 if (retlen == (STRLEN) -1 || retlen == 0)
1632 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1633 s += retlen;
1634 if (!checksum)
1635 PUSHs(sv_2mortal(newSVuv((UV) val)));
1636 else if (checksum > bits_in_uv)
1637 cdouble += (NV) val;
1638 else
1639 cuv += val;
1640 }
1641 } else if (!checksum)
1642 while (len-- > 0) {
1643 const U8 ch = *(U8 *) s++;
1644 PUSHs(sv_2mortal(newSVuv((UV) ch)));
1645 }
1646 else if (checksum > bits_in_uv)
1647 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1648 else
1649 while (len-- > 0) cuv += *(U8 *) s++;
1650 break;
1651 case 'U':
1652 if (len == 0) {
1653 if (explicit_length) {
1654 /* Switch to "bytes in UTF-8" mode */
1655 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1656 else
1657 /* Should be impossible due to the need_utf8() test */
1658 Perl_croak(aTHX_ "U0 mode on a byte string");
1659 }
1660 break;
1661 }
1662 if (len > strend - s) len = strend - s;
1663 if (!checksum) {
1664 if (len && unpack_only_one) len = 1;
1665 EXTEND(SP, len);
1666 EXTEND_MORTAL(len);
1667 }
1668 while (len-- > 0 && s < strend) {
1669 STRLEN retlen;
1670 UV auv;
1671 if (utf8) {
1672 U8 result[UTF8_MAXLEN];
1673 const char *ptr = s;
1674 STRLEN len;
1675 /* Bug: warns about bad utf8 even if we are short on bytes
1676 and will break out of the loop */
1677 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1678 'U'))
1679 break;
1680 len = UTF8SKIP(result);
1681 if (!uni_to_bytes(aTHX_ &ptr, strend,
1682 (char *) &result[1], len-1, 'U')) break;
1683 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1684 s = ptr;
1685 } else {
1686 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1687 if (retlen == (STRLEN) -1 || retlen == 0)
1688 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1689 s += retlen;
1690 }
1691 if (!checksum)
1692 PUSHs(sv_2mortal(newSVuv((UV) auv)));
1693 else if (checksum > bits_in_uv)
1694 cdouble += (NV) auv;
1695 else
1696 cuv += auv;
1697 }
1698 break;
1699 case 's' | TYPE_IS_SHRIEKING:
1700#if SHORTSIZE != SIZE16
1701 while (len-- > 0) {
1702 short ashort;
1703 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1704 DO_BO_UNPACK(ashort, s);
1705 if (!checksum)
1706 PUSHs(sv_2mortal(newSViv((IV)ashort)));
1707 else if (checksum > bits_in_uv)
1708 cdouble += (NV)ashort;
1709 else
1710 cuv += ashort;
1711 }
1712 break;
1713#else
1714 /* Fallthrough! */
1715#endif
1716 case 's':
1717 while (len-- > 0) {
1718 I16 ai16;
1719
1720#if U16SIZE > SIZE16
1721 ai16 = 0;
1722#endif
1723 SHIFT16(utf8, s, strend, &ai16, datumtype);
1724 DO_BO_UNPACK(ai16, 16);
1725#if U16SIZE > SIZE16
1726 if (ai16 > 32767)
1727 ai16 -= 65536;
1728#endif
1729 if (!checksum)
1730 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1731 else if (checksum > bits_in_uv)
1732 cdouble += (NV)ai16;
1733 else
1734 cuv += ai16;
1735 }
1736 break;
1737 case 'S' | TYPE_IS_SHRIEKING:
1738#if SHORTSIZE != SIZE16
1739 while (len-- > 0) {
1740 unsigned short aushort;
1741 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1742 DO_BO_UNPACK(aushort, s);
1743 if (!checksum)
1744 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
1745 else if (checksum > bits_in_uv)
1746 cdouble += (NV)aushort;
1747 else
1748 cuv += aushort;
1749 }
1750 break;
1751#else
1752 /* Fallhrough! */
1753#endif
1754 case 'v':
1755 case 'n':
1756 case 'S':
1757 while (len-- > 0) {
1758 U16 au16;
1759#if U16SIZE > SIZE16
1760 au16 = 0;
1761#endif
1762 SHIFT16(utf8, s, strend, &au16, datumtype);
1763 DO_BO_UNPACK(au16, 16);
1764#ifdef HAS_NTOHS
1765 if (datumtype == 'n')
1766 au16 = PerlSock_ntohs(au16);
1767#endif
1768#ifdef HAS_VTOHS
1769 if (datumtype == 'v')
1770 au16 = vtohs(au16);
1771#endif
1772 if (!checksum)
1773 PUSHs(sv_2mortal(newSVuv((UV)au16)));
1774 else if (checksum > bits_in_uv)
1775 cdouble += (NV) au16;
1776 else
1777 cuv += au16;
1778 }
1779 break;
1780#ifdef PERL_PACK_CAN_SHRIEKSIGN
1781 case 'v' | TYPE_IS_SHRIEKING:
1782 case 'n' | TYPE_IS_SHRIEKING:
1783 while (len-- > 0) {
1784 I16 ai16;
1785# if U16SIZE > SIZE16
1786 ai16 = 0;
1787# endif
1788 SHIFT16(utf8, s, strend, &ai16, datumtype);
1789# ifdef HAS_NTOHS
1790 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1791 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1792# endif /* HAS_NTOHS */
1793# ifdef HAS_VTOHS
1794 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1795 ai16 = (I16) vtohs((U16) ai16);
1796# endif /* HAS_VTOHS */
1797 if (!checksum)
1798 PUSHs(sv_2mortal(newSViv((IV)ai16)));
1799 else if (checksum > bits_in_uv)
1800 cdouble += (NV) ai16;
1801 else
1802 cuv += ai16;
1803 }
1804 break;
1805#endif /* PERL_PACK_CAN_SHRIEKSIGN */
1806 case 'i':
1807 case 'i' | TYPE_IS_SHRIEKING:
1808 while (len-- > 0) {
1809 int aint;
1810 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1811 DO_BO_UNPACK(aint, i);
1812 if (!checksum)
1813 PUSHs(sv_2mortal(newSViv((IV)aint)));
1814 else if (checksum > bits_in_uv)
1815 cdouble += (NV)aint;
1816 else
1817 cuv += aint;
1818 }
1819 break;
1820 case 'I':
1821 case 'I' | TYPE_IS_SHRIEKING:
1822 while (len-- > 0) {
1823 unsigned int auint;
1824 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1825 DO_BO_UNPACK(auint, i);
1826 if (!checksum)
1827 PUSHs(sv_2mortal(newSVuv((UV)auint)));
1828 else if (checksum > bits_in_uv)
1829 cdouble += (NV)auint;
1830 else
1831 cuv += auint;
1832 }
1833 break;
1834 case 'j':
1835 while (len-- > 0) {
1836 IV aiv;
1837 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1838#if IVSIZE == INTSIZE
1839 DO_BO_UNPACK(aiv, i);
1840#elif IVSIZE == LONGSIZE
1841 DO_BO_UNPACK(aiv, l);
1842#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1843 DO_BO_UNPACK(aiv, 64);
1844#else
1845 Perl_croak(aTHX_ "'j' not supported on this platform");
1846#endif
1847 if (!checksum)
1848 PUSHs(sv_2mortal(newSViv(aiv)));
1849 else if (checksum > bits_in_uv)
1850 cdouble += (NV)aiv;
1851 else
1852 cuv += aiv;
1853 }
1854 break;
1855 case 'J':
1856 while (len-- > 0) {
1857 UV auv;
1858 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1859#if IVSIZE == INTSIZE
1860 DO_BO_UNPACK(auv, i);
1861#elif IVSIZE == LONGSIZE
1862 DO_BO_UNPACK(auv, l);
1863#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1864 DO_BO_UNPACK(auv, 64);
1865#else
1866 Perl_croak(aTHX_ "'J' not supported on this platform");
1867#endif
1868 if (!checksum)
1869 PUSHs(sv_2mortal(newSVuv(auv)));
1870 else if (checksum > bits_in_uv)
1871 cdouble += (NV)auv;
1872 else
1873 cuv += auv;
1874 }
1875 break;
1876 case 'l' | TYPE_IS_SHRIEKING:
1877#if LONGSIZE != SIZE32
1878 while (len-- > 0) {
1879 long along;
1880 SHIFT_VAR(utf8, s, strend, along, datumtype);
1881 DO_BO_UNPACK(along, l);
1882 if (!checksum)
1883 PUSHs(sv_2mortal(newSViv((IV)along)));
1884 else if (checksum > bits_in_uv)
1885 cdouble += (NV)along;
1886 else
1887 cuv += along;
1888 }
1889 break;
1890#else
1891 /* Fallthrough! */
1892#endif
1893 case 'l':
1894 while (len-- > 0) {
1895 I32 ai32;
1896#if U32SIZE > SIZE32
1897 ai32 = 0;
1898#endif
1899 SHIFT32(utf8, s, strend, &ai32, datumtype);
1900 DO_BO_UNPACK(ai32, 32);
1901#if U32SIZE > SIZE32
1902 if (ai32 > 2147483647) ai32 -= 4294967296;
1903#endif
1904 if (!checksum)
1905 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1906 else if (checksum > bits_in_uv)
1907 cdouble += (NV)ai32;
1908 else
1909 cuv += ai32;
1910 }
1911 break;
1912 case 'L' | TYPE_IS_SHRIEKING:
1913#if LONGSIZE != SIZE32
1914 while (len-- > 0) {
1915 unsigned long aulong;
1916 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1917 DO_BO_UNPACK(aulong, l);
1918 if (!checksum)
1919 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
1920 else if (checksum > bits_in_uv)
1921 cdouble += (NV)aulong;
1922 else
1923 cuv += aulong;
1924 }
1925 break;
1926#else
1927 /* Fall through! */
1928#endif
1929 case 'V':
1930 case 'N':
1931 case 'L':
1932 while (len-- > 0) {
1933 U32 au32;
1934#if U32SIZE > SIZE32
1935 au32 = 0;
1936#endif
1937 SHIFT32(utf8, s, strend, &au32, datumtype);
1938 DO_BO_UNPACK(au32, 32);
1939#ifdef HAS_NTOHL
1940 if (datumtype == 'N')
1941 au32 = PerlSock_ntohl(au32);
1942#endif
1943#ifdef HAS_VTOHL
1944 if (datumtype == 'V')
1945 au32 = vtohl(au32);
1946#endif
1947 if (!checksum)
1948 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1949 else if (checksum > bits_in_uv)
1950 cdouble += (NV)au32;
1951 else
1952 cuv += au32;
1953 }
1954 break;
1955#ifdef PERL_PACK_CAN_SHRIEKSIGN
1956 case 'V' | TYPE_IS_SHRIEKING:
1957 case 'N' | TYPE_IS_SHRIEKING:
1958 while (len-- > 0) {
1959 I32 ai32;
1960# if U32SIZE > SIZE32
1961 ai32 = 0;
1962# endif
1963 SHIFT32(utf8, s, strend, &ai32, datumtype);
1964# ifdef HAS_NTOHL
1965 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1966 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1967# endif
1968# ifdef HAS_VTOHL
1969 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1970 ai32 = (I32)vtohl((U32)ai32);
1971# endif
1972 if (!checksum)
1973 PUSHs(sv_2mortal(newSViv((IV)ai32)));
1974 else if (checksum > bits_in_uv)
1975 cdouble += (NV)ai32;
1976 else
1977 cuv += ai32;
1978 }
1979 break;
1980#endif /* PERL_PACK_CAN_SHRIEKSIGN */
1981 case 'p':
1982 while (len-- > 0) {
1983 const char *aptr;
1984 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1985 DO_BO_UNPACK_PC(aptr);
1986 /* newSVpv generates undef if aptr is NULL */
1987 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
1988 }
1989 break;
1990 case 'w':
1991 {
1992 UV auv = 0;
1993 U32 bytes = 0;
1994
1995 while (len > 0 && s < strend) {
1996 U8 ch;
1997 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1998 auv = (auv << 7) | (ch & 0x7f);
1999 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
2000 if (ch < 0x80) {
2001 bytes = 0;
2002 PUSHs(sv_2mortal(newSVuv(auv)));
2003 len--;
2004 auv = 0;
2005 continue;
2006 }
2007 if (++bytes >= sizeof(UV)) { /* promote to string */
2008 const char *t;
2009
2010 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
2011 while (s < strend) {
2012 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
2013 sv = mul128(sv, (U8)(ch & 0x7f));
2014 if (!(ch & 0x80)) {
2015 bytes = 0;
2016 break;
2017 }
2018 }
2019 t = SvPV_nolen_const(sv);
2020 while (*t == '0')
2021 t++;
2022 sv_chop(sv, t);
2023 PUSHs(sv_2mortal(sv));
2024 len--;
2025 auv = 0;
2026 }
2027 }
2028 if ((s >= strend) && bytes)
2029 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
2030 }
2031 break;
2032 case 'P':
2033 if (symptr->howlen == e_star)
2034 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
2035 EXTEND(SP, 1);
2036 if (sizeof(char*) <= strend - s) {
2037 char *aptr;
2038 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2039 DO_BO_UNPACK_PC(aptr);
2040 /* newSVpvn generates undef if aptr is NULL */
2041 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
2042 }
2043 break;
2044#ifdef HAS_QUAD
2045 case 'q':
2046 while (len-- > 0) {
2047 Quad_t aquad;
2048 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2049 DO_BO_UNPACK(aquad, 64);
2050 if (!checksum)
2051 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
2052 newSViv((IV)aquad) : newSVnv((NV)aquad)));
2053 else if (checksum > bits_in_uv)
2054 cdouble += (NV)aquad;
2055 else
2056 cuv += aquad;
2057 }
2058 break;
2059 case 'Q':
2060 while (len-- > 0) {
2061 Uquad_t auquad;
2062 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2063 DO_BO_UNPACK(auquad, 64);
2064 if (!checksum)
2065 PUSHs(sv_2mortal(auquad <= UV_MAX ?
2066 newSVuv((UV)auquad):newSVnv((NV)auquad)));
2067 else if (checksum > bits_in_uv)
2068 cdouble += (NV)auquad;
2069 else
2070 cuv += auquad;
2071 }
2072 break;
2073#endif /* HAS_QUAD */
2074 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2075 case 'f':
2076 while (len-- > 0) {
2077 float afloat;
2078 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
2079 DO_BO_UNPACK_N(afloat, float);
2080 if (!checksum)
2081 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2082 else
2083 cdouble += afloat;
2084 }
2085 break;
2086 case 'd':
2087 while (len-- > 0) {
2088 double adouble;
2089 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
2090 DO_BO_UNPACK_N(adouble, double);
2091 if (!checksum)
2092 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2093 else
2094 cdouble += adouble;
2095 }
2096 break;
2097 case 'F':
2098 while (len-- > 0) {
2099 NV anv;
2100 SHIFT_VAR(utf8, s, strend, anv, datumtype);
2101 DO_BO_UNPACK_N(anv, NV);
2102 if (!checksum)
2103 PUSHs(sv_2mortal(newSVnv(anv)));
2104 else
2105 cdouble += anv;
2106 }
2107 break;
2108#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2109 case 'D':
2110 while (len-- > 0) {
2111 long double aldouble;
2112 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
2113 DO_BO_UNPACK_N(aldouble, long double);
2114 if (!checksum)
2115 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2116 else
2117 cdouble += aldouble;
2118 }
2119 break;
2120#endif
2121 case 'u':
2122 /* MKS:
2123 * Initialise the decode mapping. By using a table driven
2124 * algorithm, the code will be character-set independent
2125 * (and just as fast as doing character arithmetic)
2126 */
2127 if (PL_uudmap['M'] == 0) {
2128 int i;
2129
2130 for (i = 0; i < sizeof(PL_uuemap); i += 1)
2131 PL_uudmap[(U8)PL_uuemap[i]] = i;
2132 /*
2133 * Because ' ' and '`' map to the same value,
2134 * we need to decode them both the same.
2135 */
2136 PL_uudmap[' '] = 0;
2137 }
2138 {
2139 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2140 sv = sv_2mortal(NEWSV(42, l));
2141 if (l) SvPOK_on(sv);
2142 }
2143 if (utf8) {
2144 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2145 I32 a, b, c, d;
2146 char hunk[4];
2147
2148 hunk[3] = '\0';
2149 while (len > 0) {
2150 next_uni_uu(aTHX_ &s, strend, &a);
2151 next_uni_uu(aTHX_ &s, strend, &b);
2152 next_uni_uu(aTHX_ &s, strend, &c);
2153 next_uni_uu(aTHX_ &s, strend, &d);
2154 hunk[0] = (char)((a << 2) | (b >> 4));
2155 hunk[1] = (char)((b << 4) | (c >> 2));
2156 hunk[2] = (char)((c << 6) | d);
2157 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2158 len -= 3;
2159 }
2160 if (s < strend) {
2161 if (*s == '\n') {
2162 s++;
2163 }
2164 else {
2165 /* possible checksum byte */
2166 const char *skip = s+UTF8SKIP(s);
2167 if (skip < strend && *skip == '\n')
2168 s = skip+1;
2169 }
2170 }
2171 }
2172 } else {
2173 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2174 I32 a, b, c, d;
2175 char hunk[4];
2176
2177 hunk[3] = '\0';
2178 len = PL_uudmap[*(U8*)s++] & 077;
2179 while (len > 0) {
2180 if (s < strend && ISUUCHAR(*s))
2181 a = PL_uudmap[*(U8*)s++] & 077;
2182 else
2183 a = 0;
2184 if (s < strend && ISUUCHAR(*s))
2185 b = PL_uudmap[*(U8*)s++] & 077;
2186 else
2187 b = 0;
2188 if (s < strend && ISUUCHAR(*s))
2189 c = PL_uudmap[*(U8*)s++] & 077;
2190 else
2191 c = 0;
2192 if (s < strend && ISUUCHAR(*s))
2193 d = PL_uudmap[*(U8*)s++] & 077;
2194 else
2195 d = 0;
2196 hunk[0] = (char)((a << 2) | (b >> 4));
2197 hunk[1] = (char)((b << 4) | (c >> 2));
2198 hunk[2] = (char)((c << 6) | d);
2199 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2200 len -= 3;
2201 }
2202 if (*s == '\n')
2203 s++;
2204 else /* possible checksum byte */
2205 if (s + 1 < strend && s[1] == '\n')
2206 s += 2;
2207 }
2208 }
2209 XPUSHs(sv);
2210 break;
2211 }
2212
2213 if (checksum) {
2214 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2215 (checksum > bits_in_uv &&
2216 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2217 NV trouble, anv;
2218
2219 anv = (NV) (1 << (checksum & 15));
2220 while (checksum >= 16) {
2221 checksum -= 16;
2222 anv *= 65536.0;
2223 }
2224 while (cdouble < 0.0)
2225 cdouble += anv;
2226 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2227 sv = newSVnv(cdouble);
2228 }
2229 else {
2230 if (checksum < bits_in_uv) {
2231 UV mask = ((UV)1 << checksum) - 1;
2232 cuv &= mask;
2233 }
2234 sv = newSVuv(cuv);
2235 }
2236 XPUSHs(sv_2mortal(sv));
2237 checksum = 0;
2238 }
2239
2240 if (symptr->flags & FLAG_SLASH){
2241 if (SP - PL_stack_base - start_sp_offset <= 0)
2242 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2243 if( next_symbol(symptr) ){
2244 if( symptr->howlen == e_number )
2245 Perl_croak(aTHX_ "Count after length/code in unpack" );
2246 if( beyond ){
2247 /* ...end of char buffer then no decent length available */
2248 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2249 } else {
2250 /* take top of stack (hope it's numeric) */
2251 len = POPi;
2252 if( len < 0 )
2253 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2254 }
2255 } else {
2256 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2257 }
2258 datumtype = symptr->code;
2259 explicit_length = FALSE;
2260 goto redo_switch;
2261 }
2262 }
2263
2264 if (new_s)
2265 *new_s = s;
2266 PUTBACK;
2267 return SP - PL_stack_base - start_sp_offset;
2268}
2269
2270PP(pp_unpack)
2271{
2272 dSP;
2273 dPOPPOPssrl;
2274 I32 gimme = GIMME_V;
2275 STRLEN llen;
2276 STRLEN rlen;
2277 const char *pat = SvPV_const(left, llen);
2278 const char *s = SvPV_const(right, rlen);
2279 const char *strend = s + rlen;
2280 const char *patend = pat + llen;
2281 I32 cnt;
2282
2283 PUTBACK;
2284 cnt = unpackstring(pat, patend, s, strend,
2285 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2286 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2287
2288 SPAGAIN;
2289 if ( !cnt && gimme == G_SCALAR )
2290 PUSHs(&PL_sv_undef);
2291 RETURN;
2292}
2293
2294STATIC U8 *
2295doencodes(U8 *h, const char *s, I32 len)
2296{
2297 *h++ = PL_uuemap[len];
2298 while (len > 2) {
2299 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2300 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2301 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2302 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2303 s += 3;
2304 len -= 3;
2305 }
2306 if (len > 0) {
2307 const char r = (len > 1 ? s[1] : '\0');
2308 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2309 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2310 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2311 *h++ = PL_uuemap[0];
2312 }
2313 *h++ = '\n';
2314 return h;
2315}
2316
2317STATIC SV *
2318S_is_an_int(pTHX_ const char *s, STRLEN l)
2319{
2320 STRLEN n_a;
2321 SV *result = newSVpvn(s, l);
2322 char *result_c = SvPV(result, n_a); /* convenience */
2323 char *out = result_c;
2324 bool skip = 1;
2325 bool ignore = 0;
2326
2327 while (*s) {
2328 switch (*s) {
2329 case ' ':
2330 break;
2331 case '+':
2332 if (!skip) {
2333 SvREFCNT_dec(result);
2334 return (NULL);
2335 }
2336 break;
2337 case '0':
2338 case '1':
2339 case '2':
2340 case '3':
2341 case '4':
2342 case '5':
2343 case '6':
2344 case '7':
2345 case '8':
2346 case '9':
2347 skip = 0;
2348 if (!ignore) {
2349 *(out++) = *s;
2350 }
2351 break;
2352 case '.':
2353 ignore = 1;
2354 break;
2355 default:
2356 SvREFCNT_dec(result);
2357 return (NULL);
2358 }
2359 s++;
2360 }
2361 *(out++) = '\0';
2362 SvCUR_set(result, out - result_c);
2363 return (result);
2364}
2365
2366/* pnum must be '\0' terminated */
2367STATIC int
2368S_div128(pTHX_ SV *pnum, bool *done)
2369{
2370 STRLEN len;
2371 char *s = SvPV(pnum, len);
2372 int m = 0;
2373 int r = 0;
2374 char *t = s;
2375
2376 *done = 1;
2377 while (*t) {
2378 const int i = m * 10 + (*t - '0');
2379 m = i & 0x7F;
2380 r = (i >> 7); /* r < 10 */
2381 if (r) {
2382 *done = 0;
2383 }
2384 *(t++) = '0' + r;
2385 }
2386 *(t++) = '\0';
2387 SvCUR_set(pnum, (STRLEN) (t - s));
2388 return (m);
2389}
2390
2391/*
2392=for apidoc pack_cat
2393
2394The engine implementing pack() Perl function. Note: parameters next_in_list and
2395flags are not used. This call should not be used; use packlist instead.
2396
2397=cut */
2398
2399
2400void
2401Perl_pack_cat(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags)
2402{
2403 tempsym_t sym;
2404 (void)next_in_list;
2405 (void)flags;
2406
2407 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2408
2409 (void)pack_rec( cat, &sym, beglist, endlist );
2410}
2411
2412
2413/*
2414=for apidoc packlist
2415
2416The engine implementing pack() Perl function.
2417
2418=cut */
2419
2420
2421void
2422Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
2423{
2424 STRLEN no_len;
2425 tempsym_t sym;
2426
2427 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2428
2429 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2430 Also make sure any UTF8 flag is loaded */
2431 SvPV_force(cat, no_len);
2432 if (DO_UTF8(cat)) sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2433
2434 (void)pack_rec( cat, &sym, beglist, endlist );
2435}
2436
2437/* like sv_utf8_upgrade, but also repoint the group start markers */
2438STATIC void
2439marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2440 STRLEN len;
2441 tempsym_t *group;
2442 const char *from_ptr, *from_start, *from_end, **marks, **m;
2443 char *to_start, *to_ptr;
2444
2445 if (SvUTF8(sv)) return;
2446
2447 from_start = SvPVX_const(sv);
2448 from_end = from_start + SvCUR(sv);
2449 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2450 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2451 if (from_ptr == from_end) {
2452 /* Simple case: no character needs to be changed */
2453 SvUTF8_on(sv);
2454 return;
2455 }
2456
2457 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2458 New('U', to_start, len, char);
2459 Copy(from_start, to_start, from_ptr-from_start, char);
2460 to_ptr = to_start + (from_ptr-from_start);
2461
2462 New('U', marks, sym_ptr->level+2, const char *);
2463 for (group=sym_ptr; group; group = group->previous)
2464 marks[group->level] = from_start + group->strbeg;
2465 marks[sym_ptr->level+1] = from_end+1;
2466 for (m = marks; *m < from_ptr; m++)
2467 *m = to_start + (*m-from_start);
2468
2469 for (;from_ptr < from_end; from_ptr++) {
2470 while (*m == from_ptr) *m++ = to_ptr;
2471 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2472 }
2473 *to_ptr = 0;
2474
2475 while (*m == from_ptr) *m++ = to_ptr;
2476 if (m != marks + sym_ptr->level+1) {
2477 Safefree(marks);
2478 Safefree(to_start);
2479 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2480 }
2481 for (group=sym_ptr; group; group = group->previous)
2482 group->strbeg = marks[group->level] - to_start;
2483 Safefree(marks);
2484
2485 if (SvOOK(sv)) {
2486 if (SvIVX(sv)) {
2487 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2488 from_start -= SvIVX(sv);
2489 SvIV_set(sv, 0);
2490 }
2491 SvFLAGS(sv) &= ~SVf_OOK;
2492 }
2493 if (SvLEN(sv) != 0)
2494 Safefree(from_start);
2495 SvPV_set(sv, to_start);
2496 SvCUR_set(sv, to_ptr - to_start);
2497 SvLEN_set(sv, len);
2498 SvUTF8_on(sv);
2499}
2500
2501/* Exponential string grower. Makes string extension effectively O(n)
2502 needed says how many extra bytes we need (not counting the final '\0')
2503 Only grows the string if there is an actual lack of space
2504*/
2505STATIC char *
2506sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2507 const STRLEN cur = SvCUR(sv);
2508 const STRLEN len = SvLEN(sv);
2509 STRLEN extend;
2510 if (len - cur > needed) return SvPVX(sv);
2511 extend = needed > len ? needed : len;
2512 return SvGROW(sv, len+extend+1);
2513}
2514
2515STATIC
2516SV **
2517S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2518{
2519 tempsym_t lookahead;
2520 I32 items = endlist - beglist;
2521 bool found = next_symbol(symptr);
2522 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2523
2524 if (symptr->level == 0 && found && symptr->code == 'U') {
2525 marked_upgrade(aTHX_ cat, symptr);
2526 symptr->flags |= FLAG_DO_UTF8;
2527 utf8 = 0;
2528 }
2529 symptr->strbeg = SvCUR(cat);
2530
2531 while (found) {
2532 SV *fromstr;
2533 STRLEN fromlen;
2534 I32 len;
2535 SV *lengthcode = Nullsv;
2536 I32 datumtype = symptr->code;
2537 howlen_t howlen = symptr->howlen;
2538 char *start = SvPVX(cat);
2539 char *cur = start + SvCUR(cat);
2540
2541#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2542
2543 switch (howlen) {
2544 case e_star:
2545 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2546 0 : items;
2547 break;
2548 default:
2549 /* e_no_len and e_number */
2550 len = symptr->length;
2551 break;
2552 }
2553
2554 if (len) {
2555 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2556
2557 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2558 /* We can process this letter. */
2559 STRLEN size = props & PACK_SIZE_MASK;
2560 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2561 }
2562 }
2563
2564 /* Look ahead for next symbol. Do we have code/code? */
2565 lookahead = *symptr;
2566 found = next_symbol(&lookahead);
2567 if (symptr->flags & FLAG_SLASH) {
2568 IV count;
2569 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2570 if (strchr("aAZ", lookahead.code)) {
2571 if (lookahead.howlen == e_number) count = lookahead.length;
2572 else {
2573 if (items > 0)
2574 count = DO_UTF8(*beglist) ?
2575 sv_len_utf8(*beglist) : sv_len(*beglist);
2576 else count = 0;
2577 if (lookahead.code == 'Z') count++;
2578 }
2579 } else {
2580 if (lookahead.howlen == e_number && lookahead.length < items)
2581 count = lookahead.length;
2582 else count = items;
2583 }
2584 lookahead.howlen = e_number;
2585 lookahead.length = count;
2586 lengthcode = sv_2mortal(newSViv(count));
2587 }
2588
2589 /* Code inside the switch must take care to properly update
2590 cat (CUR length and '\0' termination) if it updated *cur and
2591 doesn't simply leave using break */
2592 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2593 default:
2594 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2595 (int) TYPE_NO_MODIFIERS(datumtype));
2596 case '%':
2597 Perl_croak(aTHX_ "'%%' may not be used in pack");
2598 {
2599 char *from;
2600#ifdef PERL_PACK_CAN_SHRIEKSIGN
2601 case '.' | TYPE_IS_SHRIEKING:
2602#endif
2603 case '.':
2604 if (howlen == e_star) from = start;
2605 else if (len == 0) from = cur;
2606 else {
2607 tempsym_t *group = symptr;
2608
2609 while (--len && group) group = group->previous;
2610 from = group ? start + group->strbeg : start;
2611 }
2612 fromstr = NEXTFROM;
2613 len = SvIV(fromstr);
2614 goto resize;
2615#ifdef PERL_PACK_CAN_SHRIEKSIGN
2616 case '@' | TYPE_IS_SHRIEKING:
2617#endif
2618 case '@':
2619 from = start + symptr->strbeg;
2620 resize:
2621#ifdef PERL_PACK_CAN_SHRIEKSIGN
2622 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2623#else /* PERL_PACK_CAN_SHRIEKSIGN */
2624 if (utf8)
2625#endif
2626 if (len >= 0) {
2627 while (len && from < cur) {
2628 from += UTF8SKIP(from);
2629 len--;
2630 }
2631 if (from > cur)
2632 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2633 if (len) {
2634 /* Here we know from == cur */
2635 grow:
2636 GROWING(0, cat, start, cur, len);
2637 Zero(cur, len, char);
2638 cur += len;
2639 } else if (from < cur) {
2640 len = cur - from;
2641 goto shrink;
2642 } else goto no_change;
2643 } else {
2644 cur = from;
2645 len = -len;
2646 goto utf8_shrink;
2647 }
2648 else {
2649 len -= cur - from;
2650 if (len > 0) goto grow;
2651 if (len == 0) goto no_change;
2652 len = -len;
2653 goto shrink;
2654 }
2655 break;
2656 }
2657 case '(': {
2658 tempsym_t savsym = *symptr;
2659 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2660 symptr->flags |= group_modifiers;
2661 symptr->patend = savsym.grpend;
2662 symptr->level++;
2663 symptr->previous = &lookahead;
2664 while (len--) {
2665 U32 was_utf8;
2666 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2667 else symptr->flags &= ~FLAG_PARSE_UTF8;
2668 was_utf8 = SvUTF8(cat);
2669 symptr->patptr = savsym.grpbeg;
2670 beglist = pack_rec(cat, symptr, beglist, endlist);
2671 if (SvUTF8(cat) != was_utf8)
2672 /* This had better be an upgrade while in utf8==0 mode */
2673 utf8 = 1;
2674
2675 if (savsym.howlen == e_star && beglist == endlist)
2676 break; /* No way to continue */
2677 }
2678 lookahead.flags = symptr->flags & ~group_modifiers;
2679 goto no_change;
2680 }
2681 case 'X' | TYPE_IS_SHRIEKING:
2682 if (!len) /* Avoid division by 0 */
2683 len = 1;
2684 if (utf8) {
2685 char *hop, *last;
2686 I32 l = len;
2687 hop = last = start;
2688 while (hop < cur) {
2689 hop += UTF8SKIP(hop);
2690 if (--l == 0) {
2691 last = hop;
2692 l = len;
2693 }
2694 }
2695 if (last > cur)
2696 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2697 cur = last;
2698 break;
2699 }
2700 len = (cur-start) % len;
2701 /* FALL THROUGH */
2702 case 'X':
2703 if (utf8) {
2704 if (len < 1) goto no_change;
2705 utf8_shrink:
2706 while (len > 0) {
2707 if (cur <= start)
2708 Perl_croak(aTHX_ "'%c' outside of string in pack",
2709 (int) TYPE_NO_MODIFIERS(datumtype));
2710 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2711 if (cur <= start)
2712 Perl_croak(aTHX_ "'%c' outside of string in pack",
2713 (int) TYPE_NO_MODIFIERS(datumtype));
2714 }
2715 len--;
2716 }
2717 } else {
2718 shrink:
2719 if (cur - start < len)
2720 Perl_croak(aTHX_ "'%c' outside of string in pack",
2721 (int) TYPE_NO_MODIFIERS(datumtype));
2722 cur -= len;
2723 }
2724 if (cur < start+symptr->strbeg) {
2725 /* Make sure group starts don't point into the void */
2726 tempsym_t *group;
2727 const STRLEN length = cur-start;
2728 for (group = symptr;
2729 group && length < group->strbeg;
2730 group = group->previous) group->strbeg = length;
2731 lookahead.strbeg = length;
2732 }
2733 break;
2734 case 'x' | TYPE_IS_SHRIEKING: {
2735 I32 ai32;
2736 if (!len) /* Avoid division by 0 */
2737 len = 1;
2738 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2739 else ai32 = (cur - start) % len;
2740 if (ai32 == 0) goto no_change;
2741 len -= ai32;
2742 }
2743 /* FALL THROUGH */
2744 case 'x':
2745 goto grow;
2746 case 'A':
2747 case 'Z':
2748 case 'a': {
2749 const char *aptr;
2750
2751 fromstr = NEXTFROM;
2752 aptr = SvPV_const(fromstr, fromlen);
2753 if (DO_UTF8(fromstr)) {
2754 const char *end, *s;
2755
2756 if (!utf8 && !SvUTF8(cat)) {
2757 marked_upgrade(aTHX_ cat, symptr);
2758 lookahead.flags |= FLAG_DO_UTF8;
2759 lookahead.strbeg = symptr->strbeg;
2760 utf8 = 1;
2761 start = SvPVX(cat);
2762 cur = start + SvCUR(cat);
2763 }
2764 if (howlen == e_star) {
2765 if (utf8) goto string_copy;
2766 len = fromlen+1;
2767 }
2768 s = aptr;
2769 end = aptr + fromlen;
2770 fromlen = datumtype == 'Z' ? len-1 : len;
2771 while ((I32) fromlen > 0 && s < end) {
2772 s += UTF8SKIP(s);
2773 fromlen--;
2774 }
2775 if (s > end)
2776 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2777 if (utf8) {
2778 len = fromlen;
2779 if (datumtype == 'Z') len++;
2780 fromlen = s-aptr;
2781 len += fromlen;
2782
2783 goto string_copy;
2784 }
2785 fromlen = len - fromlen;
2786 if (datumtype == 'Z') fromlen--;
2787 if (howlen == e_star) {
2788 len = fromlen;
2789 if (datumtype == 'Z') len++;
2790 }
2791 GROWING(0, cat, start, cur, len);
2792 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2793 datumtype | TYPE_IS_PACK))
2794 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2795 cur += fromlen;
2796 len -= fromlen;
2797 } else if (utf8) {
2798 if (howlen == e_star) {
2799 len = fromlen;
2800 if (datumtype == 'Z') len++;
2801 }
2802 if (len <= (I32) fromlen) {
2803 fromlen = len;
2804 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2805 }
2806 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2807 upgrade, so:
2808 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2809 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2810 len -= fromlen;
2811 while (fromlen > 0) {
2812 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2813 aptr++;
2814 fromlen--;
2815 }
2816 } else {
2817 string_copy:
2818 if (howlen == e_star) {
2819 len = fromlen;
2820 if (datumtype == 'Z') len++;
2821 }
2822 if (len <= (I32) fromlen) {
2823 fromlen = len;
2824 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2825 }
2826 GROWING(0, cat, start, cur, len);
2827 Copy(aptr, cur, fromlen, char);
2828 cur += fromlen;
2829 len -= fromlen;
2830 }
2831 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2832 cur += len;
2833 break;
2834 }
2835 case 'B':
2836 case 'b': {
2837 char *str, *end;
2838 I32 l, field_len;
2839 U8 bits;
2840 bool utf8_source;
2841 U32 utf8_flags;
2842
2843 fromstr = NEXTFROM;
2844 str = SvPV(fromstr, fromlen);
2845 end = str + fromlen;
2846 if (DO_UTF8(fromstr)) {
2847 utf8_source = TRUE;
2848 utf8_flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
2849 } else {
2850 utf8_source = FALSE;
2851 utf8_flags = 0; /* Unused, but keep compilers happy */
2852 }
2853 if (howlen == e_star) len = fromlen;
2854 field_len = (len+7)/8;
2855 GROWING(utf8, cat, start, cur, field_len);
2856 if (len > (I32)fromlen) len = fromlen;
2857 bits = 0;
2858 l = 0;
2859 if (datumtype == 'B')
2860 while (l++ < len) {
2861 if (utf8_source) {
2862 UV val;
2863 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2864 bits |= val & 1;
2865 } else bits |= *str++ & 1;
2866 if (l & 7) bits <<= 1;
2867 else {
2868 PUSH_BYTE(utf8, cur, bits);
2869 bits = 0;
2870 }
2871 }
2872 else
2873 /* datumtype == 'b' */
2874 while (l++ < len) {
2875 if (utf8_source) {
2876 UV val;
2877 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2878 if (val & 1) bits |= 0x80;
2879 } else if (*str++ & 1)
2880 bits |= 0x80;
2881 if (l & 7) bits >>= 1;
2882 else {
2883 PUSH_BYTE(utf8, cur, bits);
2884 bits = 0;
2885 }
2886 }
2887 l--;
2888 if (l & 7) {
2889 if (datumtype == 'B')
2890 bits <<= 7 - (l & 7);
2891 else
2892 bits >>= 7 - (l & 7);
2893 PUSH_BYTE(utf8, cur, bits);
2894 l += 7;
2895 }
2896 /* Determine how many chars are left in the requested field */
2897 l /= 8;
2898 if (howlen == e_star) field_len = 0;
2899 else field_len -= l;
2900 Zero(cur, field_len, char);
2901 cur += field_len;
2902 break;
2903 }
2904 case 'H':
2905 case 'h': {
2906 const char *str, *end;
2907 I32 l, field_len;
2908 U8 bits;
2909 bool utf8_source;
2910 U32 utf8_flags;
2911
2912 fromstr = NEXTFROM;
2913 str = SvPV_const(fromstr, fromlen);
2914 end = str + fromlen;
2915 if (DO_UTF8(fromstr)) {
2916 utf8_source = TRUE;
2917 utf8_flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
2918 } else {
2919 utf8_source = FALSE;
2920 utf8_flags = 0; /* Unused, but keep compilers happy */
2921 }
2922 if (howlen == e_star) len = fromlen;
2923 field_len = (len+1)/2;
2924 GROWING(utf8, cat, start, cur, field_len);
2925 if (!utf8 && len > (I32)fromlen) len = fromlen;
2926 bits = 0;
2927 l = 0;
2928 if (datumtype == 'H')
2929 while (l++ < len) {
2930 if (utf8_source) {
2931 UV val;
2932 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2933 if (val < 256 && isALPHA(val))
2934 bits |= (val + 9) & 0xf;
2935 else
2936 bits |= val & 0xf;
2937 } else if (isALPHA(*str))
2938 bits |= (*str++ + 9) & 0xf;
2939 else
2940 bits |= *str++ & 0xf;
2941 if (l & 1) bits <<= 4;
2942 else {
2943 PUSH_BYTE(utf8, cur, bits);
2944 bits = 0;
2945 }
2946 }
2947 else
2948 while (l++ < len) {
2949 if (utf8_source) {
2950 UV val;
2951 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2952 if (val < 256 && isALPHA(val))
2953 bits |= ((val + 9) & 0xf) << 4;
2954 else
2955 bits |= (val & 0xf) << 4;
2956 } else if (isALPHA(*str))
2957 bits |= ((*str++ + 9) & 0xf) << 4;
2958 else
2959 bits |= (*str++ & 0xf) << 4;
2960 if (l & 1) bits >>= 4;
2961 else {
2962 PUSH_BYTE(utf8, cur, bits);
2963 bits = 0;
2964 }
2965 }
2966 l--;
2967 if (l & 1) {
2968 PUSH_BYTE(utf8, cur, bits);
2969 l++;
2970 }
2971 /* Determine how many chars are left in the requested field */
2972 l /= 2;
2973 if (howlen == e_star) field_len = 0;
2974 else field_len -= l;
2975 Zero(cur, field_len, char);
2976 cur += field_len;
2977 break;
2978 }
2979 case 'c':
2980 while (len-- > 0) {
2981 IV aiv;
2982 fromstr = NEXTFROM;
2983 aiv = SvIV(fromstr);
2984 if ((-128 > aiv || aiv > 127) &&
2985 ckWARN(WARN_PACK))
2986 Perl_warner(aTHX_ packWARN(WARN_PACK),
2987 "Character in 'c' format wrapped in pack");
2988 PUSH_BYTE(utf8, cur, aiv & 0xff);
2989 }
2990 break;
2991 case 'C':
2992 if (len == 0) {
2993 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2994 break;
2995 }
2996 GROWING(0, cat, start, cur, len);
2997 while (len-- > 0) {
2998 IV aiv;
2999 fromstr = NEXTFROM;
3000 aiv = SvIV(fromstr);
3001 if ((0 > aiv || aiv > 0xff) &&
3002 ckWARN(WARN_PACK))
3003 Perl_warner(aTHX_ packWARN(WARN_PACK),
3004 "Character in 'C' format wrapped in pack");
3005 *cur++ = aiv & 0xff;
3006 }
3007 break;
3008 case 'W': {
3009 char *end;
3010 U8 in_bytes = IN_BYTES;
3011
3012 end = start+SvLEN(cat)-1;
3013 if (utf8) end -= UTF8_MAXLEN-1;
3014 while (len-- > 0) {
3015 UV auv;
3016 fromstr = NEXTFROM;
3017 auv = SvUV(fromstr);
3018 if (in_bytes) auv = auv % 0x100;
3019 if (utf8) {
3020 W_utf8:
3021 if (cur > end) {
3022 *cur = '\0';
3023 SvCUR_set(cat, cur - start);
3024
3025 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3026 end = start+SvLEN(cat)-UTF8_MAXLEN;
3027 }
3028 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3029 NATIVE_TO_UNI(auv),
3030 ckWARN(WARN_UTF8) ?
3031 0 : UNICODE_ALLOW_ANY);
3032 } else {
3033 if (auv >= 0x100) {
3034 if (!SvUTF8(cat)) {
3035 *cur = '\0';
3036 SvCUR_set(cat, cur - start);
3037 marked_upgrade(aTHX_ cat, symptr);
3038 lookahead.flags |= FLAG_DO_UTF8;
3039 lookahead.strbeg = symptr->strbeg;
3040 utf8 = 1;
3041 start = SvPVX(cat);
3042 cur = start + SvCUR(cat);
3043 end = start+SvLEN(cat)-UTF8_MAXLEN;
3044 goto W_utf8;
3045 }
3046 if (ckWARN(WARN_PACK))
3047 Perl_warner(aTHX_ packWARN(WARN_PACK),
3048 "Character in 'W' format wrapped in pack");
3049 auv &= 0xff;
3050 }
3051 if (cur >= end) {
3052 *cur = '\0';
3053 SvCUR_set(cat, cur - start);
3054 GROWING(0, cat, start, cur, len+1);
3055 end = start+SvLEN(cat)-1;
3056 }
3057 *(U8 *) cur++ = (U8)auv;
3058 }
3059 }
3060 break;
3061 }
3062 case 'U': {
3063 char *end;
3064
3065 if (len == 0) {
3066 if (!(symptr->flags & FLAG_DO_UTF8)) {
3067 marked_upgrade(aTHX_ cat, symptr);
3068 lookahead.flags |= FLAG_DO_UTF8;
3069 lookahead.strbeg = symptr->strbeg;
3070 }
3071 utf8 = 0;
3072 goto no_change;
3073 }
3074
3075 end = start+SvLEN(cat);
3076 if (!utf8) end -= UTF8_MAXLEN;
3077 while (len-- > 0) {
3078 UV auv;
3079 fromstr = NEXTFROM;
3080 auv = SvUV(fromstr);
3081 if (utf8) {
3082 U8 buffer[UTF8_MAXLEN], *endb;
3083 endb = uvuni_to_utf8_flags(buffer, auv,
3084 ckWARN(WARN_UTF8) ?
3085 0 : UNICODE_ALLOW_ANY);
3086 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3087 *cur = '\0';
3088 SvCUR_set(cat, cur - start);
3089 GROWING(0, cat, start, cur,
3090 len+(endb-buffer)*UTF8_EXPAND);
3091 end = start+SvLEN(cat);
3092 }
3093 bytes_to_uni(aTHX_ buffer, endb-buffer, &cur);
3094 } else {
3095 if (cur >= end) {
3096 *cur = '\0';
3097 SvCUR_set(cat, cur - start);
3098 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3099 end = start+SvLEN(cat)-UTF8_MAXLEN;
3100 }
3101 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3102 ckWARN(WARN_UTF8) ?
3103 0 : UNICODE_ALLOW_ANY);
3104 }
3105 }
3106 break;
3107 }
3108 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3109 case 'f':
3110 while (len-- > 0) {
3111 float afloat;
3112 NV anv;
3113 fromstr = NEXTFROM;
3114 anv = SvNV(fromstr);
3115#ifdef __VOS__
3116 /* VOS does not automatically map a floating-point overflow
3117 during conversion from double to float into infinity, so we
3118 do it by hand. This code should either be generalized for
3119 any OS that needs it, or removed if and when VOS implements
3120 posix-976 (suggestion to support mapping to infinity).
3121 Paul.Green@stratus.com 02-04-02. */
3122 if (anv > FLT_MAX)
3123 afloat = _float_constants[0]; /* single prec. inf. */
3124 else if (anv < -FLT_MAX)
3125 afloat = _float_constants[0]; /* single prec. inf. */
3126 else afloat = (float) anv;
3127#else /* __VOS__ */
3128# if defined(VMS) && !defined(__IEEE_FP)
3129 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3130 * on Alpha; fake it if we don't have them.
3131 */
3132 if (anv > FLT_MAX)
3133 afloat = FLT_MAX;
3134 else if (anv < -FLT_MAX)
3135 afloat = -FLT_MAX;
3136 else afloat = (float)anv;
3137# else
3138 afloat = (float)anv;
3139# endif
3140#endif /* __VOS__ */
3141 DO_BO_PACK_N(afloat, float);
3142 PUSH_VAR(utf8, cur, afloat);
3143 }
3144 break;
3145 case 'd':
3146 while (len-- > 0) {
3147 double adouble;
3148 NV anv;
3149 fromstr = NEXTFROM;
3150 anv = SvNV(fromstr);
3151#ifdef __VOS__
3152 /* VOS does not automatically map a floating-point overflow
3153 during conversion from long double to double into infinity,
3154 so we do it by hand. This code should either be generalized
3155 for any OS that needs it, or removed if and when VOS
3156 implements posix-976 (suggestion to support mapping to
3157 infinity). Paul.Green@stratus.com 02-04-02. */
3158 if (anv > DBL_MAX)
3159 adouble = _double_constants[0]; /* double prec. inf. */
3160 else if (anv < -DBL_MAX)
3161 adouble = _double_constants[0]; /* double prec. inf. */
3162 else adouble = (double) anv;
3163#else /* __VOS__ */
3164# if defined(VMS) && !defined(__IEEE_FP)
3165 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3166 * on Alpha; fake it if we don't have them.
3167 */
3168 if (anv > DBL_MAX)
3169 adouble = DBL_MAX;
3170 else if (anv < -DBL_MAX)
3171 adouble = -DBL_MAX;
3172 else adouble = (double)anv;
3173# else
3174 adouble = (double)anv;
3175# endif
3176#endif /* __VOS__ */
3177 DO_BO_PACK_N(adouble, double);
3178 PUSH_VAR(utf8, cur, adouble);
3179 }
3180 break;
3181 case 'F': {
3182 NV anv;
3183 Zero(&anv, 1, NV); /* can be long double with unused bits */
3184 while (len-- > 0) {
3185 fromstr = NEXTFROM;
3186 anv = SvNV(fromstr);
3187 DO_BO_PACK_N(anv, NV);
3188 PUSH_VAR(utf8, cur, anv);
3189 }
3190 break;
3191 }
3192#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3193 case 'D': {
3194 long double aldouble;
3195 /* long doubles can have unused bits, which may be nonzero */
3196 Zero(&aldouble, 1, long double);
3197 while (len-- > 0) {
3198 fromstr = NEXTFROM;
3199 aldouble = (long double)SvNV(fromstr);
3200 DO_BO_PACK_N(aldouble, long double);
3201 PUSH_VAR(utf8, cur, aldouble);
3202 }
3203 break;
3204 }
3205#endif
3206#ifdef PERL_PACK_CAN_SHRIEKSIGN
3207 case 'n' | TYPE_IS_SHRIEKING:
3208#endif
3209 case 'n':
3210 while (len-- > 0) {
3211 I16 ai16;
3212 fromstr = NEXTFROM;
3213 ai16 = (I16)SvIV(fromstr);
3214#ifdef HAS_HTONS
3215 ai16 = PerlSock_htons(ai16);
3216#endif
3217 PUSH16(utf8, cur, &ai16);
3218 }
3219 break;
3220#ifdef PERL_PACK_CAN_SHRIEKSIGN
3221 case 'v' | TYPE_IS_SHRIEKING:
3222#endif
3223 case 'v':
3224 while (len-- > 0) {
3225 I16 ai16;
3226 fromstr = NEXTFROM;
3227 ai16 = (I16)SvIV(fromstr);
3228#ifdef HAS_HTOVS
3229 ai16 = htovs(ai16);
3230#endif
3231 PUSH16(utf8, cur, &ai16);
3232 }
3233 break;
3234 case 'S' | TYPE_IS_SHRIEKING:
3235#if SHORTSIZE != SIZE16
3236 while (len-- > 0) {
3237 unsigned short aushort;
3238 fromstr = NEXTFROM;
3239 aushort = SvUV(fromstr);
3240 DO_BO_PACK(aushort, s);
3241 PUSH_VAR(utf8, cur, aushort);
3242 }
3243 break;
3244#else
3245 /* Fall through! */
3246#endif
3247 case 'S':
3248 while (len-- > 0) {
3249 U16 au16;
3250 fromstr = NEXTFROM;
3251 au16 = (U16)SvUV(fromstr);
3252 DO_BO_PACK(au16, 16);
3253 PUSH16(utf8, cur, &au16);
3254 }
3255 break;
3256 case 's' | TYPE_IS_SHRIEKING:
3257#if SHORTSIZE != SIZE16
3258 while (len-- > 0) {
3259 short ashort;
3260 fromstr = NEXTFROM;
3261 ashort = SvIV(fromstr);
3262 DO_BO_PACK(ashort, s);
3263 PUSH_VAR(utf8, cur, ashort);
3264 }
3265 break;
3266#else
3267 /* Fall through! */
3268#endif
3269 case 's':
3270 while (len-- > 0) {
3271 I16 ai16;
3272 fromstr = NEXTFROM;
3273 ai16 = (I16)SvIV(fromstr);
3274 DO_BO_PACK(ai16, 16);
3275 PUSH16(utf8, cur, &ai16);
3276 }
3277 break;
3278 case 'I':
3279 case 'I' | TYPE_IS_SHRIEKING:
3280 while (len-- > 0) {
3281 unsigned int auint;
3282 fromstr = NEXTFROM;
3283 auint = SvUV(fromstr);
3284 DO_BO_PACK(auint, i);
3285 PUSH_VAR(utf8, cur, auint);
3286 }
3287 break;
3288 case 'j':
3289 while (len-- > 0) {
3290 IV aiv;
3291 fromstr = NEXTFROM;
3292 aiv = SvIV(fromstr);
3293#if IVSIZE == INTSIZE
3294 DO_BO_PACK(aiv, i);
3295#elif IVSIZE == LONGSIZE
3296 DO_BO_PACK(aiv, l);
3297#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3298 DO_BO_PACK(aiv, 64);
3299#else
3300 Perl_croak(aTHX_ "'j' not supported on this platform");
3301#endif
3302 PUSH_VAR(utf8, cur, aiv);
3303 }
3304 break;
3305 case 'J':
3306 while (len-- > 0) {
3307 UV auv;
3308 fromstr = NEXTFROM;
3309 auv = SvUV(fromstr);
3310#if UVSIZE == INTSIZE
3311 DO_BO_PACK(auv, i);
3312#elif UVSIZE == LONGSIZE
3313 DO_BO_PACK(auv, l);
3314#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3315 DO_BO_PACK(auv, 64);
3316#else
3317 Perl_croak(aTHX_ "'J' not supported on this platform");
3318#endif
3319 PUSH_VAR(utf8, cur, auv);
3320 }
3321 break;
3322 case 'w':
3323 while (len-- > 0) {
3324 NV anv;
3325 fromstr = NEXTFROM;
3326 anv = SvNV(fromstr);
3327
3328 if (anv < 0) {
3329 *cur = '\0';
3330 SvCUR_set(cat, cur - start);
3331 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3332 }
3333
3334 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3335 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3336 any negative IVs will have already been got by the croak()
3337 above. IOK is untrue for fractions, so we test them
3338 against UV_MAX_P1. */
3339 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3340 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3341 char *in = buf + sizeof(buf);
3342 UV auv = SvUV(fromstr);
3343
3344 do {
3345 *--in = (char)((auv & 0x7f) | 0x80);
3346 auv >>= 7;
3347 } while (auv);
3348 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3349 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3350 in, (buf + sizeof(buf)) - in);
3351 } else if (SvPOKp(fromstr))
3352 goto w_string;
3353 else if (SvNOKp(fromstr)) {
3354 /* 10**NV_MAX_10_EXP is the largest power of 10
3355 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3356 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3357 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3358 And with that many bytes only Inf can overflow.
3359 Some C compilers are strict about integral constant
3360 expressions so we conservatively divide by a slightly
3361 smaller integer instead of multiplying by the exact
3362 floating-point value.
3363 */
3364#ifdef NV_MAX_10_EXP
3365 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3366 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3367#else
3368 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3369 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3370#endif
3371 char *in = buf + sizeof(buf);
3372
3373 anv = Perl_floor(anv);
3374 do {
3375 NV next = Perl_floor(anv / 128);
3376 if (in <= buf) /* this cannot happen ;-) */
3377 Perl_croak(aTHX_ "Cannot compress integer in pack");
3378 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3379 anv = next;
3380 } while (anv > 0);
3381 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3382 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3383 in, (buf + sizeof(buf)) - in);
3384 } else {
3385 char *from, *result, *in;
3386 SV *norm;
3387 STRLEN len;
3388 bool done;
3389
3390 w_string:
3391 /* Copy string and check for compliance */
3392 from = SvPV(fromstr, len);
3393 if ((norm = is_an_int(from, len)) == NULL)
3394 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3395
3396 New('w', result, len, char);
3397 in = result + len;
3398 done = FALSE;
3399 while (!done) *--in = div128(norm, &done) | 0x80;
3400 result[len - 1] &= 0x7F; /* clear continue bit */
3401 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3402 in, (result + len) - in);
3403 Safefree(result);
3404 SvREFCNT_dec(norm); /* free norm */
3405 }
3406 }
3407 break;
3408 case 'i':
3409 case 'i' | TYPE_IS_SHRIEKING:
3410 while (len-- > 0) {
3411 int aint;
3412 fromstr = NEXTFROM;
3413 aint = SvIV(fromstr);
3414 DO_BO_PACK(aint, i);
3415 PUSH_VAR(utf8, cur, aint);
3416 }
3417 break;
3418#ifdef PERL_PACK_CAN_SHRIEKSIGN
3419 case 'N' | TYPE_IS_SHRIEKING:
3420#endif
3421 case 'N':
3422 while (len-- > 0) {
3423 U32 au32;
3424 fromstr = NEXTFROM;
3425 au32 = SvUV(fromstr);
3426#ifdef HAS_HTONL
3427 au32 = PerlSock_htonl(au32);
3428#endif
3429 PUSH32(utf8, cur, &au32);
3430 }
3431 break;
3432#ifdef PERL_PACK_CAN_SHRIEKSIGN
3433 case 'V' | TYPE_IS_SHRIEKING:
3434#endif
3435 case 'V':
3436 while (len-- > 0) {
3437 U32 au32;
3438 fromstr = NEXTFROM;
3439 au32 = SvUV(fromstr);
3440#ifdef HAS_HTOVL
3441 au32 = htovl(au32);
3442#endif
3443 PUSH32(utf8, cur, &au32);
3444 }
3445 break;
3446 case 'L' | TYPE_IS_SHRIEKING:
3447#if LONGSIZE != SIZE32
3448 while (len-- > 0) {
3449 unsigned long aulong;
3450 fromstr = NEXTFROM;
3451 aulong = SvUV(fromstr);
3452 DO_BO_PACK(aulong, l);
3453 PUSH_VAR(utf8, cur, aulong);
3454 }
3455 break;
3456#else
3457 /* Fall though! */
3458#endif
3459 case 'L':
3460 while (len-- > 0) {
3461 U32 au32;
3462 fromstr = NEXTFROM;
3463 au32 = SvUV(fromstr);
3464 DO_BO_PACK(au32, 32);
3465 PUSH32(utf8, cur, &au32);
3466 }
3467 break;
3468 case 'l' | TYPE_IS_SHRIEKING:
3469#if LONGSIZE != SIZE32
3470 while (len-- > 0) {
3471 long along;
3472 fromstr = NEXTFROM;
3473 along = SvIV(fromstr);
3474 DO_BO_PACK(along, l);
3475 PUSH_VAR(utf8, cur, along);
3476 }
3477 break;
3478#else
3479 /* Fall though! */
3480#endif
3481 case 'l':
3482 while (len-- > 0) {
3483 I32 ai32;
3484 fromstr = NEXTFROM;
3485 ai32 = SvIV(fromstr);
3486 DO_BO_PACK(ai32, 32);
3487 PUSH32(utf8, cur, &ai32);
3488 }
3489 break;
3490#ifdef HAS_QUAD
3491 case 'Q':
3492 while (len-- > 0) {
3493 Uquad_t auquad;
3494 fromstr = NEXTFROM;
3495 auquad = (Uquad_t) SvUV(fromstr);
3496 DO_BO_PACK(auquad, 64);
3497 PUSH_VAR(utf8, cur, auquad);
3498 }
3499 break;
3500 case 'q':
3501 while (len-- > 0) {
3502 Quad_t aquad;
3503 fromstr = NEXTFROM;
3504 aquad = (Quad_t)SvIV(fromstr);
3505 DO_BO_PACK(aquad, 64);
3506 PUSH_VAR(utf8, cur, aquad);
3507 }
3508 break;
3509#endif /* HAS_QUAD */
3510 case 'P':
3511 len = 1; /* assume SV is correct length */
3512 GROWING(utf8, cat, start, cur, sizeof(char *));
3513 /* Fall through! */
3514 case 'p':
3515 while (len-- > 0) {
3516 const char *aptr;
3517
3518 fromstr = NEXTFROM;
3519 SvGETMAGIC(fromstr);
3520 if (!SvOK(fromstr)) aptr = NULL;
3521 else {
3522 STRLEN n_a;
3523 /* XXX better yet, could spirit away the string to
3524 * a safe spot and hang on to it until the result
3525 * of pack() (and all copies of the result) are
3526 * gone.
3527 */
3528 if (ckWARN(WARN_PACK) &&
3529 (SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3530 !SvREADONLY(fromstr)))) {
3531 Perl_warner(aTHX_ packWARN(WARN_PACK),
3532 "Attempt to pack pointer to temporary value");
3533 }
3534 if (SvPOK(fromstr) || SvNIOK(fromstr))
3535 aptr = SvPV_nomg_const(fromstr, n_a);
3536 else
3537 aptr = SvPV_force_flags(fromstr, n_a, 0);
3538 }
3539 DO_BO_PACK_PC(aptr);
3540 PUSH_VAR(utf8, cur, aptr);
3541 }
3542 break;
3543 case 'u': {
3544 const char *aptr, *aend;
3545 bool from_utf8;
3546
3547 fromstr = NEXTFROM;
3548 if (len <= 2) len = 45;
3549 else len = len / 3 * 3;
3550 if (len >= 64) {
3551 Perl_warner(aTHX_ packWARN(WARN_PACK),
3552 "Field too wide in 'u' format in pack");
3553 len = 63;
3554 }
3555 aptr = SvPV_const(fromstr, fromlen);
3556 from_utf8 = DO_UTF8(fromstr);
3557 if (from_utf8) {
3558 aend = aptr + fromlen;
3559 fromlen = sv_len_utf8(fromstr);
3560 } else aend = NULL; /* Unused, but keep compilers happy */
3561 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3562 while (fromlen > 0) {
3563 U8 *end;
3564 I32 todo;
3565 U8 hunk[1+63/3*4+1];
3566
3567 if ((I32)fromlen > len)
3568 todo = len;
3569 else
3570 todo = fromlen;
3571 if (from_utf8) {
3572 char buffer[64];
3573 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3574 'u' | TYPE_IS_PACK)) {
3575 *cur = '\0';
3576 SvCUR_set(cat, cur - start);
3577 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3578 }
3579 end = doencodes(hunk, buffer, todo);
3580 } else {
3581 end = doencodes(hunk, aptr, todo);
3582 aptr += todo;
3583 }
3584 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3585 fromlen -= todo;
3586 }
3587 break;
3588 }
3589 }
3590 *cur = '\0';
3591 SvCUR_set(cat, cur - start);
3592 no_change:
3593 *symptr = lookahead;
3594 }
3595 return beglist;
3596}
3597#undef NEXTFROM
3598
3599
3600PP(pp_pack)
3601{
3602 dSP; dMARK; dORIGMARK; dTARGET;
3603 register SV *cat = TARG;
3604 STRLEN fromlen;
3605 register const char *pat = SvPVx_const(*++MARK, fromlen);
3606 register const char *patend = pat + fromlen;
3607
3608 MARK++;
3609 sv_setpvn(cat, "", 0);
3610 SvUTF8_off(cat);
3611
3612 packlist(cat, pat, patend, MARK, SP + 1);
3613
3614 SvSETMAGIC(cat);
3615 SP = ORIGMARK;
3616 PUSHs(cat);
3617 RETURN;
3618}
3619
3620/*
3621 * Local variables:
3622 * c-indentation-style: bsd
3623 * c-basic-offset: 4
3624 * indent-tabs-mode: t
3625 * End:
3626 *
3627 * ex: set ts=8 sts=4 sw=4 noet:
3628 */