This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
fix some language/country codes
[perl5.git] / pp_pack.c
... / ...
CommitLineData
1/* pp_pack.c
2 *
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
19 */
20
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
66 (symptr)->howlen = e_no_len; \
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
85#if PERL_VERSION >= 9
86# define PERL_PACK_CAN_BYTEORDER
87# define PERL_PACK_CAN_SHRIEKSIGN
88#endif
89
90#ifndef CHAR_BIT
91# define CHAR_BIT 8
92#endif
93/* Maximum number of bytes to which a byte can grow due to upgrade */
94#define UTF8_EXPAND 2
95
96/*
97 * Offset for integer pack/unpack.
98 *
99 * On architectures where I16 and I32 aren't really 16 and 32 bits,
100 * which for now are all Crays, pack and unpack have to play games.
101 */
102
103/*
104 * These values are required for portability of pack() output.
105 * If they're not right on your machine, then pack() and unpack()
106 * wouldn't work right anyway; you'll need to apply the Cray hack.
107 * (I'd like to check them with #if, but you can't use sizeof() in
108 * the preprocessor.) --???
109 */
110/*
111 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
112 defines are now in config.h. --Andy Dougherty April 1998
113 */
114#define SIZE16 2
115#define SIZE32 4
116
117/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
118 --jhi Feb 1999 */
119
120#if U16SIZE > SIZE16 || U32SIZE > SIZE32
121# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
122# define OFF16(p) ((char*)(p))
123# define OFF32(p) ((char*)(p))
124# else
125# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
126# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
127# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
128# else
129 ++++ bad cray byte order
130# endif
131# endif
132#else
133# define OFF16(p) ((char *) (p))
134# define OFF32(p) ((char *) (p))
135#endif
136
137/* Only to be used inside a loop (see the break) */
138#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
139 if (utf8) { \
140 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
141 } else { \
142 Copy(s, OFF16(p), SIZE16, char); \
143 (s) += SIZE16; \
144 } \
145} STMT_END
146
147/* Only to be used inside a loop (see the break) */
148#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
149 if (utf8) { \
150 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
151 } else { \
152 Copy(s, OFF32(p), SIZE32, char); \
153 (s) += SIZE32; \
154 } \
155} STMT_END
156
157#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
158#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
159
160/* Only to be used inside a loop (see the break) */
161#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype) \
162STMT_START { \
163 if (utf8) { \
164 if (!uni_to_bytes(aTHX_ &s, strend, \
165 (char *) (buf), len, datumtype)) break; \
166 } else { \
167 Copy(s, (char *) (buf), len, char); \
168 s += len; \
169 } \
170} STMT_END
171
172#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
173 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype)
174
175#define PUSH_VAR(utf8, aptr, var) \
176 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
177
178/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
179#define MAX_SUB_TEMPLATE_LEVEL 100
180
181/* flags (note that type modifiers can also be used as flags!) */
182#define FLAG_WAS_UTF8 0x40
183#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
184#define FLAG_UNPACK_ONLY_ONE 0x10
185#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
186#define FLAG_SLASH 0x04
187#define FLAG_COMMA 0x02
188#define FLAG_PACK 0x01
189
190STATIC SV *
191S_mul128(pTHX_ SV *sv, U8 m)
192{
193 STRLEN len;
194 char *s = SvPV(sv, len);
195 char *t;
196
197 PERL_ARGS_ASSERT_MUL128;
198
199 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
200 SV * const tmpNew = newSVpvs("0000000000");
201
202 sv_catsv(tmpNew, sv);
203 SvREFCNT_dec(sv); /* free old sv */
204 sv = tmpNew;
205 s = SvPV(sv, len);
206 }
207 t = s + len - 1;
208 while (!*t) /* trailing '\0'? */
209 t--;
210 while (t > s) {
211 const U32 i = ((*t - '0') << 7) + m;
212 *(t--) = '0' + (char)(i % 10);
213 m = (char)(i / 10);
214 }
215 return (sv);
216}
217
218/* Explosives and implosives. */
219
220#if 'I' == 73 && 'J' == 74
221/* On an ASCII/ISO kind of system */
222#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
223#else
224/*
225 Some other sort of character set - use memchr() so we don't match
226 the null byte.
227 */
228#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
229#endif
230
231/* type modifiers */
232#define TYPE_IS_SHRIEKING 0x100
233#define TYPE_IS_BIG_ENDIAN 0x200
234#define TYPE_IS_LITTLE_ENDIAN 0x400
235#define TYPE_IS_PACK 0x800
236#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
237#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
238#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
239
240#ifdef PERL_PACK_CAN_SHRIEKSIGN
241# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
242#else
243# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
244#endif
245
246#ifndef PERL_PACK_CAN_BYTEORDER
247/* Put "can't" first because it is shorter */
248# define TYPE_ENDIANNESS(t) 0
249# define TYPE_NO_ENDIANNESS(t) (t)
250
251# define ENDIANNESS_ALLOWED_TYPES ""
252
253# define DO_BO_UNPACK(var, type)
254# define DO_BO_PACK(var, type)
255# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
256# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
257# define DO_BO_UNPACK_N(var, type)
258# define DO_BO_PACK_N(var, type)
259# define DO_BO_UNPACK_P(var)
260# define DO_BO_PACK_P(var)
261# define DO_BO_UNPACK_PC(var)
262# define DO_BO_PACK_PC(var)
263
264#else /* PERL_PACK_CAN_BYTEORDER */
265
266# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
267# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
268
269# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
270
271# define DO_BO_UNPACK(var, type) \
272 STMT_START { \
273 switch (TYPE_ENDIANNESS(datumtype)) { \
274 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
275 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
276 default: break; \
277 } \
278 } STMT_END
279
280# define DO_BO_PACK(var, type) \
281 STMT_START { \
282 switch (TYPE_ENDIANNESS(datumtype)) { \
283 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
284 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
285 default: break; \
286 } \
287 } STMT_END
288
289# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
290 STMT_START { \
291 switch (TYPE_ENDIANNESS(datumtype)) { \
292 case TYPE_IS_BIG_ENDIAN: \
293 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
294 break; \
295 case TYPE_IS_LITTLE_ENDIAN: \
296 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
297 break; \
298 default: \
299 break; \
300 } \
301 } STMT_END
302
303# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
304 STMT_START { \
305 switch (TYPE_ENDIANNESS(datumtype)) { \
306 case TYPE_IS_BIG_ENDIAN: \
307 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
308 break; \
309 case TYPE_IS_LITTLE_ENDIAN: \
310 var = (post_cast *) my_htole ## type ((pre_cast) var); \
311 break; \
312 default: \
313 break; \
314 } \
315 } STMT_END
316
317# define BO_CANT_DOIT(action, type) \
318 STMT_START { \
319 switch (TYPE_ENDIANNESS(datumtype)) { \
320 case TYPE_IS_BIG_ENDIAN: \
321 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
322 "platform", #action, #type); \
323 break; \
324 case TYPE_IS_LITTLE_ENDIAN: \
325 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
326 "platform", #action, #type); \
327 break; \
328 default: \
329 break; \
330 } \
331 } STMT_END
332
333# if PTRSIZE == INTSIZE
334# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
335# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
336# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
337# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
338# elif PTRSIZE == LONGSIZE
339# if LONGSIZE < IVSIZE && IVSIZE == 8
340# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, 64, IV, void)
341# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, 64, IV, void)
342# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, 64, IV, char)
343# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, 64, IV, char)
344# else
345# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
346# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
347# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
348# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
349# endif
350# elif PTRSIZE == IVSIZE
351# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
352# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
353# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
354# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
355# else
356# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
357# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
358# define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
359# define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
360# endif
361
362# if defined(my_htolen) && defined(my_letohn) && \
363 defined(my_htoben) && defined(my_betohn)
364# define DO_BO_UNPACK_N(var, type) \
365 STMT_START { \
366 switch (TYPE_ENDIANNESS(datumtype)) { \
367 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
368 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
369 default: break; \
370 } \
371 } STMT_END
372
373# define DO_BO_PACK_N(var, type) \
374 STMT_START { \
375 switch (TYPE_ENDIANNESS(datumtype)) { \
376 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
377 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
378 default: break; \
379 } \
380 } STMT_END
381# else
382# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
383# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
384# endif
385
386#endif /* PERL_PACK_CAN_BYTEORDER */
387
388#define PACK_SIZE_CANNOT_CSUM 0x80
389#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
390#define PACK_SIZE_MASK 0x3F
391
392/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
393 in). You're unlikely ever to need to regenerate them. */
394
395#if TYPE_IS_SHRIEKING != 0x100
396 ++++shriek offset should be 256
397#endif
398
399typedef U8 packprops_t;
400#if 'J'-'I' == 1
401/* ASCII */
402STATIC const packprops_t packprops[512] = {
403 /* normal */
404 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
405 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
406 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
407 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
408 0, 0, 0,
409 /* C */ sizeof(unsigned char),
410#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
411 /* D */ LONG_DOUBLESIZE,
412#else
413 0,
414#endif
415 0,
416 /* F */ NVSIZE,
417 0, 0,
418 /* I */ sizeof(unsigned int),
419 /* J */ UVSIZE,
420 0,
421 /* L */ SIZE32,
422 0,
423 /* N */ SIZE32,
424 0, 0,
425#if defined(HAS_QUAD)
426 /* Q */ sizeof(Uquad_t),
427#else
428 0,
429#endif
430 0,
431 /* S */ SIZE16,
432 0,
433 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
434 /* V */ SIZE32,
435 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 /* c */ sizeof(char),
438 /* d */ sizeof(double),
439 0,
440 /* f */ sizeof(float),
441 0, 0,
442 /* i */ sizeof(int),
443 /* j */ IVSIZE,
444 0,
445 /* l */ SIZE32,
446 0,
447 /* n */ SIZE16,
448 0,
449 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
450#if defined(HAS_QUAD)
451 /* q */ sizeof(Quad_t),
452#else
453 0,
454#endif
455 0,
456 /* s */ SIZE16,
457 0, 0,
458 /* v */ SIZE16,
459 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
460 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
461 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
462 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
463 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
464 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
465 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
466 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
467 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
468 0, 0, 0, 0, 0, 0, 0, 0,
469 /* shrieking */
470 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
471 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
472 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
473 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
474 0, 0, 0, 0, 0, 0, 0, 0, 0,
475 /* I */ sizeof(unsigned int),
476 0, 0,
477 /* L */ sizeof(unsigned long),
478 0,
479#if defined(PERL_PACK_CAN_SHRIEKSIGN)
480 /* N */ SIZE32,
481#else
482 0,
483#endif
484 0, 0, 0, 0,
485 /* S */ sizeof(unsigned short),
486 0, 0,
487#if defined(PERL_PACK_CAN_SHRIEKSIGN)
488 /* V */ SIZE32,
489#else
490 0,
491#endif
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0,
494 /* i */ sizeof(int),
495 0, 0,
496 /* l */ sizeof(long),
497 0,
498#if defined(PERL_PACK_CAN_SHRIEKSIGN)
499 /* n */ SIZE16,
500#else
501 0,
502#endif
503 0, 0, 0, 0,
504 /* s */ sizeof(short),
505 0, 0,
506#if defined(PERL_PACK_CAN_SHRIEKSIGN)
507 /* v */ SIZE16,
508#else
509 0,
510#endif
511 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
512 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
513 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
514 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
515 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
516 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
518 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
519 0, 0, 0, 0, 0, 0, 0, 0, 0
520};
521#else
522/* EBCDIC (or bust) */
523STATIC const packprops_t packprops[512] = {
524 /* normal */
525 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
526 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
527 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
528 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
529 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
530 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
531 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
532 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
533 0, 0, 0,
534 /* c */ sizeof(char),
535 /* d */ sizeof(double),
536 0,
537 /* f */ sizeof(float),
538 0, 0,
539 /* i */ sizeof(int),
540 0, 0, 0, 0, 0, 0, 0,
541 /* j */ IVSIZE,
542 0,
543 /* l */ SIZE32,
544 0,
545 /* n */ SIZE16,
546 0,
547 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
548#if defined(HAS_QUAD)
549 /* q */ sizeof(Quad_t),
550#else
551 0,
552#endif
553 0, 0, 0, 0, 0, 0, 0, 0, 0,
554 /* s */ SIZE16,
555 0, 0,
556 /* v */ SIZE16,
557 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
558 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
559 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
560 /* C */ sizeof(unsigned char),
561#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
562 /* D */ LONG_DOUBLESIZE,
563#else
564 0,
565#endif
566 0,
567 /* F */ NVSIZE,
568 0, 0,
569 /* I */ sizeof(unsigned int),
570 0, 0, 0, 0, 0, 0, 0,
571 /* J */ UVSIZE,
572 0,
573 /* L */ SIZE32,
574 0,
575 /* N */ SIZE32,
576 0, 0,
577#if defined(HAS_QUAD)
578 /* Q */ sizeof(Uquad_t),
579#else
580 0,
581#endif
582 0, 0, 0, 0, 0, 0, 0, 0, 0,
583 /* S */ SIZE16,
584 0,
585 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
586 /* V */ SIZE32,
587 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
588 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
589 0, 0, 0, 0, 0, 0, 0, 0, 0,
590 /* shrieking */
591 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
592 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
593 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
594 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
595 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
596 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
597 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
598 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
599 0, 0, 0, 0, 0, 0, 0, 0, 0,
600 /* i */ sizeof(int),
601 0, 0, 0, 0, 0, 0, 0, 0, 0,
602 /* l */ sizeof(long),
603 0,
604#if defined(PERL_PACK_CAN_SHRIEKSIGN)
605 /* n */ SIZE16,
606#else
607 0,
608#endif
609 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
610 /* s */ sizeof(short),
611 0, 0,
612#if defined(PERL_PACK_CAN_SHRIEKSIGN)
613 /* v */ SIZE16,
614#else
615 0,
616#endif
617 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
618 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
619 0, 0, 0,
620 /* I */ sizeof(unsigned int),
621 0, 0, 0, 0, 0, 0, 0, 0, 0,
622 /* L */ sizeof(unsigned long),
623 0,
624#if defined(PERL_PACK_CAN_SHRIEKSIGN)
625 /* N */ SIZE32,
626#else
627 0,
628#endif
629 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
630 /* S */ sizeof(unsigned short),
631 0, 0,
632#if defined(PERL_PACK_CAN_SHRIEKSIGN)
633 /* V */ SIZE32,
634#else
635 0,
636#endif
637 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
638 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
639};
640#endif
641
642STATIC U8
643uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
644{
645 STRLEN retlen;
646 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
647 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
648 /* We try to process malformed UTF-8 as much as possible (preferably with
649 warnings), but these two mean we make no progress in the string and
650 might enter an infinite loop */
651 if (retlen == (STRLEN) -1 || retlen == 0)
652 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
653 (int) TYPE_NO_MODIFIERS(datumtype));
654 if (val >= 0x100) {
655 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
656 "Character in '%c' format wrapped in unpack",
657 (int) TYPE_NO_MODIFIERS(datumtype));
658 val &= 0xff;
659 }
660 *s += retlen;
661 return (U8)val;
662}
663
664#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
665 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
666 *(U8 *)(s)++)
667
668STATIC bool
669uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
670{
671 UV val;
672 STRLEN retlen;
673 const char *from = *s;
674 int bad = 0;
675 const U32 flags = ckWARN(WARN_UTF8) ?
676 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
677 for (;buf_len > 0; buf_len--) {
678 if (from >= end) return FALSE;
679 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
680 if (retlen == (STRLEN) -1 || retlen == 0) {
681 from += UTF8SKIP(from);
682 bad |= 1;
683 } else from += retlen;
684 if (val >= 0x100) {
685 bad |= 2;
686 val &= 0xff;
687 }
688 *(U8 *)buf++ = (U8)val;
689 }
690 /* We have enough characters for the buffer. Did we have problems ? */
691 if (bad) {
692 if (bad & 1) {
693 /* Rewalk the string fragment while warning */
694 const char *ptr;
695 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
696 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
697 if (ptr >= end) break;
698 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
699 }
700 if (from > end) from = end;
701 }
702 if ((bad & 2))
703 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
704 WARN_PACK : WARN_UNPACK),
705 "Character(s) in '%c' format wrapped in %s",
706 (int) TYPE_NO_MODIFIERS(datumtype),
707 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
708 }
709 *s = from;
710 return TRUE;
711}
712
713STATIC bool
714next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
715{
716 dVAR;
717 STRLEN retlen;
718 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
719 if (val >= 0x100 || !ISUUCHAR(val) ||
720 retlen == (STRLEN) -1 || retlen == 0) {
721 *out = 0;
722 return FALSE;
723 }
724 *out = PL_uudmap[val] & 077;
725 *s += retlen;
726 return TRUE;
727}
728
729STATIC char *
730S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
731 const U8 * const end = start + len;
732
733 PERL_ARGS_ASSERT_BYTES_TO_UNI;
734
735 while (start < end) {
736 const UV uv = NATIVE_TO_ASCII(*start);
737 if (UNI_IS_INVARIANT(uv))
738 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
739 else {
740 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
741 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
742 }
743 start++;
744 }
745 return dest;
746}
747
748#define PUSH_BYTES(utf8, cur, buf, len) \
749STMT_START { \
750 if (utf8) \
751 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
752 else { \
753 Copy(buf, cur, len, char); \
754 (cur) += (len); \
755 } \
756} STMT_END
757
758#define GROWING(utf8, cat, start, cur, in_len) \
759STMT_START { \
760 STRLEN glen = (in_len); \
761 if (utf8) glen *= UTF8_EXPAND; \
762 if ((cur) + glen >= (start) + SvLEN(cat)) { \
763 (start) = sv_exp_grow(cat, glen); \
764 (cur) = (start) + SvCUR(cat); \
765 } \
766} STMT_END
767
768#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
769STMT_START { \
770 const STRLEN glen = (in_len); \
771 STRLEN gl = glen; \
772 if (utf8) gl *= UTF8_EXPAND; \
773 if ((cur) + gl >= (start) + SvLEN(cat)) { \
774 *cur = '\0'; \
775 SvCUR_set((cat), (cur) - (start)); \
776 (start) = sv_exp_grow(cat, gl); \
777 (cur) = (start) + SvCUR(cat); \
778 } \
779 PUSH_BYTES(utf8, cur, buf, glen); \
780} STMT_END
781
782#define PUSH_BYTE(utf8, s, byte) \
783STMT_START { \
784 if (utf8) { \
785 const U8 au8 = (byte); \
786 (s) = bytes_to_uni(&au8, 1, (s)); \
787 } else *(U8 *)(s)++ = (byte); \
788} STMT_END
789
790/* Only to be used inside a loop (see the break) */
791#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
792STMT_START { \
793 STRLEN retlen; \
794 if (str >= end) break; \
795 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
796 if (retlen == (STRLEN) -1 || retlen == 0) { \
797 *cur = '\0'; \
798 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
799 } \
800 str += retlen; \
801} STMT_END
802
803static const char *_action( const tempsym_t* symptr )
804{
805 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
806}
807
808/* Returns the sizeof() struct described by pat */
809STATIC I32
810S_measure_struct(pTHX_ tempsym_t* symptr)
811{
812 I32 total = 0;
813
814 PERL_ARGS_ASSERT_MEASURE_STRUCT;
815
816 while (next_symbol(symptr)) {
817 I32 len;
818 int size;
819
820 switch (symptr->howlen) {
821 case e_star:
822 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
823 _action( symptr ) );
824 break;
825 default:
826 /* e_no_len and e_number */
827 len = symptr->length;
828 break;
829 }
830
831 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
832 if (!size) {
833 int star;
834 /* endianness doesn't influence the size of a type */
835 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
836 default:
837 Perl_croak(aTHX_ "Invalid type '%c' in %s",
838 (int)TYPE_NO_MODIFIERS(symptr->code),
839 _action( symptr ) );
840#ifdef PERL_PACK_CAN_SHRIEKSIGN
841 case '.' | TYPE_IS_SHRIEKING:
842 case '@' | TYPE_IS_SHRIEKING:
843#endif
844 case '@':
845 case '.':
846 case '/':
847 case 'U': /* XXXX Is it correct? */
848 case 'w':
849 case 'u':
850 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
851 (int) TYPE_NO_MODIFIERS(symptr->code),
852 _action( symptr ) );
853 case '%':
854 size = 0;
855 break;
856 case '(':
857 {
858 tempsym_t savsym = *symptr;
859 symptr->patptr = savsym.grpbeg;
860 symptr->patend = savsym.grpend;
861 /* XXXX Theoretically, we need to measure many times at
862 different positions, since the subexpression may contain
863 alignment commands, but be not of aligned length.
864 Need to detect this and croak(). */
865 size = measure_struct(symptr);
866 *symptr = savsym;
867 break;
868 }
869 case 'X' | TYPE_IS_SHRIEKING:
870 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
871 */
872 if (!len) /* Avoid division by 0 */
873 len = 1;
874 len = total % len; /* Assumed: the start is aligned. */
875 /* FALL THROUGH */
876 case 'X':
877 size = -1;
878 if (total < len)
879 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
880 break;
881 case 'x' | TYPE_IS_SHRIEKING:
882 if (!len) /* Avoid division by 0 */
883 len = 1;
884 star = total % len; /* Assumed: the start is aligned. */
885 if (star) /* Other portable ways? */
886 len = len - star;
887 else
888 len = 0;
889 /* FALL THROUGH */
890 case 'x':
891 case 'A':
892 case 'Z':
893 case 'a':
894 size = 1;
895 break;
896 case 'B':
897 case 'b':
898 len = (len + 7)/8;
899 size = 1;
900 break;
901 case 'H':
902 case 'h':
903 len = (len + 1)/2;
904 size = 1;
905 break;
906
907 case 'P':
908 len = 1;
909 size = sizeof(char*);
910 break;
911 }
912 }
913 total += len * size;
914 }
915 return total;
916}
917
918
919/* locate matching closing parenthesis or bracket
920 * returns char pointer to char after match, or NULL
921 */
922STATIC const char *
923S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
924{
925 PERL_ARGS_ASSERT_GROUP_END;
926
927 while (patptr < patend) {
928 const char c = *patptr++;
929
930 if (isSPACE(c))
931 continue;
932 else if (c == ender)
933 return patptr-1;
934 else if (c == '#') {
935 while (patptr < patend && *patptr != '\n')
936 patptr++;
937 continue;
938 } else if (c == '(')
939 patptr = group_end(patptr, patend, ')') + 1;
940 else if (c == '[')
941 patptr = group_end(patptr, patend, ']') + 1;
942 }
943 Perl_croak(aTHX_ "No group ending character '%c' found in template",
944 ender);
945 return 0;
946}
947
948
949/* Convert unsigned decimal number to binary.
950 * Expects a pointer to the first digit and address of length variable
951 * Advances char pointer to 1st non-digit char and returns number
952 */
953STATIC const char *
954S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
955{
956 I32 len = *patptr++ - '0';
957
958 PERL_ARGS_ASSERT_GET_NUM;
959
960 while (isDIGIT(*patptr)) {
961 if (len >= 0x7FFFFFFF/10)
962 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
963 len = (len * 10) + (*patptr++ - '0');
964 }
965 *lenptr = len;
966 return patptr;
967}
968
969/* The marvellous template parsing routine: Using state stored in *symptr,
970 * locates next template code and count
971 */
972STATIC bool
973S_next_symbol(pTHX_ tempsym_t* symptr )
974{
975 const char* patptr = symptr->patptr;
976 const char* const patend = symptr->patend;
977
978 PERL_ARGS_ASSERT_NEXT_SYMBOL;
979
980 symptr->flags &= ~FLAG_SLASH;
981
982 while (patptr < patend) {
983 if (isSPACE(*patptr))
984 patptr++;
985 else if (*patptr == '#') {
986 patptr++;
987 while (patptr < patend && *patptr != '\n')
988 patptr++;
989 if (patptr < patend)
990 patptr++;
991 } else {
992 /* We should have found a template code */
993 I32 code = *patptr++ & 0xFF;
994 U32 inherited_modifiers = 0;
995
996 if (code == ','){ /* grandfather in commas but with a warning */
997 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
998 symptr->flags |= FLAG_COMMA;
999 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1000 "Invalid type ',' in %s", _action( symptr ) );
1001 }
1002 continue;
1003 }
1004
1005 /* for '(', skip to ')' */
1006 if (code == '(') {
1007 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
1008 Perl_croak(aTHX_ "()-group starts with a count in %s",
1009 _action( symptr ) );
1010 symptr->grpbeg = patptr;
1011 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
1012 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
1013 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
1014 _action( symptr ) );
1015 }
1016
1017 /* look for group modifiers to inherit */
1018 if (TYPE_ENDIANNESS(symptr->flags)) {
1019 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
1020 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
1021 }
1022
1023 /* look for modifiers */
1024 while (patptr < patend) {
1025 const char *allowed;
1026 I32 modifier;
1027 switch (*patptr) {
1028 case '!':
1029 modifier = TYPE_IS_SHRIEKING;
1030 allowed = SHRIEKING_ALLOWED_TYPES;
1031 break;
1032#ifdef PERL_PACK_CAN_BYTEORDER
1033 case '>':
1034 modifier = TYPE_IS_BIG_ENDIAN;
1035 allowed = ENDIANNESS_ALLOWED_TYPES;
1036 break;
1037 case '<':
1038 modifier = TYPE_IS_LITTLE_ENDIAN;
1039 allowed = ENDIANNESS_ALLOWED_TYPES;
1040 break;
1041#endif /* PERL_PACK_CAN_BYTEORDER */
1042 default:
1043 allowed = "";
1044 modifier = 0;
1045 break;
1046 }
1047
1048 if (modifier == 0)
1049 break;
1050
1051 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1052 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
1053 allowed, _action( symptr ) );
1054
1055 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1056 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
1057 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
1058 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1059 TYPE_ENDIANNESS_MASK)
1060 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
1061 *patptr, _action( symptr ) );
1062
1063 if ((code & modifier)) {
1064 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
1065 "Duplicate modifier '%c' after '%c' in %s",
1066 *patptr, (int) TYPE_NO_MODIFIERS(code),
1067 _action( symptr ) );
1068 }
1069
1070 code |= modifier;
1071 patptr++;
1072 }
1073
1074 /* inherit modifiers */
1075 code |= inherited_modifiers;
1076
1077 /* look for count and/or / */
1078 if (patptr < patend) {
1079 if (isDIGIT(*patptr)) {
1080 patptr = get_num( patptr, &symptr->length );
1081 symptr->howlen = e_number;
1082
1083 } else if (*patptr == '*') {
1084 patptr++;
1085 symptr->howlen = e_star;
1086
1087 } else if (*patptr == '[') {
1088 const char* lenptr = ++patptr;
1089 symptr->howlen = e_number;
1090 patptr = group_end( patptr, patend, ']' ) + 1;
1091 /* what kind of [] is it? */
1092 if (isDIGIT(*lenptr)) {
1093 lenptr = get_num( lenptr, &symptr->length );
1094 if( *lenptr != ']' )
1095 Perl_croak(aTHX_ "Malformed integer in [] in %s",
1096 _action( symptr ) );
1097 } else {
1098 tempsym_t savsym = *symptr;
1099 symptr->patend = patptr-1;
1100 symptr->patptr = lenptr;
1101 savsym.length = measure_struct(symptr);
1102 *symptr = savsym;
1103 }
1104 } else {
1105 symptr->howlen = e_no_len;
1106 symptr->length = 1;
1107 }
1108
1109 /* try to find / */
1110 while (patptr < patend) {
1111 if (isSPACE(*patptr))
1112 patptr++;
1113 else if (*patptr == '#') {
1114 patptr++;
1115 while (patptr < patend && *patptr != '\n')
1116 patptr++;
1117 if (patptr < patend)
1118 patptr++;
1119 } else {
1120 if (*patptr == '/') {
1121 symptr->flags |= FLAG_SLASH;
1122 patptr++;
1123 if (patptr < patend &&
1124 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1125 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1126 _action( symptr ) );
1127 }
1128 break;
1129 }
1130 }
1131 } else {
1132 /* at end - no count, no / */
1133 symptr->howlen = e_no_len;
1134 symptr->length = 1;
1135 }
1136
1137 symptr->code = code;
1138 symptr->patptr = patptr;
1139 return TRUE;
1140 }
1141 }
1142 symptr->patptr = patptr;
1143 return FALSE;
1144}
1145
1146/*
1147 There is no way to cleanly handle the case where we should process the
1148 string per byte in its upgraded form while it's really in downgraded form
1149 (e.g. estimates like strend-s as an upper bound for the number of
1150 characters left wouldn't work). So if we foresee the need of this
1151 (pattern starts with U or contains U0), we want to work on the encoded
1152 version of the string. Users are advised to upgrade their pack string
1153 themselves if they need to do a lot of unpacks like this on it
1154*/
1155STATIC bool
1156need_utf8(const char *pat, const char *patend)
1157{
1158 bool first = TRUE;
1159
1160 PERL_ARGS_ASSERT_NEED_UTF8;
1161
1162 while (pat < patend) {
1163 if (pat[0] == '#') {
1164 pat++;
1165 pat = (const char *) memchr(pat, '\n', patend-pat);
1166 if (!pat) return FALSE;
1167 } else if (pat[0] == 'U') {
1168 if (first || pat[1] == '0') return TRUE;
1169 } else first = FALSE;
1170 pat++;
1171 }
1172 return FALSE;
1173}
1174
1175STATIC char
1176first_symbol(const char *pat, const char *patend) {
1177 PERL_ARGS_ASSERT_FIRST_SYMBOL;
1178
1179 while (pat < patend) {
1180 if (pat[0] != '#') return pat[0];
1181 pat++;
1182 pat = (const char *) memchr(pat, '\n', patend-pat);
1183 if (!pat) return 0;
1184 pat++;
1185 }
1186 return 0;
1187}
1188
1189/*
1190=for apidoc unpackstring
1191
1192The engine implementing unpack() Perl function. C<unpackstring> puts the
1193extracted list items on the stack and returns the number of elements.
1194Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1195
1196=cut */
1197
1198I32
1199Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
1200{
1201 tempsym_t sym;
1202
1203 PERL_ARGS_ASSERT_UNPACKSTRING;
1204
1205 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1206 else if (need_utf8(pat, patend)) {
1207 /* We probably should try to avoid this in case a scalar context call
1208 wouldn't get to the "U0" */
1209 STRLEN len = strend - s;
1210 s = (char *) bytes_to_utf8((U8 *) s, &len);
1211 SAVEFREEPV(s);
1212 strend = s + len;
1213 flags |= FLAG_DO_UTF8;
1214 }
1215
1216 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1217 flags |= FLAG_PARSE_UTF8;
1218
1219 TEMPSYM_INIT(&sym, pat, patend, flags);
1220
1221 return unpack_rec(&sym, s, s, strend, NULL );
1222}
1223
1224STATIC I32
1225S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
1226{
1227 dVAR; dSP;
1228 SV *sv = NULL;
1229 const I32 start_sp_offset = SP - PL_stack_base;
1230 howlen_t howlen;
1231 I32 checksum = 0;
1232 UV cuv = 0;
1233 NV cdouble = 0.0;
1234 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1235 bool beyond = FALSE;
1236 bool explicit_length;
1237 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1238 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1239
1240 PERL_ARGS_ASSERT_UNPACK_REC;
1241
1242 symptr->strbeg = s - strbeg;
1243
1244 while (next_symbol(symptr)) {
1245 packprops_t props;
1246 I32 len;
1247 I32 datumtype = symptr->code;
1248 /* do first one only unless in list context
1249 / is implemented by unpacking the count, then popping it from the
1250 stack, so must check that we're not in the middle of a / */
1251 if ( unpack_only_one
1252 && (SP - PL_stack_base == start_sp_offset + 1)
1253 && (datumtype != '/') ) /* XXX can this be omitted */
1254 break;
1255
1256 switch (howlen = symptr->howlen) {
1257 case e_star:
1258 len = strend - strbeg; /* long enough */
1259 break;
1260 default:
1261 /* e_no_len and e_number */
1262 len = symptr->length;
1263 break;
1264 }
1265
1266 explicit_length = TRUE;
1267 redo_switch:
1268 beyond = s >= strend;
1269
1270 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1271 if (props) {
1272 /* props nonzero means we can process this letter. */
1273 const long size = props & PACK_SIZE_MASK;
1274 const long howmany = (strend - s) / size;
1275 if (len > howmany)
1276 len = howmany;
1277
1278 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1279 if (len && unpack_only_one) len = 1;
1280 EXTEND(SP, len);
1281 EXTEND_MORTAL(len);
1282 }
1283 }
1284
1285 switch(TYPE_NO_ENDIANNESS(datumtype)) {
1286 default:
1287 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1288
1289 case '%':
1290 if (howlen == e_no_len)
1291 len = 16; /* len is not specified */
1292 checksum = len;
1293 cuv = 0;
1294 cdouble = 0;
1295 continue;
1296 break;
1297 case '(':
1298 {
1299 tempsym_t savsym = *symptr;
1300 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1301 symptr->flags |= group_modifiers;
1302 symptr->patend = savsym.grpend;
1303 symptr->previous = &savsym;
1304 symptr->level++;
1305 PUTBACK;
1306 if (len && unpack_only_one) len = 1;
1307 while (len--) {
1308 symptr->patptr = savsym.grpbeg;
1309 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1310 else symptr->flags &= ~FLAG_PARSE_UTF8;
1311 unpack_rec(symptr, s, strbeg, strend, &s);
1312 if (s == strend && savsym.howlen == e_star)
1313 break; /* No way to continue */
1314 }
1315 SPAGAIN;
1316 savsym.flags = symptr->flags & ~group_modifiers;
1317 *symptr = savsym;
1318 break;
1319 }
1320#ifdef PERL_PACK_CAN_SHRIEKSIGN
1321 case '.' | TYPE_IS_SHRIEKING:
1322#endif
1323 case '.': {
1324 const char *from;
1325 SV *sv;
1326#ifdef PERL_PACK_CAN_SHRIEKSIGN
1327 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1328#else /* PERL_PACK_CAN_SHRIEKSIGN */
1329 const bool u8 = utf8;
1330#endif
1331 if (howlen == e_star) from = strbeg;
1332 else if (len <= 0) from = s;
1333 else {
1334 tempsym_t *group = symptr;
1335
1336 while (--len && group) group = group->previous;
1337 from = group ? strbeg + group->strbeg : strbeg;
1338 }
1339 sv = from <= s ?
1340 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1341 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1342 mXPUSHs(sv);
1343 break;
1344 }
1345#ifdef PERL_PACK_CAN_SHRIEKSIGN
1346 case '@' | TYPE_IS_SHRIEKING:
1347#endif
1348 case '@':
1349 s = strbeg + symptr->strbeg;
1350#ifdef PERL_PACK_CAN_SHRIEKSIGN
1351 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1352#else /* PERL_PACK_CAN_SHRIEKSIGN */
1353 if (utf8)
1354#endif
1355 {
1356 while (len > 0) {
1357 if (s >= strend)
1358 Perl_croak(aTHX_ "'@' outside of string in unpack");
1359 s += UTF8SKIP(s);
1360 len--;
1361 }
1362 if (s > strend)
1363 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1364 } else {
1365 if (strend-s < len)
1366 Perl_croak(aTHX_ "'@' outside of string in unpack");
1367 s += len;
1368 }
1369 break;
1370 case 'X' | TYPE_IS_SHRIEKING:
1371 if (!len) /* Avoid division by 0 */
1372 len = 1;
1373 if (utf8) {
1374 const char *hop, *last;
1375 I32 l = len;
1376 hop = last = strbeg;
1377 while (hop < s) {
1378 hop += UTF8SKIP(hop);
1379 if (--l == 0) {
1380 last = hop;
1381 l = len;
1382 }
1383 }
1384 if (last > s)
1385 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1386 s = last;
1387 break;
1388 }
1389 len = (s - strbeg) % len;
1390 /* FALL THROUGH */
1391 case 'X':
1392 if (utf8) {
1393 while (len > 0) {
1394 if (s <= strbeg)
1395 Perl_croak(aTHX_ "'X' outside of string in unpack");
1396 while (--s, UTF8_IS_CONTINUATION(*s)) {
1397 if (s <= strbeg)
1398 Perl_croak(aTHX_ "'X' outside of string in unpack");
1399 }
1400 len--;
1401 }
1402 } else {
1403 if (len > s - strbeg)
1404 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1405 s -= len;
1406 }
1407 break;
1408 case 'x' | TYPE_IS_SHRIEKING: {
1409 I32 ai32;
1410 if (!len) /* Avoid division by 0 */
1411 len = 1;
1412 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1413 else ai32 = (s - strbeg) % len;
1414 if (ai32 == 0) break;
1415 len -= ai32;
1416 }
1417 /* FALL THROUGH */
1418 case 'x':
1419 if (utf8) {
1420 while (len>0) {
1421 if (s >= strend)
1422 Perl_croak(aTHX_ "'x' outside of string in unpack");
1423 s += UTF8SKIP(s);
1424 len--;
1425 }
1426 } else {
1427 if (len > strend - s)
1428 Perl_croak(aTHX_ "'x' outside of string in unpack");
1429 s += len;
1430 }
1431 break;
1432 case '/':
1433 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1434 break;
1435 case 'A':
1436 case 'Z':
1437 case 'a':
1438 if (checksum) {
1439 /* Preliminary length estimate is assumed done in 'W' */
1440 if (len > strend - s) len = strend - s;
1441 goto W_checksum;
1442 }
1443 if (utf8) {
1444 I32 l;
1445 const char *hop;
1446 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1447 if (hop >= strend) {
1448 if (hop > strend)
1449 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1450 break;
1451 }
1452 }
1453 if (hop > strend)
1454 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1455 len = hop - s;
1456 } else if (len > strend - s)
1457 len = strend - s;
1458
1459 if (datumtype == 'Z') {
1460 /* 'Z' strips stuff after first null */
1461 const char *ptr, *end;
1462 end = s + len;
1463 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1464 sv = newSVpvn(s, ptr-s);
1465 if (howlen == e_star) /* exact for 'Z*' */
1466 len = ptr-s + (ptr != strend ? 1 : 0);
1467 } else if (datumtype == 'A') {
1468 /* 'A' strips both nulls and spaces */
1469 const char *ptr;
1470 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1471 for (ptr = s+len-1; ptr >= s; ptr--)
1472 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1473 !is_utf8_space((U8 *) ptr)) break;
1474 if (ptr >= s) ptr += UTF8SKIP(ptr);
1475 else ptr++;
1476 if (ptr > s+len)
1477 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1478 } else {
1479 for (ptr = s+len-1; ptr >= s; ptr--)
1480 if (*ptr != 0 && !isSPACE(*ptr)) break;
1481 ptr++;
1482 }
1483 sv = newSVpvn(s, ptr-s);
1484 } else sv = newSVpvn(s, len);
1485
1486 if (utf8) {
1487 SvUTF8_on(sv);
1488 /* Undo any upgrade done due to need_utf8() */
1489 if (!(symptr->flags & FLAG_WAS_UTF8))
1490 sv_utf8_downgrade(sv, 0);
1491 }
1492 mXPUSHs(sv);
1493 s += len;
1494 break;
1495 case 'B':
1496 case 'b': {
1497 char *str;
1498 if (howlen == e_star || len > (strend - s) * 8)
1499 len = (strend - s) * 8;
1500 if (checksum) {
1501 if (utf8)
1502 while (len >= 8 && s < strend) {
1503 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1504 len -= 8;
1505 }
1506 else
1507 while (len >= 8) {
1508 cuv += PL_bitcount[*(U8 *)s++];
1509 len -= 8;
1510 }
1511 if (len && s < strend) {
1512 U8 bits;
1513 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1514 if (datumtype == 'b')
1515 while (len-- > 0) {
1516 if (bits & 1) cuv++;
1517 bits >>= 1;
1518 }
1519 else
1520 while (len-- > 0) {
1521 if (bits & 0x80) cuv++;
1522 bits <<= 1;
1523 }
1524 }
1525 break;
1526 }
1527
1528 sv = sv_2mortal(newSV(len ? len : 1));
1529 SvPOK_on(sv);
1530 str = SvPVX(sv);
1531 if (datumtype == 'b') {
1532 U8 bits = 0;
1533 const I32 ai32 = len;
1534 for (len = 0; len < ai32; len++) {
1535 if (len & 7) bits >>= 1;
1536 else if (utf8) {
1537 if (s >= strend) break;
1538 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1539 } else bits = *(U8 *) s++;
1540 *str++ = bits & 1 ? '1' : '0';
1541 }
1542 } else {
1543 U8 bits = 0;
1544 const I32 ai32 = len;
1545 for (len = 0; len < ai32; len++) {
1546 if (len & 7) bits <<= 1;
1547 else if (utf8) {
1548 if (s >= strend) break;
1549 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1550 } else bits = *(U8 *) s++;
1551 *str++ = bits & 0x80 ? '1' : '0';
1552 }
1553 }
1554 *str = '\0';
1555 SvCUR_set(sv, str - SvPVX_const(sv));
1556 XPUSHs(sv);
1557 break;
1558 }
1559 case 'H':
1560 case 'h': {
1561 char *str = NULL;
1562 /* Preliminary length estimate, acceptable for utf8 too */
1563 if (howlen == e_star || len > (strend - s) * 2)
1564 len = (strend - s) * 2;
1565 if (!checksum) {
1566 sv = sv_2mortal(newSV(len ? len : 1));
1567 SvPOK_on(sv);
1568 str = SvPVX(sv);
1569 }
1570 if (datumtype == 'h') {
1571 U8 bits = 0;
1572 I32 ai32 = len;
1573 for (len = 0; len < ai32; len++) {
1574 if (len & 1) bits >>= 4;
1575 else if (utf8) {
1576 if (s >= strend) break;
1577 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1578 } else bits = * (U8 *) s++;
1579 if (!checksum)
1580 *str++ = PL_hexdigit[bits & 15];
1581 }
1582 } else {
1583 U8 bits = 0;
1584 const I32 ai32 = len;
1585 for (len = 0; len < ai32; len++) {
1586 if (len & 1) bits <<= 4;
1587 else if (utf8) {
1588 if (s >= strend) break;
1589 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1590 } else bits = *(U8 *) s++;
1591 if (!checksum)
1592 *str++ = PL_hexdigit[(bits >> 4) & 15];
1593 }
1594 }
1595 if (!checksum) {
1596 *str = '\0';
1597 SvCUR_set(sv, str - SvPVX_const(sv));
1598 XPUSHs(sv);
1599 }
1600 break;
1601 }
1602 case 'C':
1603 if (len == 0) {
1604 if (explicit_length)
1605 /* Switch to "character" mode */
1606 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1607 break;
1608 }
1609 /* FALL THROUGH */
1610 case 'c':
1611 while (len-- > 0 && s < strend) {
1612 int aint;
1613 if (utf8)
1614 {
1615 STRLEN retlen;
1616 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1617 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1618 if (retlen == (STRLEN) -1 || retlen == 0)
1619 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1620 s += retlen;
1621 }
1622 else
1623 aint = *(U8 *)(s)++;
1624 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
1625 aint -= 256;
1626 if (!checksum)
1627 mPUSHi(aint);
1628 else if (checksum > bits_in_uv)
1629 cdouble += (NV)aint;
1630 else
1631 cuv += aint;
1632 }
1633 break;
1634 case 'W':
1635 W_checksum:
1636 if (utf8) {
1637 while (len-- > 0 && s < strend) {
1638 STRLEN retlen;
1639 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1640 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1641 if (retlen == (STRLEN) -1 || retlen == 0)
1642 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1643 s += retlen;
1644 if (!checksum)
1645 mPUSHu(val);
1646 else if (checksum > bits_in_uv)
1647 cdouble += (NV) val;
1648 else
1649 cuv += val;
1650 }
1651 } else if (!checksum)
1652 while (len-- > 0) {
1653 const U8 ch = *(U8 *) s++;
1654 mPUSHu(ch);
1655 }
1656 else if (checksum > bits_in_uv)
1657 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1658 else
1659 while (len-- > 0) cuv += *(U8 *) s++;
1660 break;
1661 case 'U':
1662 if (len == 0) {
1663 if (explicit_length && howlen != e_star) {
1664 /* Switch to "bytes in UTF-8" mode */
1665 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1666 else
1667 /* Should be impossible due to the need_utf8() test */
1668 Perl_croak(aTHX_ "U0 mode on a byte string");
1669 }
1670 break;
1671 }
1672 if (len > strend - s) len = strend - s;
1673 if (!checksum) {
1674 if (len && unpack_only_one) len = 1;
1675 EXTEND(SP, len);
1676 EXTEND_MORTAL(len);
1677 }
1678 while (len-- > 0 && s < strend) {
1679 STRLEN retlen;
1680 UV auv;
1681 if (utf8) {
1682 U8 result[UTF8_MAXLEN];
1683 const char *ptr = s;
1684 STRLEN len;
1685 /* Bug: warns about bad utf8 even if we are short on bytes
1686 and will break out of the loop */
1687 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1688 'U'))
1689 break;
1690 len = UTF8SKIP(result);
1691 if (!uni_to_bytes(aTHX_ &ptr, strend,
1692 (char *) &result[1], len-1, 'U')) break;
1693 auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT);
1694 s = ptr;
1695 } else {
1696 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
1697 if (retlen == (STRLEN) -1 || retlen == 0)
1698 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1699 s += retlen;
1700 }
1701 if (!checksum)
1702 mPUSHu(auv);
1703 else if (checksum > bits_in_uv)
1704 cdouble += (NV) auv;
1705 else
1706 cuv += auv;
1707 }
1708 break;
1709 case 's' | TYPE_IS_SHRIEKING:
1710#if SHORTSIZE != SIZE16
1711 while (len-- > 0) {
1712 short ashort;
1713 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1714 DO_BO_UNPACK(ashort, s);
1715 if (!checksum)
1716 mPUSHi(ashort);
1717 else if (checksum > bits_in_uv)
1718 cdouble += (NV)ashort;
1719 else
1720 cuv += ashort;
1721 }
1722 break;
1723#else
1724 /* Fallthrough! */
1725#endif
1726 case 's':
1727 while (len-- > 0) {
1728 I16 ai16;
1729
1730#if U16SIZE > SIZE16
1731 ai16 = 0;
1732#endif
1733 SHIFT16(utf8, s, strend, &ai16, datumtype);
1734 DO_BO_UNPACK(ai16, 16);
1735#if U16SIZE > SIZE16
1736 if (ai16 > 32767)
1737 ai16 -= 65536;
1738#endif
1739 if (!checksum)
1740 mPUSHi(ai16);
1741 else if (checksum > bits_in_uv)
1742 cdouble += (NV)ai16;
1743 else
1744 cuv += ai16;
1745 }
1746 break;
1747 case 'S' | TYPE_IS_SHRIEKING:
1748#if SHORTSIZE != SIZE16
1749 while (len-- > 0) {
1750 unsigned short aushort;
1751 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1752 DO_BO_UNPACK(aushort, s);
1753 if (!checksum)
1754 mPUSHu(aushort);
1755 else if (checksum > bits_in_uv)
1756 cdouble += (NV)aushort;
1757 else
1758 cuv += aushort;
1759 }
1760 break;
1761#else
1762 /* Fallthrough! */
1763#endif
1764 case 'v':
1765 case 'n':
1766 case 'S':
1767 while (len-- > 0) {
1768 U16 au16;
1769#if U16SIZE > SIZE16
1770 au16 = 0;
1771#endif
1772 SHIFT16(utf8, s, strend, &au16, datumtype);
1773 DO_BO_UNPACK(au16, 16);
1774#ifdef HAS_NTOHS
1775 if (datumtype == 'n')
1776 au16 = PerlSock_ntohs(au16);
1777#endif
1778#ifdef HAS_VTOHS
1779 if (datumtype == 'v')
1780 au16 = vtohs(au16);
1781#endif
1782 if (!checksum)
1783 mPUSHu(au16);
1784 else if (checksum > bits_in_uv)
1785 cdouble += (NV) au16;
1786 else
1787 cuv += au16;
1788 }
1789 break;
1790#ifdef PERL_PACK_CAN_SHRIEKSIGN
1791 case 'v' | TYPE_IS_SHRIEKING:
1792 case 'n' | TYPE_IS_SHRIEKING:
1793 while (len-- > 0) {
1794 I16 ai16;
1795# if U16SIZE > SIZE16
1796 ai16 = 0;
1797# endif
1798 SHIFT16(utf8, s, strend, &ai16, datumtype);
1799# ifdef HAS_NTOHS
1800 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1801 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1802# endif /* HAS_NTOHS */
1803# ifdef HAS_VTOHS
1804 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1805 ai16 = (I16) vtohs((U16) ai16);
1806# endif /* HAS_VTOHS */
1807 if (!checksum)
1808 mPUSHi(ai16);
1809 else if (checksum > bits_in_uv)
1810 cdouble += (NV) ai16;
1811 else
1812 cuv += ai16;
1813 }
1814 break;
1815#endif /* PERL_PACK_CAN_SHRIEKSIGN */
1816 case 'i':
1817 case 'i' | TYPE_IS_SHRIEKING:
1818 while (len-- > 0) {
1819 int aint;
1820 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1821 DO_BO_UNPACK(aint, i);
1822 if (!checksum)
1823 mPUSHi(aint);
1824 else if (checksum > bits_in_uv)
1825 cdouble += (NV)aint;
1826 else
1827 cuv += aint;
1828 }
1829 break;
1830 case 'I':
1831 case 'I' | TYPE_IS_SHRIEKING:
1832 while (len-- > 0) {
1833 unsigned int auint;
1834 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1835 DO_BO_UNPACK(auint, i);
1836 if (!checksum)
1837 mPUSHu(auint);
1838 else if (checksum > bits_in_uv)
1839 cdouble += (NV)auint;
1840 else
1841 cuv += auint;
1842 }
1843 break;
1844 case 'j':
1845 while (len-- > 0) {
1846 IV aiv;
1847 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1848#if IVSIZE == INTSIZE
1849 DO_BO_UNPACK(aiv, i);
1850#elif IVSIZE == LONGSIZE
1851 DO_BO_UNPACK(aiv, l);
1852#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1853 DO_BO_UNPACK(aiv, 64);
1854#else
1855 Perl_croak(aTHX_ "'j' not supported on this platform");
1856#endif
1857 if (!checksum)
1858 mPUSHi(aiv);
1859 else if (checksum > bits_in_uv)
1860 cdouble += (NV)aiv;
1861 else
1862 cuv += aiv;
1863 }
1864 break;
1865 case 'J':
1866 while (len-- > 0) {
1867 UV auv;
1868 SHIFT_VAR(utf8, s, strend, auv, datumtype);
1869#if IVSIZE == INTSIZE
1870 DO_BO_UNPACK(auv, i);
1871#elif IVSIZE == LONGSIZE
1872 DO_BO_UNPACK(auv, l);
1873#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1874 DO_BO_UNPACK(auv, 64);
1875#else
1876 Perl_croak(aTHX_ "'J' not supported on this platform");
1877#endif
1878 if (!checksum)
1879 mPUSHu(auv);
1880 else if (checksum > bits_in_uv)
1881 cdouble += (NV)auv;
1882 else
1883 cuv += auv;
1884 }
1885 break;
1886 case 'l' | TYPE_IS_SHRIEKING:
1887#if LONGSIZE != SIZE32
1888 while (len-- > 0) {
1889 long along;
1890 SHIFT_VAR(utf8, s, strend, along, datumtype);
1891 DO_BO_UNPACK(along, l);
1892 if (!checksum)
1893 mPUSHi(along);
1894 else if (checksum > bits_in_uv)
1895 cdouble += (NV)along;
1896 else
1897 cuv += along;
1898 }
1899 break;
1900#else
1901 /* Fallthrough! */
1902#endif
1903 case 'l':
1904 while (len-- > 0) {
1905 I32 ai32;
1906#if U32SIZE > SIZE32
1907 ai32 = 0;
1908#endif
1909 SHIFT32(utf8, s, strend, &ai32, datumtype);
1910 DO_BO_UNPACK(ai32, 32);
1911#if U32SIZE > SIZE32
1912 if (ai32 > 2147483647) ai32 -= 4294967296;
1913#endif
1914 if (!checksum)
1915 mPUSHi(ai32);
1916 else if (checksum > bits_in_uv)
1917 cdouble += (NV)ai32;
1918 else
1919 cuv += ai32;
1920 }
1921 break;
1922 case 'L' | TYPE_IS_SHRIEKING:
1923#if LONGSIZE != SIZE32
1924 while (len-- > 0) {
1925 unsigned long aulong;
1926 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1927 DO_BO_UNPACK(aulong, l);
1928 if (!checksum)
1929 mPUSHu(aulong);
1930 else if (checksum > bits_in_uv)
1931 cdouble += (NV)aulong;
1932 else
1933 cuv += aulong;
1934 }
1935 break;
1936#else
1937 /* Fall through! */
1938#endif
1939 case 'V':
1940 case 'N':
1941 case 'L':
1942 while (len-- > 0) {
1943 U32 au32;
1944#if U32SIZE > SIZE32
1945 au32 = 0;
1946#endif
1947 SHIFT32(utf8, s, strend, &au32, datumtype);
1948 DO_BO_UNPACK(au32, 32);
1949#ifdef HAS_NTOHL
1950 if (datumtype == 'N')
1951 au32 = PerlSock_ntohl(au32);
1952#endif
1953#ifdef HAS_VTOHL
1954 if (datumtype == 'V')
1955 au32 = vtohl(au32);
1956#endif
1957 if (!checksum)
1958 mPUSHu(au32);
1959 else if (checksum > bits_in_uv)
1960 cdouble += (NV)au32;
1961 else
1962 cuv += au32;
1963 }
1964 break;
1965#ifdef PERL_PACK_CAN_SHRIEKSIGN
1966 case 'V' | TYPE_IS_SHRIEKING:
1967 case 'N' | TYPE_IS_SHRIEKING:
1968 while (len-- > 0) {
1969 I32 ai32;
1970# if U32SIZE > SIZE32
1971 ai32 = 0;
1972# endif
1973 SHIFT32(utf8, s, strend, &ai32, datumtype);
1974# ifdef HAS_NTOHL
1975 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1976 ai32 = (I32)PerlSock_ntohl((U32)ai32);
1977# endif
1978# ifdef HAS_VTOHL
1979 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1980 ai32 = (I32)vtohl((U32)ai32);
1981# endif
1982 if (!checksum)
1983 mPUSHi(ai32);
1984 else if (checksum > bits_in_uv)
1985 cdouble += (NV)ai32;
1986 else
1987 cuv += ai32;
1988 }
1989 break;
1990#endif /* PERL_PACK_CAN_SHRIEKSIGN */
1991 case 'p':
1992 while (len-- > 0) {
1993 const char *aptr;
1994 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1995 DO_BO_UNPACK_PC(aptr);
1996 /* newSVpv generates undef if aptr is NULL */
1997 mPUSHs(newSVpv(aptr, 0));
1998 }
1999 break;
2000 case 'w':
2001 {
2002 UV auv = 0;
2003 U32 bytes = 0;
2004
2005 while (len > 0 && s < strend) {
2006 U8 ch;
2007 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
2008 auv = (auv << 7) | (ch & 0x7f);
2009 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
2010 if (ch < 0x80) {
2011 bytes = 0;
2012 mPUSHu(auv);
2013 len--;
2014 auv = 0;
2015 continue;
2016 }
2017 if (++bytes >= sizeof(UV)) { /* promote to string */
2018 const char *t;
2019
2020 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
2021 while (s < strend) {
2022 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
2023 sv = mul128(sv, (U8)(ch & 0x7f));
2024 if (!(ch & 0x80)) {
2025 bytes = 0;
2026 break;
2027 }
2028 }
2029 t = SvPV_nolen_const(sv);
2030 while (*t == '0')
2031 t++;
2032 sv_chop(sv, t);
2033 mPUSHs(sv);
2034 len--;
2035 auv = 0;
2036 }
2037 }
2038 if ((s >= strend) && bytes)
2039 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
2040 }
2041 break;
2042 case 'P':
2043 if (symptr->howlen == e_star)
2044 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
2045 EXTEND(SP, 1);
2046 if (s + sizeof(char*) <= strend) {
2047 char *aptr;
2048 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2049 DO_BO_UNPACK_PC(aptr);
2050 /* newSVpvn generates undef if aptr is NULL */
2051 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
2052 }
2053 break;
2054#ifdef HAS_QUAD
2055 case 'q':
2056 while (len-- > 0) {
2057 Quad_t aquad;
2058 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2059 DO_BO_UNPACK(aquad, 64);
2060 if (!checksum)
2061 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
2062 newSViv((IV)aquad) : newSVnv((NV)aquad));
2063 else if (checksum > bits_in_uv)
2064 cdouble += (NV)aquad;
2065 else
2066 cuv += aquad;
2067 }
2068 break;
2069 case 'Q':
2070 while (len-- > 0) {
2071 Uquad_t auquad;
2072 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2073 DO_BO_UNPACK(auquad, 64);
2074 if (!checksum)
2075 mPUSHs(auquad <= UV_MAX ?
2076 newSVuv((UV)auquad) : newSVnv((NV)auquad));
2077 else if (checksum > bits_in_uv)
2078 cdouble += (NV)auquad;
2079 else
2080 cuv += auquad;
2081 }
2082 break;
2083#endif /* HAS_QUAD */
2084 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2085 case 'f':
2086 while (len-- > 0) {
2087 float afloat;
2088 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
2089 DO_BO_UNPACK_N(afloat, float);
2090 if (!checksum)
2091 mPUSHn(afloat);
2092 else
2093 cdouble += afloat;
2094 }
2095 break;
2096 case 'd':
2097 while (len-- > 0) {
2098 double adouble;
2099 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
2100 DO_BO_UNPACK_N(adouble, double);
2101 if (!checksum)
2102 mPUSHn(adouble);
2103 else
2104 cdouble += adouble;
2105 }
2106 break;
2107 case 'F':
2108 while (len-- > 0) {
2109 NV_bytes anv;
2110 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), datumtype);
2111 DO_BO_UNPACK_N(anv.nv, NV);
2112 if (!checksum)
2113 mPUSHn(anv.nv);
2114 else
2115 cdouble += anv.nv;
2116 }
2117 break;
2118#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2119 case 'D':
2120 while (len-- > 0) {
2121 ld_bytes aldouble;
2122 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, sizeof(aldouble.bytes), datumtype);
2123 DO_BO_UNPACK_N(aldouble.ld, long double);
2124 if (!checksum)
2125 mPUSHn(aldouble.ld);
2126 else
2127 cdouble += aldouble.ld;
2128 }
2129 break;
2130#endif
2131 case 'u':
2132 if (!checksum) {
2133 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2134 sv = sv_2mortal(newSV(l));
2135 if (l) SvPOK_on(sv);
2136 }
2137 if (utf8) {
2138 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2139 I32 a, b, c, d;
2140 char hunk[3];
2141
2142 while (len > 0) {
2143 next_uni_uu(aTHX_ &s, strend, &a);
2144 next_uni_uu(aTHX_ &s, strend, &b);
2145 next_uni_uu(aTHX_ &s, strend, &c);
2146 next_uni_uu(aTHX_ &s, strend, &d);
2147 hunk[0] = (char)((a << 2) | (b >> 4));
2148 hunk[1] = (char)((b << 4) | (c >> 2));
2149 hunk[2] = (char)((c << 6) | d);
2150 if (!checksum)
2151 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2152 len -= 3;
2153 }
2154 if (s < strend) {
2155 if (*s == '\n') {
2156 s++;
2157 }
2158 else {
2159 /* possible checksum byte */
2160 const char *skip = s+UTF8SKIP(s);
2161 if (skip < strend && *skip == '\n')
2162 s = skip+1;
2163 }
2164 }
2165 }
2166 } else {
2167 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2168 I32 a, b, c, d;
2169 char hunk[3];
2170
2171 len = PL_uudmap[*(U8*)s++] & 077;
2172 while (len > 0) {
2173 if (s < strend && ISUUCHAR(*s))
2174 a = PL_uudmap[*(U8*)s++] & 077;
2175 else
2176 a = 0;
2177 if (s < strend && ISUUCHAR(*s))
2178 b = PL_uudmap[*(U8*)s++] & 077;
2179 else
2180 b = 0;
2181 if (s < strend && ISUUCHAR(*s))
2182 c = PL_uudmap[*(U8*)s++] & 077;
2183 else
2184 c = 0;
2185 if (s < strend && ISUUCHAR(*s))
2186 d = PL_uudmap[*(U8*)s++] & 077;
2187 else
2188 d = 0;
2189 hunk[0] = (char)((a << 2) | (b >> 4));
2190 hunk[1] = (char)((b << 4) | (c >> 2));
2191 hunk[2] = (char)((c << 6) | d);
2192 if (!checksum)
2193 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2194 len -= 3;
2195 }
2196 if (*s == '\n')
2197 s++;
2198 else /* possible checksum byte */
2199 if (s + 1 < strend && s[1] == '\n')
2200 s += 2;
2201 }
2202 }
2203 if (!checksum)
2204 XPUSHs(sv);
2205 break;
2206 }
2207
2208 if (checksum) {
2209 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2210 (checksum > bits_in_uv &&
2211 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2212 NV trouble, anv;
2213
2214 anv = (NV) (1 << (checksum & 15));
2215 while (checksum >= 16) {
2216 checksum -= 16;
2217 anv *= 65536.0;
2218 }
2219 while (cdouble < 0.0)
2220 cdouble += anv;
2221 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2222 sv = newSVnv(cdouble);
2223 }
2224 else {
2225 if (checksum < bits_in_uv) {
2226 UV mask = ((UV)1 << checksum) - 1;
2227 cuv &= mask;
2228 }
2229 sv = newSVuv(cuv);
2230 }
2231 mXPUSHs(sv);
2232 checksum = 0;
2233 }
2234
2235 if (symptr->flags & FLAG_SLASH){
2236 if (SP - PL_stack_base - start_sp_offset <= 0)
2237 break;
2238 if( next_symbol(symptr) ){
2239 if( symptr->howlen == e_number )
2240 Perl_croak(aTHX_ "Count after length/code in unpack" );
2241 if( beyond ){
2242 /* ...end of char buffer then no decent length available */
2243 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2244 } else {
2245 /* take top of stack (hope it's numeric) */
2246 len = POPi;
2247 if( len < 0 )
2248 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2249 }
2250 } else {
2251 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2252 }
2253 datumtype = symptr->code;
2254 explicit_length = FALSE;
2255 goto redo_switch;
2256 }
2257 }
2258
2259 if (new_s)
2260 *new_s = s;
2261 PUTBACK;
2262 return SP - PL_stack_base - start_sp_offset;
2263}
2264
2265PP(pp_unpack)
2266{
2267 dVAR;
2268 dSP;
2269 dPOPPOPssrl;
2270 I32 gimme = GIMME_V;
2271 STRLEN llen;
2272 STRLEN rlen;
2273 const char *pat = SvPV_const(left, llen);
2274 const char *s = SvPV_const(right, rlen);
2275 const char *strend = s + rlen;
2276 const char *patend = pat + llen;
2277 I32 cnt;
2278
2279 PUTBACK;
2280 cnt = unpackstring(pat, patend, s, strend,
2281 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2282 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2283
2284 SPAGAIN;
2285 if ( !cnt && gimme == G_SCALAR )
2286 PUSHs(&PL_sv_undef);
2287 RETURN;
2288}
2289
2290STATIC U8 *
2291doencodes(U8 *h, const char *s, I32 len)
2292{
2293 *h++ = PL_uuemap[len];
2294 while (len > 2) {
2295 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2296 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2297 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2298 *h++ = PL_uuemap[(077 & (s[2] & 077))];
2299 s += 3;
2300 len -= 3;
2301 }
2302 if (len > 0) {
2303 const char r = (len > 1 ? s[1] : '\0');
2304 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2305 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2306 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2307 *h++ = PL_uuemap[0];
2308 }
2309 *h++ = '\n';
2310 return h;
2311}
2312
2313STATIC SV *
2314S_is_an_int(pTHX_ const char *s, STRLEN l)
2315{
2316 SV *result = newSVpvn(s, l);
2317 char *const result_c = SvPV_nolen(result); /* convenience */
2318 char *out = result_c;
2319 bool skip = 1;
2320 bool ignore = 0;
2321
2322 PERL_ARGS_ASSERT_IS_AN_INT;
2323
2324 while (*s) {
2325 switch (*s) {
2326 case ' ':
2327 break;
2328 case '+':
2329 if (!skip) {
2330 SvREFCNT_dec(result);
2331 return (NULL);
2332 }
2333 break;
2334 case '0':
2335 case '1':
2336 case '2':
2337 case '3':
2338 case '4':
2339 case '5':
2340 case '6':
2341 case '7':
2342 case '8':
2343 case '9':
2344 skip = 0;
2345 if (!ignore) {
2346 *(out++) = *s;
2347 }
2348 break;
2349 case '.':
2350 ignore = 1;
2351 break;
2352 default:
2353 SvREFCNT_dec(result);
2354 return (NULL);
2355 }
2356 s++;
2357 }
2358 *(out++) = '\0';
2359 SvCUR_set(result, out - result_c);
2360 return (result);
2361}
2362
2363/* pnum must be '\0' terminated */
2364STATIC int
2365S_div128(pTHX_ SV *pnum, bool *done)
2366{
2367 STRLEN len;
2368 char * const s = SvPV(pnum, len);
2369 char *t = s;
2370 int m = 0;
2371
2372 PERL_ARGS_ASSERT_DIV128;
2373
2374 *done = 1;
2375 while (*t) {
2376 const int i = m * 10 + (*t - '0');
2377 const int r = (i >> 7); /* r < 10 */
2378 m = i & 0x7F;
2379 if (r) {
2380 *done = 0;
2381 }
2382 *(t++) = '0' + r;
2383 }
2384 *(t++) = '\0';
2385 SvCUR_set(pnum, (STRLEN) (t - s));
2386 return (m);
2387}
2388
2389/*
2390=for apidoc packlist
2391
2392The engine implementing pack() Perl function.
2393
2394=cut
2395*/
2396
2397void
2398Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
2399{
2400 dVAR;
2401 tempsym_t sym;
2402
2403 PERL_ARGS_ASSERT_PACKLIST;
2404
2405 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2406
2407 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2408 Also make sure any UTF8 flag is loaded */
2409 SvPV_force_nolen(cat);
2410 if (DO_UTF8(cat))
2411 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2412
2413 (void)pack_rec( cat, &sym, beglist, endlist );
2414}
2415
2416/* like sv_utf8_upgrade, but also repoint the group start markers */
2417STATIC void
2418marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2419 STRLEN len;
2420 tempsym_t *group;
2421 const char *from_ptr, *from_start, *from_end, **marks, **m;
2422 char *to_start, *to_ptr;
2423
2424 if (SvUTF8(sv)) return;
2425
2426 from_start = SvPVX_const(sv);
2427 from_end = from_start + SvCUR(sv);
2428 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2429 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2430 if (from_ptr == from_end) {
2431 /* Simple case: no character needs to be changed */
2432 SvUTF8_on(sv);
2433 return;
2434 }
2435
2436 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2437 Newx(to_start, len, char);
2438 Copy(from_start, to_start, from_ptr-from_start, char);
2439 to_ptr = to_start + (from_ptr-from_start);
2440
2441 Newx(marks, sym_ptr->level+2, const char *);
2442 for (group=sym_ptr; group; group = group->previous)
2443 marks[group->level] = from_start + group->strbeg;
2444 marks[sym_ptr->level+1] = from_end+1;
2445 for (m = marks; *m < from_ptr; m++)
2446 *m = to_start + (*m-from_start);
2447
2448 for (;from_ptr < from_end; from_ptr++) {
2449 while (*m == from_ptr) *m++ = to_ptr;
2450 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2451 }
2452 *to_ptr = 0;
2453
2454 while (*m == from_ptr) *m++ = to_ptr;
2455 if (m != marks + sym_ptr->level+1) {
2456 Safefree(marks);
2457 Safefree(to_start);
2458 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2459 "level=%d", m, marks, sym_ptr->level);
2460 }
2461 for (group=sym_ptr; group; group = group->previous)
2462 group->strbeg = marks[group->level] - to_start;
2463 Safefree(marks);
2464
2465 if (SvOOK(sv)) {
2466 if (SvIVX(sv)) {
2467 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2468 from_start -= SvIVX(sv);
2469 SvIV_set(sv, 0);
2470 }
2471 SvFLAGS(sv) &= ~SVf_OOK;
2472 }
2473 if (SvLEN(sv) != 0)
2474 Safefree(from_start);
2475 SvPV_set(sv, to_start);
2476 SvCUR_set(sv, to_ptr - to_start);
2477 SvLEN_set(sv, len);
2478 SvUTF8_on(sv);
2479}
2480
2481/* Exponential string grower. Makes string extension effectively O(n)
2482 needed says how many extra bytes we need (not counting the final '\0')
2483 Only grows the string if there is an actual lack of space
2484*/
2485STATIC char *
2486S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2487 const STRLEN cur = SvCUR(sv);
2488 const STRLEN len = SvLEN(sv);
2489 STRLEN extend;
2490
2491 PERL_ARGS_ASSERT_SV_EXP_GROW;
2492
2493 if (len - cur > needed) return SvPVX(sv);
2494 extend = needed > len ? needed : len;
2495 return SvGROW(sv, len+extend+1);
2496}
2497
2498STATIC
2499SV **
2500S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2501{
2502 dVAR;
2503 tempsym_t lookahead;
2504 I32 items = endlist - beglist;
2505 bool found = next_symbol(symptr);
2506 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2507 bool warn_utf8 = ckWARN(WARN_UTF8);
2508
2509 PERL_ARGS_ASSERT_PACK_REC;
2510
2511 if (symptr->level == 0 && found && symptr->code == 'U') {
2512 marked_upgrade(aTHX_ cat, symptr);
2513 symptr->flags |= FLAG_DO_UTF8;
2514 utf8 = 0;
2515 }
2516 symptr->strbeg = SvCUR(cat);
2517
2518 while (found) {
2519 SV *fromstr;
2520 STRLEN fromlen;
2521 I32 len;
2522 SV *lengthcode = NULL;
2523 I32 datumtype = symptr->code;
2524 howlen_t howlen = symptr->howlen;
2525 char *start = SvPVX(cat);
2526 char *cur = start + SvCUR(cat);
2527
2528#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2529
2530 switch (howlen) {
2531 case e_star:
2532 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2533 0 : items;
2534 break;
2535 default:
2536 /* e_no_len and e_number */
2537 len = symptr->length;
2538 break;
2539 }
2540
2541 if (len) {
2542 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2543
2544 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2545 /* We can process this letter. */
2546 STRLEN size = props & PACK_SIZE_MASK;
2547 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2548 }
2549 }
2550
2551 /* Look ahead for next symbol. Do we have code/code? */
2552 lookahead = *symptr;
2553 found = next_symbol(&lookahead);
2554 if (symptr->flags & FLAG_SLASH) {
2555 IV count;
2556 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2557 if (strchr("aAZ", lookahead.code)) {
2558 if (lookahead.howlen == e_number) count = lookahead.length;
2559 else {
2560 if (items > 0) {
2561 if (SvGAMAGIC(*beglist)) {
2562 /* Avoid reading the active data more than once
2563 by copying it to a temporary. */
2564 STRLEN len;
2565 const char *const pv = SvPV_const(*beglist, len);
2566 SV *const temp
2567 = newSVpvn_flags(pv, len,
2568 SVs_TEMP | SvUTF8(*beglist));
2569 *beglist = temp;
2570 }
2571 count = DO_UTF8(*beglist) ?
2572 sv_len_utf8(*beglist) : sv_len(*beglist);
2573 }
2574 else count = 0;
2575 if (lookahead.code == 'Z') count++;
2576 }
2577 } else {
2578 if (lookahead.howlen == e_number && lookahead.length < items)
2579 count = lookahead.length;
2580 else count = items;
2581 }
2582 lookahead.howlen = e_number;
2583 lookahead.length = count;
2584 lengthcode = sv_2mortal(newSViv(count));
2585 }
2586
2587 /* Code inside the switch must take care to properly update
2588 cat (CUR length and '\0' termination) if it updated *cur and
2589 doesn't simply leave using break */
2590 switch(TYPE_NO_ENDIANNESS(datumtype)) {
2591 default:
2592 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2593 (int) TYPE_NO_MODIFIERS(datumtype));
2594 case '%':
2595 Perl_croak(aTHX_ "'%%' may not be used in pack");
2596 {
2597 char *from;
2598#ifdef PERL_PACK_CAN_SHRIEKSIGN
2599 case '.' | TYPE_IS_SHRIEKING:
2600#endif
2601 case '.':
2602 if (howlen == e_star) from = start;
2603 else if (len == 0) from = cur;
2604 else {
2605 tempsym_t *group = symptr;
2606
2607 while (--len && group) group = group->previous;
2608 from = group ? start + group->strbeg : start;
2609 }
2610 fromstr = NEXTFROM;
2611 len = SvIV(fromstr);
2612 goto resize;
2613#ifdef PERL_PACK_CAN_SHRIEKSIGN
2614 case '@' | TYPE_IS_SHRIEKING:
2615#endif
2616 case '@':
2617 from = start + symptr->strbeg;
2618 resize:
2619#ifdef PERL_PACK_CAN_SHRIEKSIGN
2620 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2621#else /* PERL_PACK_CAN_SHRIEKSIGN */
2622 if (utf8)
2623#endif
2624 if (len >= 0) {
2625 while (len && from < cur) {
2626 from += UTF8SKIP(from);
2627 len--;
2628 }
2629 if (from > cur)
2630 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2631 if (len) {
2632 /* Here we know from == cur */
2633 grow:
2634 GROWING(0, cat, start, cur, len);
2635 Zero(cur, len, char);
2636 cur += len;
2637 } else if (from < cur) {
2638 len = cur - from;
2639 goto shrink;
2640 } else goto no_change;
2641 } else {
2642 cur = from;
2643 len = -len;
2644 goto utf8_shrink;
2645 }
2646 else {
2647 len -= cur - from;
2648 if (len > 0) goto grow;
2649 if (len == 0) goto no_change;
2650 len = -len;
2651 goto shrink;
2652 }
2653 break;
2654 }
2655 case '(': {
2656 tempsym_t savsym = *symptr;
2657 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2658 symptr->flags |= group_modifiers;
2659 symptr->patend = savsym.grpend;
2660 symptr->level++;
2661 symptr->previous = &lookahead;
2662 while (len--) {
2663 U32 was_utf8;
2664 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2665 else symptr->flags &= ~FLAG_PARSE_UTF8;
2666 was_utf8 = SvUTF8(cat);
2667 symptr->patptr = savsym.grpbeg;
2668 beglist = pack_rec(cat, symptr, beglist, endlist);
2669 if (SvUTF8(cat) != was_utf8)
2670 /* This had better be an upgrade while in utf8==0 mode */
2671 utf8 = 1;
2672
2673 if (savsym.howlen == e_star && beglist == endlist)
2674 break; /* No way to continue */
2675 }
2676 items = endlist - beglist;
2677 lookahead.flags = symptr->flags & ~group_modifiers;
2678 goto no_change;
2679 }
2680 case 'X' | TYPE_IS_SHRIEKING:
2681 if (!len) /* Avoid division by 0 */
2682 len = 1;
2683 if (utf8) {
2684 char *hop, *last;
2685 I32 l = len;
2686 hop = last = start;
2687 while (hop < cur) {
2688 hop += UTF8SKIP(hop);
2689 if (--l == 0) {
2690 last = hop;
2691 l = len;
2692 }
2693 }
2694 if (last > cur)
2695 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2696 cur = last;
2697 break;
2698 }
2699 len = (cur-start) % len;
2700 /* FALL THROUGH */
2701 case 'X':
2702 if (utf8) {
2703 if (len < 1) goto no_change;
2704 utf8_shrink:
2705 while (len > 0) {
2706 if (cur <= start)
2707 Perl_croak(aTHX_ "'%c' outside of string in pack",
2708 (int) TYPE_NO_MODIFIERS(datumtype));
2709 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2710 if (cur <= start)
2711 Perl_croak(aTHX_ "'%c' outside of string in pack",
2712 (int) TYPE_NO_MODIFIERS(datumtype));
2713 }
2714 len--;
2715 }
2716 } else {
2717 shrink:
2718 if (cur - start < len)
2719 Perl_croak(aTHX_ "'%c' outside of string in pack",
2720 (int) TYPE_NO_MODIFIERS(datumtype));
2721 cur -= len;
2722 }
2723 if (cur < start+symptr->strbeg) {
2724 /* Make sure group starts don't point into the void */
2725 tempsym_t *group;
2726 const STRLEN length = cur-start;
2727 for (group = symptr;
2728 group && length < group->strbeg;
2729 group = group->previous) group->strbeg = length;
2730 lookahead.strbeg = length;
2731 }
2732 break;
2733 case 'x' | TYPE_IS_SHRIEKING: {
2734 I32 ai32;
2735 if (!len) /* Avoid division by 0 */
2736 len = 1;
2737 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2738 else ai32 = (cur - start) % len;
2739 if (ai32 == 0) goto no_change;
2740 len -= ai32;
2741 }
2742 /* FALL THROUGH */
2743 case 'x':
2744 goto grow;
2745 case 'A':
2746 case 'Z':
2747 case 'a': {
2748 const char *aptr;
2749
2750 fromstr = NEXTFROM;
2751 aptr = SvPV_const(fromstr, fromlen);
2752 if (DO_UTF8(fromstr)) {
2753 const char *end, *s;
2754
2755 if (!utf8 && !SvUTF8(cat)) {
2756 marked_upgrade(aTHX_ cat, symptr);
2757 lookahead.flags |= FLAG_DO_UTF8;
2758 lookahead.strbeg = symptr->strbeg;
2759 utf8 = 1;
2760 start = SvPVX(cat);
2761 cur = start + SvCUR(cat);
2762 }
2763 if (howlen == e_star) {
2764 if (utf8) goto string_copy;
2765 len = fromlen+1;
2766 }
2767 s = aptr;
2768 end = aptr + fromlen;
2769 fromlen = datumtype == 'Z' ? len-1 : len;
2770 while ((I32) fromlen > 0 && s < end) {
2771 s += UTF8SKIP(s);
2772 fromlen--;
2773 }
2774 if (s > end)
2775 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2776 if (utf8) {
2777 len = fromlen;
2778 if (datumtype == 'Z') len++;
2779 fromlen = s-aptr;
2780 len += fromlen;
2781
2782 goto string_copy;
2783 }
2784 fromlen = len - fromlen;
2785 if (datumtype == 'Z') fromlen--;
2786 if (howlen == e_star) {
2787 len = fromlen;
2788 if (datumtype == 'Z') len++;
2789 }
2790 GROWING(0, cat, start, cur, len);
2791 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2792 datumtype | TYPE_IS_PACK))
2793 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2794 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2795 (int)datumtype, aptr, end, cur, (UV)fromlen);
2796 cur += fromlen;
2797 len -= fromlen;
2798 } else if (utf8) {
2799 if (howlen == e_star) {
2800 len = fromlen;
2801 if (datumtype == 'Z') len++;
2802 }
2803 if (len <= (I32) fromlen) {
2804 fromlen = len;
2805 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2806 }
2807 /* assumes a byte expands to at most UTF8_EXPAND bytes on
2808 upgrade, so:
2809 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2810 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2811 len -= fromlen;
2812 while (fromlen > 0) {
2813 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2814 aptr++;
2815 fromlen--;
2816 }
2817 } else {
2818 string_copy:
2819 if (howlen == e_star) {
2820 len = fromlen;
2821 if (datumtype == 'Z') len++;
2822 }
2823 if (len <= (I32) fromlen) {
2824 fromlen = len;
2825 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2826 }
2827 GROWING(0, cat, start, cur, len);
2828 Copy(aptr, cur, fromlen, char);
2829 cur += fromlen;
2830 len -= fromlen;
2831 }
2832 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2833 cur += len;
2834 SvTAINT(cat);
2835 break;
2836 }
2837 case 'B':
2838 case 'b': {
2839 const char *str, *end;
2840 I32 l, field_len;
2841 U8 bits;
2842 bool utf8_source;
2843 U32 utf8_flags;
2844
2845 fromstr = NEXTFROM;
2846 str = SvPV_const(fromstr, fromlen);
2847 end = str + fromlen;
2848 if (DO_UTF8(fromstr)) {
2849 utf8_source = TRUE;
2850 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2851 } else {
2852 utf8_source = FALSE;
2853 utf8_flags = 0; /* Unused, but keep compilers happy */
2854 }
2855 if (howlen == e_star) len = fromlen;
2856 field_len = (len+7)/8;
2857 GROWING(utf8, cat, start, cur, field_len);
2858 if (len > (I32)fromlen) len = fromlen;
2859 bits = 0;
2860 l = 0;
2861 if (datumtype == 'B')
2862 while (l++ < len) {
2863 if (utf8_source) {
2864 UV val = 0;
2865 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2866 bits |= val & 1;
2867 } else bits |= *str++ & 1;
2868 if (l & 7) bits <<= 1;
2869 else {
2870 PUSH_BYTE(utf8, cur, bits);
2871 bits = 0;
2872 }
2873 }
2874 else
2875 /* datumtype == 'b' */
2876 while (l++ < len) {
2877 if (utf8_source) {
2878 UV val = 0;
2879 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2880 if (val & 1) bits |= 0x80;
2881 } else if (*str++ & 1)
2882 bits |= 0x80;
2883 if (l & 7) bits >>= 1;
2884 else {
2885 PUSH_BYTE(utf8, cur, bits);
2886 bits = 0;
2887 }
2888 }
2889 l--;
2890 if (l & 7) {
2891 if (datumtype == 'B')
2892 bits <<= 7 - (l & 7);
2893 else
2894 bits >>= 7 - (l & 7);
2895 PUSH_BYTE(utf8, cur, bits);
2896 l += 7;
2897 }
2898 /* Determine how many chars are left in the requested field */
2899 l /= 8;
2900 if (howlen == e_star) field_len = 0;
2901 else field_len -= l;
2902 Zero(cur, field_len, char);
2903 cur += field_len;
2904 break;
2905 }
2906 case 'H':
2907 case 'h': {
2908 const char *str, *end;
2909 I32 l, field_len;
2910 U8 bits;
2911 bool utf8_source;
2912 U32 utf8_flags;
2913
2914 fromstr = NEXTFROM;
2915 str = SvPV_const(fromstr, fromlen);
2916 end = str + fromlen;
2917 if (DO_UTF8(fromstr)) {
2918 utf8_source = TRUE;
2919 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2920 } else {
2921 utf8_source = FALSE;
2922 utf8_flags = 0; /* Unused, but keep compilers happy */
2923 }
2924 if (howlen == e_star) len = fromlen;
2925 field_len = (len+1)/2;
2926 GROWING(utf8, cat, start, cur, field_len);
2927 if (!utf8 && len > (I32)fromlen) len = fromlen;
2928 bits = 0;
2929 l = 0;
2930 if (datumtype == 'H')
2931 while (l++ < len) {
2932 if (utf8_source) {
2933 UV val = 0;
2934 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2935 if (val < 256 && isALPHA(val))
2936 bits |= (val + 9) & 0xf;
2937 else
2938 bits |= val & 0xf;
2939 } else if (isALPHA(*str))
2940 bits |= (*str++ + 9) & 0xf;
2941 else
2942 bits |= *str++ & 0xf;
2943 if (l & 1) bits <<= 4;
2944 else {
2945 PUSH_BYTE(utf8, cur, bits);
2946 bits = 0;
2947 }
2948 }
2949 else
2950 while (l++ < len) {
2951 if (utf8_source) {
2952 UV val = 0;
2953 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2954 if (val < 256 && isALPHA(val))
2955 bits |= ((val + 9) & 0xf) << 4;
2956 else
2957 bits |= (val & 0xf) << 4;
2958 } else if (isALPHA(*str))
2959 bits |= ((*str++ + 9) & 0xf) << 4;
2960 else
2961 bits |= (*str++ & 0xf) << 4;
2962 if (l & 1) bits >>= 4;
2963 else {
2964 PUSH_BYTE(utf8, cur, bits);
2965 bits = 0;
2966 }
2967 }
2968 l--;
2969 if (l & 1) {
2970 PUSH_BYTE(utf8, cur, bits);
2971 l++;
2972 }
2973 /* Determine how many chars are left in the requested field */
2974 l /= 2;
2975 if (howlen == e_star) field_len = 0;
2976 else field_len -= l;
2977 Zero(cur, field_len, char);
2978 cur += field_len;
2979 break;
2980 }
2981 case 'c':
2982 while (len-- > 0) {
2983 IV aiv;
2984 fromstr = NEXTFROM;
2985 aiv = SvIV(fromstr);
2986 if ((-128 > aiv || aiv > 127))
2987 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2988 "Character in 'c' format wrapped in pack");
2989 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2990 }
2991 break;
2992 case 'C':
2993 if (len == 0) {
2994 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2995 break;
2996 }
2997 while (len-- > 0) {
2998 IV aiv;
2999 fromstr = NEXTFROM;
3000 aiv = SvIV(fromstr);
3001 if ((0 > aiv || aiv > 0xff))
3002 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3003 "Character in 'C' format wrapped in pack");
3004 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
3005 }
3006 break;
3007 case 'W': {
3008 char *end;
3009 U8 in_bytes = (U8)IN_BYTES;
3010
3011 end = start+SvLEN(cat)-1;
3012 if (utf8) end -= UTF8_MAXLEN-1;
3013 while (len-- > 0) {
3014 UV auv;
3015 fromstr = NEXTFROM;
3016 auv = SvUV(fromstr);
3017 if (in_bytes) auv = auv % 0x100;
3018 if (utf8) {
3019 W_utf8:
3020 if (cur > end) {
3021 *cur = '\0';
3022 SvCUR_set(cat, cur - start);
3023
3024 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3025 end = start+SvLEN(cat)-UTF8_MAXLEN;
3026 }
3027 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3028 NATIVE_TO_UNI(auv),
3029 warn_utf8 ?
3030 0 : UNICODE_ALLOW_ANY);
3031 } else {
3032 if (auv >= 0x100) {
3033 if (!SvUTF8(cat)) {
3034 *cur = '\0';
3035 SvCUR_set(cat, cur - start);
3036 marked_upgrade(aTHX_ cat, symptr);
3037 lookahead.flags |= FLAG_DO_UTF8;
3038 lookahead.strbeg = symptr->strbeg;
3039 utf8 = 1;
3040 start = SvPVX(cat);
3041 cur = start + SvCUR(cat);
3042 end = start+SvLEN(cat)-UTF8_MAXLEN;
3043 goto W_utf8;
3044 }
3045 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3046 "Character in 'W' format wrapped in pack");
3047 auv &= 0xff;
3048 }
3049 if (cur >= end) {
3050 *cur = '\0';
3051 SvCUR_set(cat, cur - start);
3052 GROWING(0, cat, start, cur, len+1);
3053 end = start+SvLEN(cat)-1;
3054 }
3055 *(U8 *) cur++ = (U8)auv;
3056 }
3057 }
3058 break;
3059 }
3060 case 'U': {
3061 char *end;
3062
3063 if (len == 0) {
3064 if (!(symptr->flags & FLAG_DO_UTF8)) {
3065 marked_upgrade(aTHX_ cat, symptr);
3066 lookahead.flags |= FLAG_DO_UTF8;
3067 lookahead.strbeg = symptr->strbeg;
3068 }
3069 utf8 = 0;
3070 goto no_change;
3071 }
3072
3073 end = start+SvLEN(cat);
3074 if (!utf8) end -= UTF8_MAXLEN;
3075 while (len-- > 0) {
3076 UV auv;
3077 fromstr = NEXTFROM;
3078 auv = SvUV(fromstr);
3079 if (utf8) {
3080 U8 buffer[UTF8_MAXLEN], *endb;
3081 endb = uvuni_to_utf8_flags(buffer, auv,
3082 warn_utf8 ?
3083 0 : UNICODE_ALLOW_ANY);
3084 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3085 *cur = '\0';
3086 SvCUR_set(cat, cur - start);
3087 GROWING(0, cat, start, cur,
3088 len+(endb-buffer)*UTF8_EXPAND);
3089 end = start+SvLEN(cat);
3090 }
3091 cur = bytes_to_uni(buffer, endb-buffer, cur);
3092 } else {
3093 if (cur >= end) {
3094 *cur = '\0';
3095 SvCUR_set(cat, cur - start);
3096 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3097 end = start+SvLEN(cat)-UTF8_MAXLEN;
3098 }
3099 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3100 warn_utf8 ?
3101 0 : UNICODE_ALLOW_ANY);
3102 }
3103 }
3104 break;
3105 }
3106 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3107 case 'f':
3108 while (len-- > 0) {
3109 float afloat;
3110 NV anv;
3111 fromstr = NEXTFROM;
3112 anv = SvNV(fromstr);
3113#ifdef __VOS__
3114 /* VOS does not automatically map a floating-point overflow
3115 during conversion from double to float into infinity, so we
3116 do it by hand. This code should either be generalized for
3117 any OS that needs it, or removed if and when VOS implements
3118 posix-976 (suggestion to support mapping to infinity).
3119 Paul.Green@stratus.com 02-04-02. */
3120{
3121extern const float _float_constants[];
3122 if (anv > FLT_MAX)
3123 afloat = _float_constants[0]; /* single prec. inf. */
3124 else if (anv < -FLT_MAX)
3125 afloat = _float_constants[0]; /* single prec. inf. */
3126 else afloat = (float) anv;
3127}
3128#else /* __VOS__ */
3129# if defined(VMS) && !defined(__IEEE_FP)
3130 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3131 * on Alpha; fake it if we don't have them.
3132 */
3133 if (anv > FLT_MAX)
3134 afloat = FLT_MAX;
3135 else if (anv < -FLT_MAX)
3136 afloat = -FLT_MAX;
3137 else afloat = (float)anv;
3138# else
3139 afloat = (float)anv;
3140# endif
3141#endif /* __VOS__ */
3142 DO_BO_PACK_N(afloat, float);
3143 PUSH_VAR(utf8, cur, afloat);
3144 }
3145 break;
3146 case 'd':
3147 while (len-- > 0) {
3148 double adouble;
3149 NV anv;
3150 fromstr = NEXTFROM;
3151 anv = SvNV(fromstr);
3152#ifdef __VOS__
3153 /* VOS does not automatically map a floating-point overflow
3154 during conversion from long double to double into infinity,
3155 so we do it by hand. This code should either be generalized
3156 for any OS that needs it, or removed if and when VOS
3157 implements posix-976 (suggestion to support mapping to
3158 infinity). Paul.Green@stratus.com 02-04-02. */
3159{
3160extern const double _double_constants[];
3161 if (anv > DBL_MAX)
3162 adouble = _double_constants[0]; /* double prec. inf. */
3163 else if (anv < -DBL_MAX)
3164 adouble = _double_constants[0]; /* double prec. inf. */
3165 else adouble = (double) anv;
3166}
3167#else /* __VOS__ */
3168# if defined(VMS) && !defined(__IEEE_FP)
3169 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
3170 * on Alpha; fake it if we don't have them.
3171 */
3172 if (anv > DBL_MAX)
3173 adouble = DBL_MAX;
3174 else if (anv < -DBL_MAX)
3175 adouble = -DBL_MAX;
3176 else adouble = (double)anv;
3177# else
3178 adouble = (double)anv;
3179# endif
3180#endif /* __VOS__ */
3181 DO_BO_PACK_N(adouble, double);
3182 PUSH_VAR(utf8, cur, adouble);
3183 }
3184 break;
3185 case 'F': {
3186 NV_bytes anv;
3187 Zero(&anv, 1, NV); /* can be long double with unused bits */
3188 while (len-- > 0) {
3189 fromstr = NEXTFROM;
3190#ifdef __GNUC__
3191 /* to work round a gcc/x86 bug; don't use SvNV */
3192 anv.nv = sv_2nv(fromstr);
3193#else
3194 anv.nv = SvNV(fromstr);
3195#endif
3196 DO_BO_PACK_N(anv, NV);
3197 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes));
3198 }
3199 break;
3200 }
3201#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3202 case 'D': {
3203 ld_bytes aldouble;
3204 /* long doubles can have unused bits, which may be nonzero */
3205 Zero(&aldouble, 1, long double);
3206 while (len-- > 0) {
3207 fromstr = NEXTFROM;
3208# ifdef __GNUC__
3209 /* to work round a gcc/x86 bug; don't use SvNV */
3210 aldouble.ld = (long double)sv_2nv(fromstr);
3211# else
3212 aldouble.ld = (long double)SvNV(fromstr);
3213# endif
3214 DO_BO_PACK_N(aldouble, long double);
3215 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes));
3216 }
3217 break;
3218 }
3219#endif
3220#ifdef PERL_PACK_CAN_SHRIEKSIGN
3221 case 'n' | TYPE_IS_SHRIEKING:
3222#endif
3223 case 'n':
3224 while (len-- > 0) {
3225 I16 ai16;
3226 fromstr = NEXTFROM;
3227 ai16 = (I16)SvIV(fromstr);
3228#ifdef HAS_HTONS
3229 ai16 = PerlSock_htons(ai16);
3230#endif
3231 PUSH16(utf8, cur, &ai16);
3232 }
3233 break;
3234#ifdef PERL_PACK_CAN_SHRIEKSIGN
3235 case 'v' | TYPE_IS_SHRIEKING:
3236#endif
3237 case 'v':
3238 while (len-- > 0) {
3239 I16 ai16;
3240 fromstr = NEXTFROM;
3241 ai16 = (I16)SvIV(fromstr);
3242#ifdef HAS_HTOVS
3243 ai16 = htovs(ai16);
3244#endif
3245 PUSH16(utf8, cur, &ai16);
3246 }
3247 break;
3248 case 'S' | TYPE_IS_SHRIEKING:
3249#if SHORTSIZE != SIZE16
3250 while (len-- > 0) {
3251 unsigned short aushort;
3252 fromstr = NEXTFROM;
3253 aushort = SvUV(fromstr);
3254 DO_BO_PACK(aushort, s);
3255 PUSH_VAR(utf8, cur, aushort);
3256 }
3257 break;
3258#else
3259 /* Fall through! */
3260#endif
3261 case 'S':
3262 while (len-- > 0) {
3263 U16 au16;
3264 fromstr = NEXTFROM;
3265 au16 = (U16)SvUV(fromstr);
3266 DO_BO_PACK(au16, 16);
3267 PUSH16(utf8, cur, &au16);
3268 }
3269 break;
3270 case 's' | TYPE_IS_SHRIEKING:
3271#if SHORTSIZE != SIZE16
3272 while (len-- > 0) {
3273 short ashort;
3274 fromstr = NEXTFROM;
3275 ashort = SvIV(fromstr);
3276 DO_BO_PACK(ashort, s);
3277 PUSH_VAR(utf8, cur, ashort);
3278 }
3279 break;
3280#else
3281 /* Fall through! */
3282#endif
3283 case 's':
3284 while (len-- > 0) {
3285 I16 ai16;
3286 fromstr = NEXTFROM;
3287 ai16 = (I16)SvIV(fromstr);
3288 DO_BO_PACK(ai16, 16);
3289 PUSH16(utf8, cur, &ai16);
3290 }
3291 break;
3292 case 'I':
3293 case 'I' | TYPE_IS_SHRIEKING:
3294 while (len-- > 0) {
3295 unsigned int auint;
3296 fromstr = NEXTFROM;
3297 auint = SvUV(fromstr);
3298 DO_BO_PACK(auint, i);
3299 PUSH_VAR(utf8, cur, auint);
3300 }
3301 break;
3302 case 'j':
3303 while (len-- > 0) {
3304 IV aiv;
3305 fromstr = NEXTFROM;
3306 aiv = SvIV(fromstr);
3307#if IVSIZE == INTSIZE
3308 DO_BO_PACK(aiv, i);
3309#elif IVSIZE == LONGSIZE
3310 DO_BO_PACK(aiv, l);
3311#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3312 DO_BO_PACK(aiv, 64);
3313#else
3314 Perl_croak(aTHX_ "'j' not supported on this platform");
3315#endif
3316 PUSH_VAR(utf8, cur, aiv);
3317 }
3318 break;
3319 case 'J':
3320 while (len-- > 0) {
3321 UV auv;
3322 fromstr = NEXTFROM;
3323 auv = SvUV(fromstr);
3324#if UVSIZE == INTSIZE
3325 DO_BO_PACK(auv, i);
3326#elif UVSIZE == LONGSIZE
3327 DO_BO_PACK(auv, l);
3328#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3329 DO_BO_PACK(auv, 64);
3330#else
3331 Perl_croak(aTHX_ "'J' not supported on this platform");
3332#endif
3333 PUSH_VAR(utf8, cur, auv);
3334 }
3335 break;
3336 case 'w':
3337 while (len-- > 0) {
3338 NV anv;
3339 fromstr = NEXTFROM;
3340 anv = SvNV(fromstr);
3341
3342 if (anv < 0) {
3343 *cur = '\0';
3344 SvCUR_set(cat, cur - start);
3345 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3346 }
3347
3348 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3349 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3350 any negative IVs will have already been got by the croak()
3351 above. IOK is untrue for fractions, so we test them
3352 against UV_MAX_P1. */
3353 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3354 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
3355 char *in = buf + sizeof(buf);
3356 UV auv = SvUV(fromstr);
3357
3358 do {
3359 *--in = (char)((auv & 0x7f) | 0x80);
3360 auv >>= 7;
3361 } while (auv);
3362 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3363 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3364 in, (buf + sizeof(buf)) - in);
3365 } else if (SvPOKp(fromstr))
3366 goto w_string;
3367 else if (SvNOKp(fromstr)) {
3368 /* 10**NV_MAX_10_EXP is the largest power of 10
3369 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
3370 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3371 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3372 And with that many bytes only Inf can overflow.
3373 Some C compilers are strict about integral constant
3374 expressions so we conservatively divide by a slightly
3375 smaller integer instead of multiplying by the exact
3376 floating-point value.
3377 */
3378#ifdef NV_MAX_10_EXP
3379 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3380 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3381#else
3382 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3383 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3384#endif
3385 char *in = buf + sizeof(buf);
3386
3387 anv = Perl_floor(anv);
3388 do {
3389 const NV next = Perl_floor(anv / 128);
3390 if (in <= buf) /* this cannot happen ;-) */
3391 Perl_croak(aTHX_ "Cannot compress integer in pack");
3392 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
3393 anv = next;
3394 } while (anv > 0);
3395 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3396 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3397 in, (buf + sizeof(buf)) - in);
3398 } else {
3399 const char *from;
3400 char *result, *in;
3401 SV *norm;
3402 STRLEN len;
3403 bool done;
3404
3405 w_string:
3406 /* Copy string and check for compliance */
3407 from = SvPV_const(fromstr, len);
3408 if ((norm = is_an_int(from, len)) == NULL)
3409 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3410
3411 Newx(result, len, char);
3412 in = result + len;
3413 done = FALSE;
3414 while (!done) *--in = div128(norm, &done) | 0x80;
3415 result[len - 1] &= 0x7F; /* clear continue bit */
3416 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3417 in, (result + len) - in);
3418 Safefree(result);
3419 SvREFCNT_dec(norm); /* free norm */
3420 }
3421 }
3422 break;
3423 case 'i':
3424 case 'i' | TYPE_IS_SHRIEKING:
3425 while (len-- > 0) {
3426 int aint;
3427 fromstr = NEXTFROM;
3428 aint = SvIV(fromstr);
3429 DO_BO_PACK(aint, i);
3430 PUSH_VAR(utf8, cur, aint);
3431 }
3432 break;
3433#ifdef PERL_PACK_CAN_SHRIEKSIGN
3434 case 'N' | TYPE_IS_SHRIEKING:
3435#endif
3436 case 'N':
3437 while (len-- > 0) {
3438 U32 au32;
3439 fromstr = NEXTFROM;
3440 au32 = SvUV(fromstr);
3441#ifdef HAS_HTONL
3442 au32 = PerlSock_htonl(au32);
3443#endif
3444 PUSH32(utf8, cur, &au32);
3445 }
3446 break;
3447#ifdef PERL_PACK_CAN_SHRIEKSIGN
3448 case 'V' | TYPE_IS_SHRIEKING:
3449#endif
3450 case 'V':
3451 while (len-- > 0) {
3452 U32 au32;
3453 fromstr = NEXTFROM;
3454 au32 = SvUV(fromstr);
3455#ifdef HAS_HTOVL
3456 au32 = htovl(au32);
3457#endif
3458 PUSH32(utf8, cur, &au32);
3459 }
3460 break;
3461 case 'L' | TYPE_IS_SHRIEKING:
3462#if LONGSIZE != SIZE32
3463 while (len-- > 0) {
3464 unsigned long aulong;
3465 fromstr = NEXTFROM;
3466 aulong = SvUV(fromstr);
3467 DO_BO_PACK(aulong, l);
3468 PUSH_VAR(utf8, cur, aulong);
3469 }
3470 break;
3471#else
3472 /* Fall though! */
3473#endif
3474 case 'L':
3475 while (len-- > 0) {
3476 U32 au32;
3477 fromstr = NEXTFROM;
3478 au32 = SvUV(fromstr);
3479 DO_BO_PACK(au32, 32);
3480 PUSH32(utf8, cur, &au32);
3481 }
3482 break;
3483 case 'l' | TYPE_IS_SHRIEKING:
3484#if LONGSIZE != SIZE32
3485 while (len-- > 0) {
3486 long along;
3487 fromstr = NEXTFROM;
3488 along = SvIV(fromstr);
3489 DO_BO_PACK(along, l);
3490 PUSH_VAR(utf8, cur, along);
3491 }
3492 break;
3493#else
3494 /* Fall though! */
3495#endif
3496 case 'l':
3497 while (len-- > 0) {
3498 I32 ai32;
3499 fromstr = NEXTFROM;
3500 ai32 = SvIV(fromstr);
3501 DO_BO_PACK(ai32, 32);
3502 PUSH32(utf8, cur, &ai32);
3503 }
3504 break;
3505#ifdef HAS_QUAD
3506 case 'Q':
3507 while (len-- > 0) {
3508 Uquad_t auquad;
3509 fromstr = NEXTFROM;
3510 auquad = (Uquad_t) SvUV(fromstr);
3511 DO_BO_PACK(auquad, 64);
3512 PUSH_VAR(utf8, cur, auquad);
3513 }
3514 break;
3515 case 'q':
3516 while (len-- > 0) {
3517 Quad_t aquad;
3518 fromstr = NEXTFROM;
3519 aquad = (Quad_t)SvIV(fromstr);
3520 DO_BO_PACK(aquad, 64);
3521 PUSH_VAR(utf8, cur, aquad);
3522 }
3523 break;
3524#endif /* HAS_QUAD */
3525 case 'P':
3526 len = 1; /* assume SV is correct length */
3527 GROWING(utf8, cat, start, cur, sizeof(char *));
3528 /* Fall through! */
3529 case 'p':
3530 while (len-- > 0) {
3531 const char *aptr;
3532
3533 fromstr = NEXTFROM;
3534 SvGETMAGIC(fromstr);
3535 if (!SvOK(fromstr)) aptr = NULL;
3536 else {
3537 /* XXX better yet, could spirit away the string to
3538 * a safe spot and hang on to it until the result
3539 * of pack() (and all copies of the result) are
3540 * gone.
3541 */
3542 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3543 !SvREADONLY(fromstr)))) {
3544 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3545 "Attempt to pack pointer to temporary value");
3546 }
3547 if (SvPOK(fromstr) || SvNIOK(fromstr))
3548 aptr = SvPV_nomg_const_nolen(fromstr);
3549 else
3550 aptr = SvPV_force_flags_nolen(fromstr, 0);
3551 }
3552 DO_BO_PACK_PC(aptr);
3553 PUSH_VAR(utf8, cur, aptr);
3554 }
3555 break;
3556 case 'u': {
3557 const char *aptr, *aend;
3558 bool from_utf8;
3559
3560 fromstr = NEXTFROM;
3561 if (len <= 2) len = 45;
3562 else len = len / 3 * 3;
3563 if (len >= 64) {
3564 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3565 "Field too wide in 'u' format in pack");
3566 len = 63;
3567 }
3568 aptr = SvPV_const(fromstr, fromlen);
3569 from_utf8 = DO_UTF8(fromstr);
3570 if (from_utf8) {
3571 aend = aptr + fromlen;
3572 fromlen = sv_len_utf8(fromstr);
3573 } else aend = NULL; /* Unused, but keep compilers happy */
3574 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3575 while (fromlen > 0) {
3576 U8 *end;
3577 I32 todo;
3578 U8 hunk[1+63/3*4+1];
3579
3580 if ((I32)fromlen > len)
3581 todo = len;
3582 else
3583 todo = fromlen;
3584 if (from_utf8) {
3585 char buffer[64];
3586 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3587 'u' | TYPE_IS_PACK)) {
3588 *cur = '\0';
3589 SvCUR_set(cat, cur - start);
3590 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3591 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3592 aptr, aend, buffer, (long) todo);
3593 }
3594 end = doencodes(hunk, buffer, todo);
3595 } else {
3596 end = doencodes(hunk, aptr, todo);
3597 aptr += todo;
3598 }
3599 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3600 fromlen -= todo;
3601 }
3602 break;
3603 }
3604 }
3605 *cur = '\0';
3606 SvCUR_set(cat, cur - start);
3607 no_change:
3608 *symptr = lookahead;
3609 }
3610 return beglist;
3611}
3612#undef NEXTFROM
3613
3614
3615PP(pp_pack)
3616{
3617 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3618 register SV *cat = TARG;
3619 STRLEN fromlen;
3620 SV *pat_sv = *++MARK;
3621 register const char *pat = SvPV_const(pat_sv, fromlen);
3622 register const char *patend = pat + fromlen;
3623
3624 MARK++;
3625 sv_setpvs(cat, "");
3626 SvUTF8_off(cat);
3627
3628 packlist(cat, pat, patend, MARK, SP + 1);
3629
3630 SvSETMAGIC(cat);
3631 SP = ORIGMARK;
3632 PUSHs(cat);
3633 RETURN;
3634}
3635
3636/*
3637 * Local variables:
3638 * c-indentation-style: bsd
3639 * c-basic-offset: 4
3640 * indent-tabs-mode: nil
3641 * End:
3642 *
3643 * ex: set ts=8 sts=4 sw=4 et:
3644 */