This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
[perl #40427] Segfault in pack
[perl5.git] / pp_pack.c
<
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
acde74e1 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 */
18
166f8a29
DM
19/* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
24 *
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
27 */
28
29
a6ec74c1
JH
30#include "EXTERN.h"
31#define PERL_IN_PP_PACK_C
32#include "perl.h"
33
f7fe979e
AL
34/* Types used by pack/unpack */
35typedef enum {
36 e_no_len, /* no length */
37 e_number, /* number, [] */
38 e_star /* asterisk */
39} howlen_t;
40
41typedef struct tempsym {
42 const char* patptr; /* current template char */
43 const char* patend; /* one after last char */
44 const char* grpbeg; /* 1st char of ()-group */
45 const char* grpend; /* end of ()-group */
46 I32 code; /* template code (!<>) */
47 I32 length; /* length/repeat count */
48 howlen_t howlen; /* how length is given */
49 int level; /* () nesting level */
50 U32 flags; /* /=4, comma=2, pack=1 */
51 /* and group modifiers */
52 STRLEN strbeg; /* offset of group start */
53 struct tempsym *previous; /* previous group */
54} tempsym_t;
55
56#define TEMPSYM_INIT(symptr, p, e, f) \
57 STMT_START { \
58 (symptr)->patptr = (p); \
59 (symptr)->patend = (e); \
60 (symptr)->grpbeg = NULL; \
61 (symptr)->grpend = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->code = 0; \
64 (symptr)->length = 0; \
10edeb5d 65 (symptr)->howlen = e_no_len; \
f7fe979e
AL
66 (symptr)->level = 0; \
67 (symptr)->flags = (f); \
68 (symptr)->strbeg = 0; \
69 (symptr)->previous = NULL; \
70 } STMT_END
71
7212898e 72#if PERL_VERSION >= 9
f337b084
TH
73# define PERL_PACK_CAN_BYTEORDER
74# define PERL_PACK_CAN_SHRIEKSIGN
75#endif
76
77#ifndef CHAR_BIT
78# define CHAR_BIT 8
7212898e 79#endif
3473cf63
RGS
80/* Maximum number of bytes to which a byte can grow due to upgrade */
81#define UTF8_EXPAND 2
7212898e 82
a6ec74c1 83/*
a6ec74c1
JH
84 * Offset for integer pack/unpack.
85 *
86 * On architectures where I16 and I32 aren't really 16 and 32 bits,
87 * which for now are all Crays, pack and unpack have to play games.
88 */
89
90/*
91 * These values are required for portability of pack() output.
92 * If they're not right on your machine, then pack() and unpack()
93 * wouldn't work right anyway; you'll need to apply the Cray hack.
94 * (I'd like to check them with #if, but you can't use sizeof() in
95 * the preprocessor.) --???
96 */
97/*
98 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
99 defines are now in config.h. --Andy Dougherty April 1998
100 */
101#define SIZE16 2
102#define SIZE32 4
103
104/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
105 --jhi Feb 1999 */
106
1109a392
MHM
107#if U16SIZE > SIZE16 || U32SIZE > SIZE32
108# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
109# define OFF16(p) ((char*)(p))
110# define OFF32(p) ((char*)(p))
a6ec74c1 111# else
1109a392 112# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
113# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
114# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
115# else
08ca2aa3 116 ++++ bad cray byte order
a6ec74c1
JH
117# endif
118# endif
a6ec74c1 119#else
08ca2aa3
TH
120# define OFF16(p) ((char *) (p))
121# define OFF32(p) ((char *) (p))
a6ec74c1
JH
122#endif
123
f337b084
TH
124/* Only to be used inside a loop (see the break) */
125#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
126 if (utf8) { \
127 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
128 } else { \
129 Copy(s, OFF16(p), SIZE16, char); \
130 (s) += SIZE16; \
131 } \
132} STMT_END
133
134/* Only to be used inside a loop (see the break) */
135#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
136 if (utf8) { \
137 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
138 } else { \
139 Copy(s, OFF32(p), SIZE32, char); \
140 (s) += SIZE32; \
141 } \
142} STMT_END
143
144#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
145#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
146
147/* Only to be used inside a loop (see the break) */
f337b084 148#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
08ca2aa3
TH
149STMT_START { \
150 if (utf8) { \
f337b084
TH
151 if (!uni_to_bytes(aTHX_ &s, strend, \
152 (char *) &var, sizeof(var), datumtype)) break;\
08ca2aa3
TH
153 } else { \
154 Copy(s, (char *) &var, sizeof(var), char); \
155 s += sizeof(var); \
156 } \
08ca2aa3
TH
157} STMT_END
158
f337b084 159#define PUSH_VAR(utf8, aptr, var) \
230e1fce 160 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 161
49704364
WL
162/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
163#define MAX_SUB_TEMPLATE_LEVEL 100
164
66c611c5 165/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
166#define FLAG_WAS_UTF8 0x40
167#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 168#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 169#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
170#define FLAG_SLASH 0x04
171#define FLAG_COMMA 0x02
172#define FLAG_PACK 0x01
173
a6ec74c1
JH
174STATIC SV *
175S_mul128(pTHX_ SV *sv, U8 m)
176{
177 STRLEN len;
178 char *s = SvPV(sv, len);
179 char *t;
a6ec74c1
JH
180
181 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 182 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
183
184 sv_catsv(tmpNew, sv);
185 SvREFCNT_dec(sv); /* free old sv */
186 sv = tmpNew;
187 s = SvPV(sv, len);
188 }
189 t = s + len - 1;
190 while (!*t) /* trailing '\0'? */
191 t--;
192 while (t > s) {
f7fe979e 193 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
194 *(t--) = '0' + (char)(i % 10);
195 m = (char)(i / 10);
a6ec74c1
JH
196 }
197 return (sv);
198}
199
200/* Explosives and implosives. */
201
202#if 'I' == 73 && 'J' == 74
203/* On an ASCII/ISO kind of system */
204#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
205#else
206/*
207 Some other sort of character set - use memchr() so we don't match
208 the null byte.
209 */
210#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
211#endif
212
66c611c5 213/* type modifiers */
62f95557 214#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
215#define TYPE_IS_BIG_ENDIAN 0x200
216#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 217#define TYPE_IS_PACK 0x800
1109a392 218#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 219#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
220#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
221
7212898e 222#ifdef PERL_PACK_CAN_SHRIEKSIGN
28be1210 223# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
7212898e 224#else
28be1210 225# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
7212898e
NC
226#endif
227
228#ifndef PERL_PACK_CAN_BYTEORDER
229/* Put "can't" first because it is shorter */
230# define TYPE_ENDIANNESS(t) 0
231# define TYPE_NO_ENDIANNESS(t) (t)
232
233# define ENDIANNESS_ALLOWED_TYPES ""
234
235# define DO_BO_UNPACK(var, type)
236# define DO_BO_PACK(var, type)
07409e01
NC
237# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
238# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
7212898e
NC
239# define DO_BO_UNPACK_N(var, type)
240# define DO_BO_PACK_N(var, type)
241# define DO_BO_UNPACK_P(var)
242# define DO_BO_PACK_P(var)
45f723e1
NC
243# define DO_BO_UNPACK_PC(var)
244# define DO_BO_PACK_PC(var)
66c611c5 245
f337b084 246#else /* PERL_PACK_CAN_BYTEORDER */
7212898e
NC
247
248# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
249# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
250
251# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
252
253# define DO_BO_UNPACK(var, type) \
1109a392 254 STMT_START { \
66c611c5 255 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
256 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
257 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
258 default: break; \
259 } \
260 } STMT_END
261
7212898e 262# define DO_BO_PACK(var, type) \
1109a392 263 STMT_START { \
66c611c5 264 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
265 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
266 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
267 default: break; \
268 } \
269 } STMT_END
270
07409e01 271# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
1109a392 272 STMT_START { \
66c611c5 273 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 274 case TYPE_IS_BIG_ENDIAN: \
07409e01 275 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
1109a392
MHM
276 break; \
277 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 278 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
1109a392
MHM
279 break; \
280 default: \
281 break; \
282 } \
283 } STMT_END
284
07409e01 285# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
1109a392 286 STMT_START { \
66c611c5 287 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 288 case TYPE_IS_BIG_ENDIAN: \
07409e01 289 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
1109a392
MHM
290 break; \
291 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 292 var = (post_cast *) my_htole ## type ((pre_cast) var); \
1109a392
MHM
293 break; \
294 default: \
295 break; \
296 } \
297 } STMT_END
298
7212898e 299# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
300 STMT_START { \
301 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
302 case TYPE_IS_BIG_ENDIAN: \
303 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
304 "platform", #action, #type); \
305 break; \
306 case TYPE_IS_LITTLE_ENDIAN: \
307 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
308 "platform", #action, #type); \
309 break; \
310 default: \
311 break; \
312 } \
313 } STMT_END
314
7212898e 315# if PTRSIZE == INTSIZE
07409e01
NC
316# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
317# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
318# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
319# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
7212898e 320# elif PTRSIZE == LONGSIZE
07409e01
NC
321# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
322# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
323# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
324# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
2b00a750
JD
325# elif PTRSIZE == IVSIZE
326# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
327# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
328# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
329# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
7212898e
NC
330# else
331# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
332# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
45f723e1
NC
333# define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
334# define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
7212898e 335# endif
1109a392 336
7212898e 337# if defined(my_htolen) && defined(my_letohn) && \
1109a392 338 defined(my_htoben) && defined(my_betohn)
7212898e 339# define DO_BO_UNPACK_N(var, type) \
1109a392 340 STMT_START { \
66c611c5 341 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
342 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
343 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
344 default: break; \
345 } \
346 } STMT_END
347
7212898e 348# define DO_BO_PACK_N(var, type) \
1109a392 349 STMT_START { \
66c611c5 350 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
351 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
352 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
353 default: break; \
354 } \
355 } STMT_END
7212898e
NC
356# else
357# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
358# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
359# endif
360
f337b084 361#endif /* PERL_PACK_CAN_BYTEORDER */
62f95557 362
78d46eaa 363#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 364#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
365#define PACK_SIZE_MASK 0x3F
366
78d46eaa
NC
367/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
368 in). You're unlikely ever to need to regenerate them. */
a7a3cfaa
TH
369
370#if TYPE_IS_SHRIEKING != 0x100
371 ++++shriek offset should be 256
372#endif
373
374typedef U8 packprops_t;
78d46eaa
NC
375#if 'J'-'I' == 1
376/* ASCII */
f43a9a31 377STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
378 /* normal */
379 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
380 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
381 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
383 0, 0, 0,
384 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 385#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 386 /* D */ LONG_DOUBLESIZE,
80a13697 387#else
a7a3cfaa 388 0,
80a13697 389#endif
a7a3cfaa
TH
390 0,
391 /* F */ NVSIZE,
392 0, 0,
393 /* I */ sizeof(unsigned int),
394 /* J */ UVSIZE,
395 0,
396 /* L */ SIZE32,
397 0,
398 /* N */ SIZE32,
399 0, 0,
80a13697 400#if defined(HAS_QUAD)
a7a3cfaa 401 /* Q */ sizeof(Uquad_t),
80a13697 402#else
a7a3cfaa 403 0,
80a13697 404#endif
a7a3cfaa
TH
405 0,
406 /* S */ SIZE16,
407 0,
408 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
409 /* V */ SIZE32,
410 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
411 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
412 /* c */ sizeof(char),
413 /* d */ sizeof(double),
414 0,
415 /* f */ sizeof(float),
416 0, 0,
417 /* i */ sizeof(int),
418 /* j */ IVSIZE,
419 0,
420 /* l */ SIZE32,
421 0,
422 /* n */ SIZE16,
423 0,
424 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 425#if defined(HAS_QUAD)
a7a3cfaa 426 /* q */ sizeof(Quad_t),
80a13697 427#else
a7a3cfaa 428 0,
80a13697 429#endif
a7a3cfaa
TH
430 0,
431 /* s */ SIZE16,
432 0, 0,
433 /* v */ SIZE16,
434 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
435 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0,
444 /* shrieking */
445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 0, 0, 0, 0, 0, 0, 0, 0, 0,
450 /* I */ sizeof(unsigned int),
451 0, 0,
452 /* L */ sizeof(unsigned long),
453 0,
7212898e 454#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 455 /* N */ SIZE32,
7212898e 456#else
a7a3cfaa 457 0,
7212898e 458#endif
a7a3cfaa
TH
459 0, 0, 0, 0,
460 /* S */ sizeof(unsigned short),
461 0, 0,
7212898e 462#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 463 /* V */ SIZE32,
7212898e 464#else
a7a3cfaa 465 0,
7212898e 466#endif
a7a3cfaa
TH
467 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
468 0, 0,
469 /* i */ sizeof(int),
470 0, 0,
471 /* l */ sizeof(long),
472 0,
7212898e 473#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 474 /* n */ SIZE16,
7212898e 475#else
a7a3cfaa 476 0,
7212898e 477#endif
a7a3cfaa
TH
478 0, 0, 0, 0,
479 /* s */ sizeof(short),
480 0, 0,
7212898e 481#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 482 /* v */ SIZE16,
7212898e 483#else
a7a3cfaa 484 0,
7212898e 485#endif
a7a3cfaa
TH
486 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
487 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
488 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
489 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
490 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
495};
496#else
497/* EBCDIC (or bust) */
f43a9a31 498STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
499 /* normal */
500 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
506 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
507 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
508 0, 0, 0,
509 /* c */ sizeof(char),
510 /* d */ sizeof(double),
511 0,
512 /* f */ sizeof(float),
513 0, 0,
514 /* i */ sizeof(int),
515 0, 0, 0, 0, 0, 0, 0,
516 /* j */ IVSIZE,
517 0,
518 /* l */ SIZE32,
519 0,
520 /* n */ SIZE16,
521 0,
522 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 523#if defined(HAS_QUAD)
a7a3cfaa 524 /* q */ sizeof(Quad_t),
80a13697 525#else
a7a3cfaa 526 0,
80a13697 527#endif
a7a3cfaa
TH
528 0, 0, 0, 0, 0, 0, 0, 0, 0,
529 /* s */ SIZE16,
530 0, 0,
531 /* v */ SIZE16,
532 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
533 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
534 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
535 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 536#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 537 /* D */ LONG_DOUBLESIZE,
80a13697 538#else
a7a3cfaa 539 0,
80a13697 540#endif
a7a3cfaa
TH
541 0,
542 /* F */ NVSIZE,
543 0, 0,
544 /* I */ sizeof(unsigned int),
545 0, 0, 0, 0, 0, 0, 0,
546 /* J */ UVSIZE,
547 0,
548 /* L */ SIZE32,
549 0,
550 /* N */ SIZE32,
551 0, 0,
80a13697 552#if defined(HAS_QUAD)
a7a3cfaa 553 /* Q */ sizeof(Uquad_t),
80a13697 554#else
a7a3cfaa 555 0,
80a13697 556#endif
a7a3cfaa
TH
557 0, 0, 0, 0, 0, 0, 0, 0, 0,
558 /* S */ SIZE16,
559 0,
560 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
561 /* V */ SIZE32,
562 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
564 0, 0, 0, 0, 0, 0, 0, 0, 0,
565 /* shrieking */
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
572 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
573 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
574 0, 0, 0, 0, 0, 0, 0, 0, 0,
575 /* i */ sizeof(int),
576 0, 0, 0, 0, 0, 0, 0, 0, 0,
577 /* l */ sizeof(long),
578 0,
7212898e 579#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 580 /* n */ SIZE16,
7212898e 581#else
a7a3cfaa 582 0,
7212898e 583#endif
a7a3cfaa
TH
584 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
585 /* s */ sizeof(short),
586 0, 0,
7212898e 587#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 588 /* v */ SIZE16,
7212898e 589#else
a7a3cfaa 590 0,
7212898e 591#endif
a7a3cfaa
TH
592 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
593 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
594 0, 0, 0,
595 /* I */ sizeof(unsigned int),
596 0, 0, 0, 0, 0, 0, 0, 0, 0,
597 /* L */ sizeof(unsigned long),
598 0,
7212898e 599#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 600 /* N */ SIZE32,
7212898e 601#else
a7a3cfaa 602 0,
7212898e 603#endif
a7a3cfaa
TH
604 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
605 /* S */ sizeof(unsigned short),
606 0, 0,
7212898e 607#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 608 /* V */ SIZE32,
7212898e 609#else
a7a3cfaa 610 0,
7212898e 611#endif
a7a3cfaa
TH
612 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
613 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
614};
615#endif
616
08ca2aa3 617STATIC U8
f7fe979e 618uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 619{
08ca2aa3 620 STRLEN retlen;
0bcc34c2 621 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 622 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
623 /* We try to process malformed UTF-8 as much as possible (preferrably with
624 warnings), but these two mean we make no progress in the string and
625 might enter an infinite loop */
626 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
627 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
628 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 629 if (val >= 0x100) {
f337b084 630 if (ckWARN(WARN_UNPACK))
08ca2aa3
TH
631 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
632 "Character in '%c' format wrapped in unpack",
f337b084 633 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
634 val &= 0xff;
635 }
636 *s += retlen;
fe2774ed 637 return (U8)val;
08ca2aa3
TH
638}
639
f337b084
TH
640#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
641 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
642 *(U8 *)(s)++)
643
644STATIC bool
f7fe979e 645uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
646{
647 UV val;
648 STRLEN retlen;
f7fe979e 649 const char *from = *s;
08ca2aa3 650 int bad = 0;
f7fe979e 651 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3
TH
652 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
653 for (;buf_len > 0; buf_len--) {
654 if (from >= end) return FALSE;
f337b084 655 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
656 if (retlen == (STRLEN) -1 || retlen == 0) {
657 from += UTF8SKIP(from);
658 bad |= 1;
659 } else from += retlen;
660 if (val >= 0x100) {
661 bad |= 2;
662 val &= 0xff;
663 }
fe2774ed 664 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
665 }
666 /* We have enough characters for the buffer. Did we have problems ? */
667 if (bad) {
668 if (bad & 1) {
669 /* Rewalk the string fragment while warning */
f7fe979e 670 const char *ptr;
9e27e96a 671 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
672 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
673 if (ptr >= end) break;
f337b084 674 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
675 }
676 if (from > end) from = end;
677 }
678 if ((bad & 2) && ckWARN(WARN_UNPACK))
fc241834 679 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084
TH
680 WARN_PACK : WARN_UNPACK),
681 "Character(s) in '%c' format wrapped in %s",
fc241834 682 (int) TYPE_NO_MODIFIERS(datumtype),
f337b084 683 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
684 }
685 *s = from;
686 return TRUE;
687}
688
689STATIC bool
f7fe979e 690next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 691{
97aff369 692 dVAR;
08ca2aa3 693 STRLEN retlen;
0bcc34c2 694 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
695 if (val >= 0x100 || !ISUUCHAR(val) ||
696 retlen == (STRLEN) -1 || retlen == 0) {
697 *out = 0;
698 return FALSE;
699 }
700 *out = PL_uudmap[val] & 077;
f337b084 701 *s += retlen;
08ca2aa3
TH
702 return TRUE;
703}
78d46eaa 704
64844641 705STATIC char *
14333449 706S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
0bcc34c2 707 const U8 * const end = start + len;
64844641 708
f337b084 709 while (start < end) {
48fa4626
TS
710 const UV uv = NATIVE_TO_ASCII(*start);
711 if (UNI_IS_INVARIANT(uv))
712 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
713 else {
714 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
715 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
f337b084
TH
716 }
717 start++;
718 }
64844641 719 return dest;
f337b084
TH
720}
721
230e1fce
NC
722#define PUSH_BYTES(utf8, cur, buf, len) \
723STMT_START { \
64844641
AL
724 if (utf8) \
725 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
230e1fce
NC
726 else { \
727 Copy(buf, cur, len, char); \
728 (cur) += (len); \
729 } \
f337b084
TH
730} STMT_END
731
732#define GROWING(utf8, cat, start, cur, in_len) \
733STMT_START { \
734 STRLEN glen = (in_len); \
3473cf63 735 if (utf8) glen *= UTF8_EXPAND; \
f337b084 736 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 737 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
738 (cur) = (start) + SvCUR(cat); \
739 } \
740} STMT_END
741
742#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
743STMT_START { \
f7fe979e 744 const STRLEN glen = (in_len); \
f337b084 745 STRLEN gl = glen; \
3473cf63 746 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
747 if ((cur) + gl >= (start) + SvLEN(cat)) { \
748 *cur = '\0'; \
b162af07 749 SvCUR_set((cat), (cur) - (start)); \
0bd48802 750 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
751 (cur) = (start) + SvCUR(cat); \
752 } \
753 PUSH_BYTES(utf8, cur, buf, glen); \
754} STMT_END
755
756#define PUSH_BYTE(utf8, s, byte) \
757STMT_START { \
758 if (utf8) { \
f7fe979e 759 const U8 au8 = (byte); \
64844641 760 (s) = bytes_to_uni(&au8, 1, (s)); \
f337b084
TH
761 } else *(U8 *)(s)++ = (byte); \
762} STMT_END
763
764/* Only to be used inside a loop (see the break) */
765#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
766STMT_START { \
767 STRLEN retlen; \
768 if (str >= end) break; \
769 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
770 if (retlen == (STRLEN) -1 || retlen == 0) { \
771 *cur = '\0'; \
772 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
773 } \
774 str += retlen; \
775} STMT_END
776
f7fe979e
AL
777static const char *_action( const tempsym_t* symptr )
778{
10edeb5d 779 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
780}
781
206947d2 782/* Returns the sizeof() struct described by pat */
028d1f6d 783STATIC I32
f337b084 784S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 785{
f337b084 786 I32 total = 0;
206947d2 787
49704364 788 while (next_symbol(symptr)) {
f337b084 789 I32 len;
f7fe979e 790 int size;
f337b084
TH
791
792 switch (symptr->howlen) {
fc241834 793 case e_star:
49704364 794 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 795 _action( symptr ) );
49704364 796 break;
f337b084
TH
797 default:
798 /* e_no_len and e_number */
799 len = symptr->length;
800 break;
49704364
WL
801 }
802
a7a3cfaa 803 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 804 if (!size) {
f7fe979e 805 int star;
80a13697
NC
806 /* endianness doesn't influence the size of a type */
807 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
808 default:
809 Perl_croak(aTHX_ "Invalid type '%c' in %s",
810 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 811 _action( symptr ) );
28be1210
TH
812#ifdef PERL_PACK_CAN_SHRIEKSIGN
813 case '.' | TYPE_IS_SHRIEKING:
814 case '@' | TYPE_IS_SHRIEKING:
815#endif
80a13697 816 case '@':
28be1210 817 case '.':
80a13697
NC
818 case '/':
819 case 'U': /* XXXX Is it correct? */
820 case 'w':
821 case 'u':
822 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 823 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 824 _action( symptr ) );
80a13697
NC
825 case '%':
826 size = 0;
827 break;
828 case '(':
fc241834
RGS
829 {
830 tempsym_t savsym = *symptr;
831 symptr->patptr = savsym.grpbeg;
832 symptr->patend = savsym.grpend;
833 /* XXXX Theoretically, we need to measure many times at
834 different positions, since the subexpression may contain
835 alignment commands, but be not of aligned length.
836 Need to detect this and croak(). */
837 size = measure_struct(symptr);
838 *symptr = savsym;
839 break;
840 }
80a13697
NC
841 case 'X' | TYPE_IS_SHRIEKING:
842 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
843 */
844 if (!len) /* Avoid division by 0 */
845 len = 1;
846 len = total % len; /* Assumed: the start is aligned. */
847 /* FALL THROUGH */
848 case 'X':
849 size = -1;
850 if (total < len)
f7fe979e 851 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
852 break;
853 case 'x' | TYPE_IS_SHRIEKING:
854 if (!len) /* Avoid division by 0 */
855 len = 1;
856 star = total % len; /* Assumed: the start is aligned. */
857 if (star) /* Other portable ways? */
858 len = len - star;
859 else
860 len = 0;
861 /* FALL THROUGH */
862 case 'x':
863 case 'A':
864 case 'Z':
865 case 'a':
80a13697
NC
866 size = 1;
867 break;
868 case 'B':
869 case 'b':
870 len = (len + 7)/8;
871 size = 1;
872 break;
873 case 'H':
874 case 'h':
875 len = (len + 1)/2;
876 size = 1;
877 break;
78d46eaa 878
80a13697
NC
879 case 'P':
880 len = 1;
881 size = sizeof(char*);
78d46eaa
NC
882 break;
883 }
206947d2
IZ
884 }
885 total += len * size;
886 }
887 return total;
888}
889
49704364
WL
890
891/* locate matching closing parenthesis or bracket
892 * returns char pointer to char after match, or NULL
893 */
f7fe979e
AL
894STATIC const char *
895S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
18529408 896{
49704364 897 while (patptr < patend) {
f7fe979e 898 const char c = *patptr++;
49704364
WL
899
900 if (isSPACE(c))
901 continue;
902 else if (c == ender)
903 return patptr-1;
904 else if (c == '#') {
905 while (patptr < patend && *patptr != '\n')
906 patptr++;
907 continue;
908 } else if (c == '(')
909 patptr = group_end(patptr, patend, ')') + 1;
910 else if (c == '[')
911 patptr = group_end(patptr, patend, ']') + 1;
18529408 912 }
49704364
WL
913 Perl_croak(aTHX_ "No group ending character '%c' found in template",
914 ender);
915 return 0;
18529408
IZ
916}
917
49704364
WL
918
919/* Convert unsigned decimal number to binary.
920 * Expects a pointer to the first digit and address of length variable
921 * Advances char pointer to 1st non-digit char and returns number
fc241834 922 */
f7fe979e
AL
923STATIC const char *
924S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
49704364
WL
925{
926 I32 len = *patptr++ - '0';
927 while (isDIGIT(*patptr)) {
928 if (len >= 0x7FFFFFFF/10)
929 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
930 len = (len * 10) + (*patptr++ - '0');
931 }
932 *lenptr = len;
933 return patptr;
934}
935
936/* The marvellous template parsing routine: Using state stored in *symptr,
937 * locates next template code and count
938 */
939STATIC bool
f337b084 940S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 941{
f7fe979e 942 const char* patptr = symptr->patptr;
0bcc34c2 943 const char* const patend = symptr->patend;
49704364
WL
944
945 symptr->flags &= ~FLAG_SLASH;
946
947 while (patptr < patend) {
948 if (isSPACE(*patptr))
949 patptr++;
950 else if (*patptr == '#') {
951 patptr++;
952 while (patptr < patend && *patptr != '\n')
953 patptr++;
954 if (patptr < patend)
955 patptr++;
956 } else {
fc241834 957 /* We should have found a template code */
49704364 958 I32 code = *patptr++ & 0xFF;
66c611c5 959 U32 inherited_modifiers = 0;
49704364
WL
960
961 if (code == ','){ /* grandfather in commas but with a warning */
962 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
963 symptr->flags |= FLAG_COMMA;
964 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 965 "Invalid type ',' in %s", _action( symptr ) );
49704364
WL
966 }
967 continue;
968 }
fc241834 969
49704364 970 /* for '(', skip to ')' */
fc241834 971 if (code == '(') {
49704364
WL
972 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
973 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 974 _action( symptr ) );
49704364
WL
975 symptr->grpbeg = patptr;
976 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
977 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
978 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 979 _action( symptr ) );
49704364
WL
980 }
981
66c611c5
MHM
982 /* look for group modifiers to inherit */
983 if (TYPE_ENDIANNESS(symptr->flags)) {
984 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
985 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
986 }
987
1109a392
MHM
988 /* look for modifiers */
989 while (patptr < patend) {
b7787f18
AL
990 const char *allowed;
991 I32 modifier;
1109a392
MHM
992 switch (*patptr) {
993 case '!':
994 modifier = TYPE_IS_SHRIEKING;
7212898e 995 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 996 break;
7212898e 997#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
998 case '>':
999 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 1000 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
1001 break;
1002 case '<':
1003 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 1004 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 1005 break;
f337b084 1006#endif /* PERL_PACK_CAN_BYTEORDER */
1109a392 1007 default:
b7787f18
AL
1008 allowed = "";
1009 modifier = 0;
1109a392
MHM
1010 break;
1011 }
66c611c5 1012
1109a392
MHM
1013 if (modifier == 0)
1014 break;
66c611c5 1015
1109a392
MHM
1016 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1017 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 1018 allowed, _action( symptr ) );
66c611c5
MHM
1019
1020 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 1021 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 1022 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
1023 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1024 TYPE_ENDIANNESS_MASK)
1025 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 1026 *patptr, _action( symptr ) );
66c611c5 1027
041457d9 1028 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1109a392
MHM
1029 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1030 "Duplicate modifier '%c' after '%c' in %s",
1031 *patptr, (int) TYPE_NO_MODIFIERS(code),
f7fe979e 1032 _action( symptr ) );
1109a392 1033 }
66c611c5 1034
1109a392
MHM
1035 code |= modifier;
1036 patptr++;
49704364
WL
1037 }
1038
66c611c5
MHM
1039 /* inherit modifiers */
1040 code |= inherited_modifiers;
1041
fc241834 1042 /* look for count and/or / */
49704364
WL
1043 if (patptr < patend) {
1044 if (isDIGIT(*patptr)) {
1045 patptr = get_num( patptr, &symptr->length );
1046 symptr->howlen = e_number;
1047
1048 } else if (*patptr == '*') {
1049 patptr++;
1050 symptr->howlen = e_star;
1051
1052 } else if (*patptr == '[') {
f7fe979e 1053 const char* lenptr = ++patptr;
49704364
WL
1054 symptr->howlen = e_number;
1055 patptr = group_end( patptr, patend, ']' ) + 1;
1056 /* what kind of [] is it? */
1057 if (isDIGIT(*lenptr)) {
1058 lenptr = get_num( lenptr, &symptr->length );
1059 if( *lenptr != ']' )
1060 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 1061 _action( symptr ) );
49704364
WL
1062 } else {
1063 tempsym_t savsym = *symptr;
1064 symptr->patend = patptr-1;
1065 symptr->patptr = lenptr;
1066 savsym.length = measure_struct(symptr);
1067 *symptr = savsym;
1068 }
1069 } else {
1070 symptr->howlen = e_no_len;
1071 symptr->length = 1;
1072 }
1073
1074 /* try to find / */
1075 while (patptr < patend) {
1076 if (isSPACE(*patptr))
1077 patptr++;
1078 else if (*patptr == '#') {
1079 patptr++;
1080 while (patptr < patend && *patptr != '\n')
1081 patptr++;
1082 if (patptr < patend)
1083 patptr++;
1084 } else {
66c611c5 1085 if (*patptr == '/') {
49704364
WL
1086 symptr->flags |= FLAG_SLASH;
1087 patptr++;
66c611c5
MHM
1088 if (patptr < patend &&
1089 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 1090 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 1091 _action( symptr ) );
49704364
WL
1092 }
1093 break;
1094 }
18529408 1095 }
49704364
WL
1096 } else {
1097 /* at end - no count, no / */
1098 symptr->howlen = e_no_len;
1099 symptr->length = 1;
1100 }
1101
1102 symptr->code = code;
fc241834 1103 symptr->patptr = patptr;
49704364 1104 return TRUE;
18529408 1105 }
49704364 1106 }
fc241834 1107 symptr->patptr = patptr;
49704364 1108 return FALSE;
18529408
IZ
1109}
1110
18529408 1111/*
fc241834 1112 There is no way to cleanly handle the case where we should process the
08ca2aa3 1113 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
1114 (e.g. estimates like strend-s as an upper bound for the number of
1115 characters left wouldn't work). So if we foresee the need of this
1116 (pattern starts with U or contains U0), we want to work on the encoded
1117 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
1118 themselves if they need to do a lot of unpacks like this on it
1119*/
fc241834 1120STATIC bool
08ca2aa3
TH
1121need_utf8(const char *pat, const char *patend)
1122{
1123 bool first = TRUE;
1124 while (pat < patend) {
1125 if (pat[0] == '#') {
1126 pat++;
f7fe979e 1127 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1128 if (!pat) return FALSE;
1129 } else if (pat[0] == 'U') {
1130 if (first || pat[1] == '0') return TRUE;
1131 } else first = FALSE;
1132 pat++;
1133 }
1134 return FALSE;
1135}
1136
1137STATIC char
1138first_symbol(const char *pat, const char *patend) {
1139 while (pat < patend) {
1140 if (pat[0] != '#') return pat[0];
1141 pat++;
f7fe979e 1142 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1143 if (!pat) return 0;
1144 pat++;
1145 }
1146 return 0;
1147}
1148
1149/*
7accc089
JH
1150=for apidoc unpackstring
1151
608d3aed
WL
1152The engine implementing unpack() Perl function. C<unpackstring> puts the
1153extracted list items on the stack and returns the number of elements.
1154Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
7accc089
JH
1155
1156=cut */
1157
1158I32
f7fe979e 1159Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 1160{
f7fe979e 1161 tempsym_t sym;
08ca2aa3 1162
f337b084 1163 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1164 else if (need_utf8(pat, patend)) {
1165 /* We probably should try to avoid this in case a scalar context call
1166 wouldn't get to the "U0" */
1167 STRLEN len = strend - s;
230e1fce 1168 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1169 SAVEFREEPV(s);
1170 strend = s + len;
f337b084 1171 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1172 }
1173
f337b084
TH
1174 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1175 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1176
f7fe979e 1177 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
1178
1179 return unpack_rec(&sym, s, s, strend, NULL );
1180}
1181
49704364
WL
1182STATIC
1183I32
f7fe979e 1184S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 1185{
27da23d5 1186 dVAR; dSP;
18529408 1187 SV *sv;
f7fe979e 1188 const I32 start_sp_offset = SP - PL_stack_base;
49704364 1189 howlen_t howlen;
a6ec74c1 1190
a6ec74c1 1191 I32 checksum = 0;
92d41999 1192 UV cuv = 0;
a6ec74c1 1193 NV cdouble = 0.0;
f337b084 1194 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 1195 bool beyond = FALSE;
21c16052 1196 bool explicit_length;
9e27e96a 1197 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 1198 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
28be1210 1199 symptr->strbeg = s - strbeg;
49704364 1200
49704364 1201 while (next_symbol(symptr)) {
a7a3cfaa 1202 packprops_t props;
9e27e96a 1203 I32 len;
f337b084 1204 I32 datumtype = symptr->code;
206947d2 1205 /* do first one only unless in list context
08ca2aa3 1206 / is implemented by unpacking the count, then popping it from the
206947d2 1207 stack, so must check that we're not in the middle of a / */
49704364 1208 if ( unpack_only_one
206947d2 1209 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1210 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1211 break;
49704364 1212
f337b084 1213 switch (howlen = symptr->howlen) {
fc241834
RGS
1214 case e_star:
1215 len = strend - strbeg; /* long enough */
49704364 1216 break;
f337b084
TH
1217 default:
1218 /* e_no_len and e_number */
1219 len = symptr->length;
1220 break;
49704364 1221 }
18529408 1222
21c16052 1223 explicit_length = TRUE;
a6ec74c1 1224 redo_switch:
49704364 1225 beyond = s >= strend;
a7a3cfaa
TH
1226
1227 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1228 if (props) {
1229 /* props nonzero means we can process this letter. */
9e27e96a
AL
1230 const long size = props & PACK_SIZE_MASK;
1231 const long howmany = (strend - s) / size;
a7a3cfaa
TH
1232 if (len > howmany)
1233 len = howmany;
1234
1235 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1236 if (len && unpack_only_one) len = 1;
1237 EXTEND(SP, len);
1238 EXTEND_MORTAL(len);
78d46eaa
NC
1239 }
1240 }
a7a3cfaa 1241
1109a392 1242 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1243 default:
1109a392 1244 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1245
a6ec74c1 1246 case '%':
49704364 1247 if (howlen == e_no_len)
18529408 1248 len = 16; /* len is not specified */
a6ec74c1 1249 checksum = len;
92d41999 1250 cuv = 0;
a6ec74c1 1251 cdouble = 0;
18529408 1252 continue;
a6ec74c1 1253 break;
18529408
IZ
1254 case '(':
1255 {
49704364 1256 tempsym_t savsym = *symptr;
9e27e96a 1257 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 1258 symptr->flags |= group_modifiers;
49704364 1259 symptr->patend = savsym.grpend;
28be1210 1260 symptr->previous = &savsym;
49704364 1261 symptr->level++;
18529408
IZ
1262 PUTBACK;
1263 while (len--) {
49704364 1264 symptr->patptr = savsym.grpbeg;
f337b084
TH
1265 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1266 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1267 unpack_rec(symptr, s, strbeg, strend, &s);
1268 if (s == strend && savsym.howlen == e_star)
49704364 1269 break; /* No way to continue */
18529408
IZ
1270 }
1271 SPAGAIN;
28be1210 1272 savsym.flags = symptr->flags & ~group_modifiers;
49704364 1273 *symptr = savsym;
18529408
IZ
1274 break;
1275 }
28be1210
TH
1276#ifdef PERL_PACK_CAN_SHRIEKSIGN
1277 case '.' | TYPE_IS_SHRIEKING:
1278#endif
1279 case '.': {
9e27e96a 1280 const char *from;
28be1210
TH
1281 SV *sv;
1282#ifdef PERL_PACK_CAN_SHRIEKSIGN
9e27e96a 1283 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210 1284#else /* PERL_PACK_CAN_SHRIEKSIGN */
9e27e96a 1285 const bool u8 = utf8;
28be1210
TH
1286#endif
1287 if (howlen == e_star) from = strbeg;
1288 else if (len <= 0) from = s;
1289 else {
1290 tempsym_t *group = symptr;
1291
1292 while (--len && group) group = group->previous;
1293 from = group ? strbeg + group->strbeg : strbeg;
1294 }
1295 sv = from <= s ?
00646304
CB
1296 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1297 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
28be1210
TH
1298 XPUSHs(sv_2mortal(sv));
1299 break;
1300 }
1301#ifdef PERL_PACK_CAN_SHRIEKSIGN
1302 case '@' | TYPE_IS_SHRIEKING:
1303#endif
a6ec74c1 1304 case '@':
28be1210
TH
1305 s = strbeg + symptr->strbeg;
1306#ifdef PERL_PACK_CAN_SHRIEKSIGN
1307 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1308#else /* PERL_PACK_CAN_SHRIEKSIGN */
1309 if (utf8)
1310#endif
1311 {
08ca2aa3
TH
1312 while (len > 0) {
1313 if (s >= strend)
1314 Perl_croak(aTHX_ "'@' outside of string in unpack");
1315 s += UTF8SKIP(s);
1316 len--;
1317 }
1318 if (s > strend)
1319 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1320 } else {
28be1210 1321 if (strend-s < len)
fc241834 1322 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1323 s += len;
08ca2aa3 1324 }
a6ec74c1 1325 break;
62f95557
IZ
1326 case 'X' | TYPE_IS_SHRIEKING:
1327 if (!len) /* Avoid division by 0 */
1328 len = 1;
08ca2aa3 1329 if (utf8) {
f7fe979e 1330 const char *hop, *last;
f337b084
TH
1331 I32 l = len;
1332 hop = last = strbeg;
1333 while (hop < s) {
1334 hop += UTF8SKIP(hop);
1335 if (--l == 0) {
08ca2aa3 1336 last = hop;
f337b084
TH
1337 l = len;
1338 }
fc241834 1339 }
f337b084
TH
1340 if (last > s)
1341 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1342 s = last;
1343 break;
f337b084
TH
1344 }
1345 len = (s - strbeg) % len;
62f95557 1346 /* FALL THROUGH */
a6ec74c1 1347 case 'X':
08ca2aa3
TH
1348 if (utf8) {
1349 while (len > 0) {
1350 if (s <= strbeg)
1351 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1352 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1353 if (s <= strbeg)
1354 Perl_croak(aTHX_ "'X' outside of string in unpack");
1355 }
1356 len--;
1357 }
1358 } else {
fc241834
RGS
1359 if (len > s - strbeg)
1360 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1361 s -= len;
08ca2aa3 1362 }
a6ec74c1 1363 break;
9e27e96a
AL
1364 case 'x' | TYPE_IS_SHRIEKING: {
1365 I32 ai32;
62f95557
IZ
1366 if (!len) /* Avoid division by 0 */
1367 len = 1;
230e1fce
NC
1368 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1369 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1370 if (ai32 == 0) break;
1371 len -= ai32;
9e27e96a 1372 }
62f95557 1373 /* FALL THROUGH */
a6ec74c1 1374 case 'x':
08ca2aa3
TH
1375 if (utf8) {
1376 while (len>0) {
1377 if (s >= strend)
1378 Perl_croak(aTHX_ "'x' outside of string in unpack");
1379 s += UTF8SKIP(s);
1380 len--;
1381 }
1382 } else {
fc241834
RGS
1383 if (len > strend - s)
1384 Perl_croak(aTHX_ "'x' outside of string in unpack");
1385 s += len;
f337b084 1386 }
a6ec74c1
JH
1387 break;
1388 case '/':
49704364
WL
1389 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1390 break;
a6ec74c1
JH
1391 case 'A':
1392 case 'Z':
1393 case 'a':
08ca2aa3
TH
1394 if (checksum) {
1395 /* Preliminary length estimate is assumed done in 'W' */
1396 if (len > strend - s) len = strend - s;
1397 goto W_checksum;
1398 }
1399 if (utf8) {
1400 I32 l;
f7fe979e 1401 const char *hop;
08ca2aa3
TH
1402 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1403 if (hop >= strend) {
1404 if (hop > strend)
1405 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1406 break;
fc241834 1407 }
a6ec74c1 1408 }
08ca2aa3
TH
1409 if (hop > strend)
1410 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1411 len = hop - s;
1412 } else if (len > strend - s)
1413 len = strend - s;
1414
1415 if (datumtype == 'Z') {
1416 /* 'Z' strips stuff after first null */
f7fe979e 1417 const char *ptr, *end;
f337b084
TH
1418 end = s + len;
1419 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1420 sv = newSVpvn(s, ptr-s);
1421 if (howlen == e_star) /* exact for 'Z*' */
1422 len = ptr-s + (ptr != strend ? 1 : 0);
1423 } else if (datumtype == 'A') {
1424 /* 'A' strips both nulls and spaces */
f7fe979e 1425 const char *ptr;
18bdf90a
TH
1426 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1427 for (ptr = s+len-1; ptr >= s; ptr--)
1428 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
230e1fce 1429 !is_utf8_space((U8 *) ptr)) break;
18bdf90a
TH
1430 if (ptr >= s) ptr += UTF8SKIP(ptr);
1431 else ptr++;
28be1210 1432 if (ptr > s+len)
18bdf90a
TH
1433 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1434 } else {
1435 for (ptr = s+len-1; ptr >= s; ptr--)
1436 if (*ptr != 0 && !isSPACE(*ptr)) break;
1437 ptr++;
1438 }
08ca2aa3
TH
1439 sv = newSVpvn(s, ptr-s);
1440 } else sv = newSVpvn(s, len);
1441
1442 if (utf8) {
1443 SvUTF8_on(sv);
1444 /* Undo any upgrade done due to need_utf8() */
f337b084 1445 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1446 sv_utf8_downgrade(sv, 0);
a6ec74c1
JH
1447 }
1448 XPUSHs(sv_2mortal(sv));
08ca2aa3 1449 s += len;
a6ec74c1
JH
1450 break;
1451 case 'B':
08ca2aa3
TH
1452 case 'b': {
1453 char *str;
49704364 1454 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1455 len = (strend - s) * 8;
1456 if (checksum) {
1457 if (!PL_bitcount) {
08ca2aa3 1458 int bits;
a02a5408 1459 Newxz(PL_bitcount, 256, char);
a6ec74c1
JH
1460 for (bits = 1; bits < 256; bits++) {
1461 if (bits & 1) PL_bitcount[bits]++;
1462 if (bits & 2) PL_bitcount[bits]++;
1463 if (bits & 4) PL_bitcount[bits]++;
1464 if (bits & 8) PL_bitcount[bits]++;
1465 if (bits & 16) PL_bitcount[bits]++;
1466 if (bits & 32) PL_bitcount[bits]++;
1467 if (bits & 64) PL_bitcount[bits]++;
1468 if (bits & 128) PL_bitcount[bits]++;
1469 }
1470 }
f337b084 1471 if (utf8)
08ca2aa3 1472 while (len >= 8 && s < strend) {
f337b084 1473 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1474 len -= 8;
1475 }
f337b084 1476 else
fc241834 1477 while (len >= 8) {
08ca2aa3 1478 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1479 len -= 8;
1480 }
08ca2aa3
TH
1481 if (len && s < strend) {
1482 U8 bits;
f337b084
TH
1483 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1484 if (datumtype == 'b')
a6ec74c1 1485 while (len-- > 0) {
92d41999 1486 if (bits & 1) cuv++;
a6ec74c1
JH
1487 bits >>= 1;
1488 }
f337b084 1489 else
a6ec74c1 1490 while (len-- > 0) {
08ca2aa3 1491 if (bits & 0x80) cuv++;
a6ec74c1
JH
1492 bits <<= 1;
1493 }
fc241834 1494 }
a6ec74c1
JH
1495 break;
1496 }
08ca2aa3 1497
561b68a9 1498 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1499 SvPOK_on(sv);
1500 str = SvPVX(sv);
1501 if (datumtype == 'b') {
f337b084 1502 U8 bits = 0;
f7fe979e 1503 const I32 ai32 = len;
08ca2aa3
TH
1504 for (len = 0; len < ai32; len++) {
1505 if (len & 7) bits >>= 1;
1506 else if (utf8) {
1507 if (s >= strend) break;
f337b084 1508 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1509 } else bits = *(U8 *) s++;
1510 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1511 }
08ca2aa3 1512 } else {
f337b084 1513 U8 bits = 0;
f7fe979e 1514 const I32 ai32 = len;
08ca2aa3
TH
1515 for (len = 0; len < ai32; len++) {
1516 if (len & 7) bits <<= 1;
1517 else if (utf8) {
1518 if (s >= strend) break;
f337b084 1519 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1520 } else bits = *(U8 *) s++;
1521 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1522 }
1523 }
1524 *str = '\0';
aa07b2f6 1525 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1526 XPUSHs(sv);
a6ec74c1 1527 break;
08ca2aa3 1528 }
a6ec74c1 1529 case 'H':
08ca2aa3
TH
1530 case 'h': {
1531 char *str;
fc241834 1532 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1533 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1534 len = (strend - s) * 2;
561b68a9 1535 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1536 SvPOK_on(sv);
1537 str = SvPVX(sv);
1538 if (datumtype == 'h') {
f337b084 1539 U8 bits = 0;
9e27e96a 1540 I32 ai32 = len;
fc241834
RGS
1541 for (len = 0; len < ai32; len++) {
1542 if (len & 1) bits >>= 4;
1543 else if (utf8) {
1544 if (s >= strend) break;
f337b084 1545 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1546 } else bits = * (U8 *) s++;
a6ec74c1
JH
1547 *str++ = PL_hexdigit[bits & 15];
1548 }
08ca2aa3 1549 } else {
f337b084 1550 U8 bits = 0;
f7fe979e 1551 const I32 ai32 = len;
08ca2aa3
TH
1552 for (len = 0; len < ai32; len++) {
1553 if (len & 1) bits <<= 4;
1554 else if (utf8) {
1555 if (s >= strend) break;
f337b084 1556 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1557 } else bits = *(U8 *) s++;
a6ec74c1
JH
1558 *str++ = PL_hexdigit[(bits >> 4) & 15];
1559 }
1560 }
1561 *str = '\0';
aa07b2f6 1562 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1563 XPUSHs(sv);
a6ec74c1 1564 break;
08ca2aa3 1565 }
a6ec74c1 1566 case 'c':
73cb7263 1567 while (len-- > 0) {
f337b084 1568 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
73cb7263
NC
1569 if (aint >= 128) /* fake up signed chars */
1570 aint -= 256;
08ca2aa3 1571 if (!checksum)
ac7f3b1b 1572 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1573 else if (checksum > bits_in_uv)
1574 cdouble += (NV)aint;
1575 else
1576 cuv += aint;
a6ec74c1
JH
1577 }
1578 break;
1579 case 'C':
08ca2aa3
TH
1580 case 'W':
1581 W_checksum:
35bcd338 1582 if (len == 0) {
fc241834 1583 if (explicit_length && datumtype == 'C')
08ca2aa3 1584 /* Switch to "character" mode */
f337b084 1585 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
35bcd338
JH
1586 break;
1587 }
fc241834 1588 if (datumtype == 'C' ?
f337b084
TH
1589 (symptr->flags & FLAG_DO_UTF8) &&
1590 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
08ca2aa3 1591 while (len-- > 0 && s < strend) {
08ca2aa3 1592 STRLEN retlen;
f7fe979e 1593 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1594 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1595 if (retlen == (STRLEN) -1 || retlen == 0)
1596 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1597 s += retlen;
1598 if (!checksum)
1599 PUSHs(sv_2mortal(newSVuv((UV) val)));
1600 else if (checksum > bits_in_uv)
1601 cdouble += (NV) val;
d6d3e8bd 1602 else
08ca2aa3 1603 cuv += val;
fc241834 1604 }
08ca2aa3 1605 } else if (!checksum)
a6ec74c1 1606 while (len-- > 0) {
f7fe979e 1607 const U8 ch = *(U8 *) s++;
08ca2aa3 1608 PUSHs(sv_2mortal(newSVuv((UV) ch)));
a6ec74c1 1609 }
08ca2aa3
TH
1610 else if (checksum > bits_in_uv)
1611 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1612 else
1613 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1614 break;
1615 case 'U':
35bcd338 1616 if (len == 0) {
08ca2aa3
TH
1617 if (explicit_length) {
1618 /* Switch to "bytes in UTF-8" mode */
f337b084 1619 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1620 else
1621 /* Should be impossible due to the need_utf8() test */
1622 Perl_croak(aTHX_ "U0 mode on a byte string");
1623 }
35bcd338
JH
1624 break;
1625 }
08ca2aa3 1626 if (len > strend - s) len = strend - s;
fc241834 1627 if (!checksum) {
08ca2aa3
TH
1628 if (len && unpack_only_one) len = 1;
1629 EXTEND(SP, len);
1630 EXTEND_MORTAL(len);
fc241834 1631 }
08ca2aa3
TH
1632 while (len-- > 0 && s < strend) {
1633 STRLEN retlen;
1634 UV auv;
1635 if (utf8) {
1636 U8 result[UTF8_MAXLEN];
f7fe979e 1637 const char *ptr = s;
08ca2aa3 1638 STRLEN len;
08ca2aa3
TH
1639 /* Bug: warns about bad utf8 even if we are short on bytes
1640 and will break out of the loop */
230e1fce
NC
1641 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1642 'U'))
08ca2aa3
TH
1643 break;
1644 len = UTF8SKIP(result);
fc241834 1645 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1646 (char *) &result[1], len-1, 'U')) break;
08ca2aa3
TH
1647 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1648 s = ptr;
1649 } else {
1650 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1651 if (retlen == (STRLEN) -1 || retlen == 0)
1652 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1653 s += retlen;
1654 }
1655 if (!checksum)
1656 PUSHs(sv_2mortal(newSVuv((UV) auv)));
73cb7263 1657 else if (checksum > bits_in_uv)
08ca2aa3 1658 cdouble += (NV) auv;
73cb7263 1659 else
08ca2aa3 1660 cuv += auv;
a6ec74c1
JH
1661 }
1662 break;
49704364
WL
1663 case 's' | TYPE_IS_SHRIEKING:
1664#if SHORTSIZE != SIZE16
73cb7263 1665 while (len-- > 0) {
08ca2aa3 1666 short ashort;
f337b084
TH
1667 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1668 DO_BO_UNPACK(ashort, s);
08ca2aa3 1669 if (!checksum)
ac7f3b1b 1670 PUSHs(sv_2mortal(newSViv((IV)ashort)));
73cb7263
NC
1671 else if (checksum > bits_in_uv)
1672 cdouble += (NV)ashort;
1673 else
1674 cuv += ashort;
49704364
WL
1675 }
1676 break;
1677#else
1678 /* Fallthrough! */
a6ec74c1 1679#endif
49704364 1680 case 's':
73cb7263 1681 while (len-- > 0) {
08ca2aa3
TH
1682 I16 ai16;
1683
1684#if U16SIZE > SIZE16
1685 ai16 = 0;
1686#endif
f337b084 1687 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1688 DO_BO_UNPACK(ai16, 16);
1109a392 1689#if U16SIZE > SIZE16
73cb7263
NC
1690 if (ai16 > 32767)
1691 ai16 -= 65536;
a6ec74c1 1692#endif
08ca2aa3 1693 if (!checksum)
ac7f3b1b 1694 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263
NC
1695 else if (checksum > bits_in_uv)
1696 cdouble += (NV)ai16;
1697 else
1698 cuv += ai16;
a6ec74c1
JH
1699 }
1700 break;
49704364
WL
1701 case 'S' | TYPE_IS_SHRIEKING:
1702#if SHORTSIZE != SIZE16
73cb7263 1703 while (len-- > 0) {
08ca2aa3 1704 unsigned short aushort;
f337b084
TH
1705 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1706 DO_BO_UNPACK(aushort, s);
08ca2aa3
TH
1707 if (!checksum)
1708 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
73cb7263
NC
1709 else if (checksum > bits_in_uv)
1710 cdouble += (NV)aushort;
1711 else
1712 cuv += aushort;
49704364
WL
1713 }
1714 break;
1715#else
1716 /* Fallhrough! */
1717#endif
a6ec74c1
JH
1718 case 'v':
1719 case 'n':
1720 case 'S':
73cb7263 1721 while (len-- > 0) {
08ca2aa3
TH
1722 U16 au16;
1723#if U16SIZE > SIZE16
1724 au16 = 0;
1725#endif
f337b084 1726 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1727 DO_BO_UNPACK(au16, 16);
a6ec74c1 1728#ifdef HAS_NTOHS
73cb7263
NC
1729 if (datumtype == 'n')
1730 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1731#endif
1732#ifdef HAS_VTOHS
73cb7263
NC
1733 if (datumtype == 'v')
1734 au16 = vtohs(au16);
a6ec74c1 1735#endif
08ca2aa3
TH
1736 if (!checksum)
1737 PUSHs(sv_2mortal(newSVuv((UV)au16)));
73cb7263 1738 else if (checksum > bits_in_uv)
f337b084 1739 cdouble += (NV) au16;
73cb7263
NC
1740 else
1741 cuv += au16;
a6ec74c1
JH
1742 }
1743 break;
7212898e 1744#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1745 case 'v' | TYPE_IS_SHRIEKING:
1746 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1747 while (len-- > 0) {
08ca2aa3
TH
1748 I16 ai16;
1749# if U16SIZE > SIZE16
1750 ai16 = 0;
1751# endif
f337b084 1752 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1753# ifdef HAS_NTOHS
73cb7263 1754 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1755 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1756# endif /* HAS_NTOHS */
1757# ifdef HAS_VTOHS
73cb7263 1758 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1759 ai16 = (I16) vtohs((U16) ai16);
1760# endif /* HAS_VTOHS */
1761 if (!checksum)
ac7f3b1b 1762 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263 1763 else if (checksum > bits_in_uv)
08ca2aa3 1764 cdouble += (NV) ai16;
73cb7263
NC
1765 else
1766 cuv += ai16;
068bd2e7
MHM
1767 }
1768 break;
08ca2aa3 1769#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1770 case 'i':
49704364 1771 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1772 while (len-- > 0) {
08ca2aa3 1773 int aint;
f337b084
TH
1774 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1775 DO_BO_UNPACK(aint, i);
08ca2aa3 1776 if (!checksum)
ac7f3b1b 1777 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1778 else if (checksum > bits_in_uv)
1779 cdouble += (NV)aint;
1780 else
1781 cuv += aint;
a6ec74c1
JH
1782 }
1783 break;
1784 case 'I':
49704364 1785 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1786 while (len-- > 0) {
08ca2aa3 1787 unsigned int auint;
f337b084
TH
1788 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1789 DO_BO_UNPACK(auint, i);
08ca2aa3 1790 if (!checksum)
ac7f3b1b 1791 PUSHs(sv_2mortal(newSVuv((UV)auint)));
73cb7263
NC
1792 else if (checksum > bits_in_uv)
1793 cdouble += (NV)auint;
1794 else
1795 cuv += auint;
a6ec74c1
JH
1796 }
1797 break;
92d41999 1798 case 'j':
73cb7263 1799 while (len-- > 0) {
08ca2aa3 1800 IV aiv;
f337b084 1801 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1802#if IVSIZE == INTSIZE
f337b084 1803 DO_BO_UNPACK(aiv, i);
1109a392 1804#elif IVSIZE == LONGSIZE
f337b084 1805 DO_BO_UNPACK(aiv, l);
1109a392 1806#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1807 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1808#else
1809 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1810#endif
08ca2aa3 1811 if (!checksum)
ac7f3b1b 1812 PUSHs(sv_2mortal(newSViv(aiv)));
73cb7263
NC
1813 else if (checksum > bits_in_uv)
1814 cdouble += (NV)aiv;
1815 else
1816 cuv += aiv;
92d41999
JH
1817 }
1818 break;
1819 case 'J':
73cb7263 1820 while (len-- > 0) {
08ca2aa3 1821 UV auv;
f337b084 1822 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1823#if IVSIZE == INTSIZE
f337b084 1824 DO_BO_UNPACK(auv, i);
08ca2aa3 1825#elif IVSIZE == LONGSIZE
f337b084 1826 DO_BO_UNPACK(auv, l);
08ca2aa3 1827#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1828 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1829#else
1830 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1831#endif
08ca2aa3 1832 if (!checksum)
ac7f3b1b 1833 PUSHs(sv_2mortal(newSVuv(auv)));
73cb7263
NC
1834 else if (checksum > bits_in_uv)
1835 cdouble += (NV)auv;
1836 else
1837 cuv += auv;
92d41999
JH
1838 }
1839 break;
49704364
WL
1840 case 'l' | TYPE_IS_SHRIEKING:
1841#if LONGSIZE != SIZE32
73cb7263 1842 while (len-- > 0) {
08ca2aa3 1843 long along;
f337b084
TH
1844 SHIFT_VAR(utf8, s, strend, along, datumtype);
1845 DO_BO_UNPACK(along, l);
08ca2aa3 1846 if (!checksum)
ac7f3b1b 1847 PUSHs(sv_2mortal(newSViv((IV)along)));
73cb7263
NC
1848 else if (checksum > bits_in_uv)
1849 cdouble += (NV)along;
1850 else
1851 cuv += along;
49704364
WL
1852 }
1853 break;
1854#else
1855 /* Fallthrough! */
a6ec74c1 1856#endif
49704364 1857 case 'l':
73cb7263 1858 while (len-- > 0) {
08ca2aa3
TH
1859 I32 ai32;
1860#if U32SIZE > SIZE32
1861 ai32 = 0;
1862#endif
f337b084 1863 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1864 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1865#if U32SIZE > SIZE32
08ca2aa3 1866 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1867#endif
08ca2aa3 1868 if (!checksum)
ac7f3b1b 1869 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1870 else if (checksum > bits_in_uv)
1871 cdouble += (NV)ai32;
1872 else
1873 cuv += ai32;
a6ec74c1
JH
1874 }
1875 break;
49704364
WL
1876 case 'L' | TYPE_IS_SHRIEKING:
1877#if LONGSIZE != SIZE32
73cb7263 1878 while (len-- > 0) {
08ca2aa3 1879 unsigned long aulong;
f337b084
TH
1880 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1881 DO_BO_UNPACK(aulong, l);
08ca2aa3 1882 if (!checksum)
ac7f3b1b 1883 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
73cb7263
NC
1884 else if (checksum > bits_in_uv)
1885 cdouble += (NV)aulong;
1886 else
1887 cuv += aulong;
49704364
WL
1888 }
1889 break;
1890#else
1891 /* Fall through! */
1892#endif
a6ec74c1
JH
1893 case 'V':
1894 case 'N':
1895 case 'L':
73cb7263 1896 while (len-- > 0) {
08ca2aa3
TH
1897 U32 au32;
1898#if U32SIZE > SIZE32
1899 au32 = 0;
1900#endif
f337b084 1901 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1902 DO_BO_UNPACK(au32, 32);
a6ec74c1 1903#ifdef HAS_NTOHL
73cb7263
NC
1904 if (datumtype == 'N')
1905 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1906#endif
1907#ifdef HAS_VTOHL
73cb7263
NC
1908 if (datumtype == 'V')
1909 au32 = vtohl(au32);
a6ec74c1 1910#endif
08ca2aa3 1911 if (!checksum)
fc241834
RGS
1912 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1913 else if (checksum > bits_in_uv)
1914 cdouble += (NV)au32;
1915 else
1916 cuv += au32;
a6ec74c1
JH
1917 }
1918 break;
7212898e 1919#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1920 case 'V' | TYPE_IS_SHRIEKING:
1921 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1922 while (len-- > 0) {
08ca2aa3
TH
1923 I32 ai32;
1924# if U32SIZE > SIZE32
1925 ai32 = 0;
1926# endif
f337b084 1927 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1928# ifdef HAS_NTOHL
73cb7263
NC
1929 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1930 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1931# endif
1932# ifdef HAS_VTOHL
73cb7263
NC
1933 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1934 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1935# endif
1936 if (!checksum)
ac7f3b1b 1937 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1938 else if (checksum > bits_in_uv)
1939 cdouble += (NV)ai32;
1940 else
1941 cuv += ai32;
068bd2e7
MHM
1942 }
1943 break;
08ca2aa3 1944#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1945 case 'p':
a6ec74c1 1946 while (len-- > 0) {
f7fe979e 1947 const char *aptr;
f337b084 1948 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1949 DO_BO_UNPACK_PC(aptr);
c4c5f44a
NC
1950 /* newSVpv generates undef if aptr is NULL */
1951 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
a6ec74c1
JH
1952 }
1953 break;
1954 case 'w':
a6ec74c1
JH
1955 {
1956 UV auv = 0;
1957 U32 bytes = 0;
fc241834 1958
08ca2aa3
TH
1959 while (len > 0 && s < strend) {
1960 U8 ch;
f337b084 1961 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1962 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1963 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1964 if (ch < 0x80) {
a6ec74c1 1965 bytes = 0;
ac7f3b1b 1966 PUSHs(sv_2mortal(newSVuv(auv)));
a6ec74c1
JH
1967 len--;
1968 auv = 0;
08ca2aa3 1969 continue;
a6ec74c1 1970 }
08ca2aa3 1971 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1972 const char *t;
a6ec74c1 1973
f5992bc4 1974 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 1975 while (s < strend) {
f337b084 1976 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1977 sv = mul128(sv, (U8)(ch & 0x7f));
1978 if (!(ch & 0x80)) {
a6ec74c1
JH
1979 bytes = 0;
1980 break;
1981 }
1982 }
10516c54 1983 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1984 while (*t == '0')
1985 t++;
1986 sv_chop(sv, t);
1987 PUSHs(sv_2mortal(sv));
1988 len--;
1989 auv = 0;
1990 }
1991 }
1992 if ((s >= strend) && bytes)
49704364 1993 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1994 }
1995 break;
1996 case 'P':
49704364
WL
1997 if (symptr->howlen == e_star)
1998 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1999 EXTEND(SP, 1);
2d3e0934 2000 if (s + sizeof(char*) <= strend) {
08ca2aa3 2001 char *aptr;
f337b084 2002 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 2003 DO_BO_UNPACK_PC(aptr);
fc241834
RGS
2004 /* newSVpvn generates undef if aptr is NULL */
2005 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
08ca2aa3 2006 }
a6ec74c1
JH
2007 break;
2008#ifdef HAS_QUAD
2009 case 'q':
73cb7263 2010 while (len-- > 0) {
08ca2aa3 2011 Quad_t aquad;
f337b084
TH
2012 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2013 DO_BO_UNPACK(aquad, 64);
08ca2aa3
TH
2014 if (!checksum)
2015 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
ac7f3b1b 2016 newSViv((IV)aquad) : newSVnv((NV)aquad)));
73cb7263
NC
2017 else if (checksum > bits_in_uv)
2018 cdouble += (NV)aquad;
2019 else
2020 cuv += aquad;
2021 }
a6ec74c1
JH
2022 break;
2023 case 'Q':
73cb7263 2024 while (len-- > 0) {
08ca2aa3 2025 Uquad_t auquad;
f337b084
TH
2026 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2027 DO_BO_UNPACK(auquad, 64);
08ca2aa3
TH
2028 if (!checksum)
2029 PUSHs(sv_2mortal(auquad <= UV_MAX ?
2030 newSVuv((UV)auquad):newSVnv((NV)auquad)));
73cb7263
NC
2031 else if (checksum > bits_in_uv)
2032 cdouble += (NV)auquad;
2033 else
2034 cuv += auquad;
a6ec74c1
JH
2035 }
2036 break;
08ca2aa3 2037#endif /* HAS_QUAD */
a6ec74c1
JH
2038 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2039 case 'f':
73cb7263 2040 while (len-- > 0) {
08ca2aa3 2041 float afloat;
f337b084 2042 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
2043 DO_BO_UNPACK_N(afloat, float);
2044 if (!checksum)
2045 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2046 else
73cb7263 2047 cdouble += afloat;
fc241834 2048 }
a6ec74c1
JH
2049 break;
2050 case 'd':
73cb7263 2051 while (len-- > 0) {
08ca2aa3 2052 double adouble;
f337b084 2053 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
2054 DO_BO_UNPACK_N(adouble, double);
2055 if (!checksum)
2056 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2057 else
73cb7263 2058 cdouble += adouble;
fc241834 2059 }
a6ec74c1 2060 break;
92d41999 2061 case 'F':
73cb7263 2062 while (len-- > 0) {
08ca2aa3 2063 NV anv;
f337b084 2064 SHIFT_VAR(utf8, s, strend, anv, datumtype);
08ca2aa3
TH
2065 DO_BO_UNPACK_N(anv, NV);
2066 if (!checksum)
2067 PUSHs(sv_2mortal(newSVnv(anv)));
2068 else
73cb7263 2069 cdouble += anv;
fc241834 2070 }
92d41999
JH
2071 break;
2072#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2073 case 'D':
73cb7263 2074 while (len-- > 0) {
08ca2aa3 2075 long double aldouble;
f337b084 2076 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
08ca2aa3
TH
2077 DO_BO_UNPACK_N(aldouble, long double);
2078 if (!checksum)
2079 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2080 else
2081 cdouble += aldouble;
92d41999
JH
2082 }
2083 break;
2084#endif
a6ec74c1
JH
2085 case 'u':
2086 /* MKS:
2087 * Initialise the decode mapping. By using a table driven
2088 * algorithm, the code will be character-set independent
2089 * (and just as fast as doing character arithmetic)
2090 */
10edeb5d 2091 if (PL_uudmap[(U8)'M'] == 0) {
2d3e0934 2092 size_t i;
a6ec74c1 2093
2d3e0934 2094 for (i = 0; i < sizeof(PL_uuemap); ++i)
a6ec74c1
JH
2095 PL_uudmap[(U8)PL_uuemap[i]] = i;
2096 /*
2097 * Because ' ' and '`' map to the same value,
2098 * we need to decode them both the same.
2099 */
10edeb5d 2100 PL_uudmap[(U8)' '] = 0;
a6ec74c1 2101 }
08ca2aa3 2102 {
f7fe979e 2103 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 2104 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
2105 if (l) SvPOK_on(sv);
2106 }
2107 if (utf8) {
2108 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2109 I32 a, b, c, d;
2110 char hunk[4];
2111
2112 hunk[3] = '\0';
2113 while (len > 0) {
2114 next_uni_uu(aTHX_ &s, strend, &a);
2115 next_uni_uu(aTHX_ &s, strend, &b);
2116 next_uni_uu(aTHX_ &s, strend, &c);
2117 next_uni_uu(aTHX_ &s, strend, &d);
2118 hunk[0] = (char)((a << 2) | (b >> 4));
2119 hunk[1] = (char)((b << 4) | (c >> 2));
2120 hunk[2] = (char)((c << 6) | d);
2121 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2122 len -= 3;
2123 }
2124 if (s < strend) {
f7fe979e
AL
2125 if (*s == '\n') {
2126 s++;
2127 }
08ca2aa3
TH
2128 else {
2129 /* possible checksum byte */
f7fe979e
AL
2130 const char *skip = s+UTF8SKIP(s);
2131 if (skip < strend && *skip == '\n')
2132 s = skip+1;
08ca2aa3
TH
2133 }
2134 }
2135 }
2136 } else {
fc241834
RGS
2137 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2138 I32 a, b, c, d;
2139 char hunk[4];
a6ec74c1 2140
fc241834
RGS
2141 hunk[3] = '\0';
2142 len = PL_uudmap[*(U8*)s++] & 077;
2143 while (len > 0) {
2144 if (s < strend && ISUUCHAR(*s))
2145 a = PL_uudmap[*(U8*)s++] & 077;
2146 else
2147 a = 0;
2148 if (s < strend && ISUUCHAR(*s))
2149 b = PL_uudmap[*(U8*)s++] & 077;
2150 else
2151 b = 0;
2152 if (s < strend && ISUUCHAR(*s))
2153 c = PL_uudmap[*(U8*)s++] & 077;
2154 else
2155 c = 0;
2156 if (s < strend && ISUUCHAR(*s))
2157 d = PL_uudmap[*(U8*)s++] & 077;
2158 else
2159 d = 0;
2160 hunk[0] = (char)((a << 2) | (b >> 4));
2161 hunk[1] = (char)((b << 4) | (c >> 2));
2162 hunk[2] = (char)((c << 6) | d);
2163 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2164 len -= 3;
2165 }
2166 if (*s == '\n')
2167 s++;
2168 else /* possible checksum byte */
2169 if (s + 1 < strend && s[1] == '\n')
2170 s += 2;
a6ec74c1 2171 }
08ca2aa3
TH
2172 }
2173 XPUSHs(sv);
a6ec74c1
JH
2174 break;
2175 }
49704364 2176
a6ec74c1 2177 if (checksum) {
1109a392 2178 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2179 (checksum > bits_in_uv &&
08ca2aa3
TH
2180 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2181 NV trouble, anv;
a6ec74c1 2182
08ca2aa3 2183 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2184 while (checksum >= 16) {
2185 checksum -= 16;
08ca2aa3 2186 anv *= 65536.0;
a6ec74c1 2187 }
a6ec74c1 2188 while (cdouble < 0.0)
08ca2aa3
TH
2189 cdouble += anv;
2190 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 2191 sv = newSVnv(cdouble);
a6ec74c1
JH
2192 }
2193 else {
fa8ec7c1
NC
2194 if (checksum < bits_in_uv) {
2195 UV mask = ((UV)1 << checksum) - 1;
92d41999 2196 cuv &= mask;
a6ec74c1 2197 }
c4c5f44a 2198 sv = newSVuv(cuv);
a6ec74c1
JH
2199 }
2200 XPUSHs(sv_2mortal(sv));
2201 checksum = 0;
2202 }
fc241834 2203
49704364
WL
2204 if (symptr->flags & FLAG_SLASH){
2205 if (SP - PL_stack_base - start_sp_offset <= 0)
2206 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2207 if( next_symbol(symptr) ){
2208 if( symptr->howlen == e_number )
2209 Perl_croak(aTHX_ "Count after length/code in unpack" );
2210 if( beyond ){
2211 /* ...end of char buffer then no decent length available */
2212 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2213 } else {
2214 /* take top of stack (hope it's numeric) */
2215 len = POPi;
2216 if( len < 0 )
2217 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2218 }
2219 } else {
2220 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2221 }
2222 datumtype = symptr->code;
21c16052 2223 explicit_length = FALSE;
49704364
WL
2224 goto redo_switch;
2225 }
a6ec74c1 2226 }
49704364 2227
18529408
IZ
2228 if (new_s)
2229 *new_s = s;
2230 PUTBACK;
2231 return SP - PL_stack_base - start_sp_offset;
2232}
2233
2234PP(pp_unpack)
2235{
97aff369 2236 dVAR;
18529408 2237 dSP;
bab9c0ac 2238 dPOPPOPssrl;
18529408
IZ
2239 I32 gimme = GIMME_V;
2240 STRLEN llen;
2241 STRLEN rlen;
5c144d81
NC
2242 const char *pat = SvPV_const(left, llen);
2243 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
2244 const char *strend = s + rlen;
2245 const char *patend = pat + llen;
08ca2aa3 2246 I32 cnt;
18529408
IZ
2247
2248 PUTBACK;
7accc089 2249 cnt = unpackstring(pat, patend, s, strend,
49704364 2250 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2251 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2252
18529408
IZ
2253 SPAGAIN;
2254 if ( !cnt && gimme == G_SCALAR )
2255 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2256 RETURN;
2257}
2258
f337b084 2259STATIC U8 *
f7fe979e 2260doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 2261{
f337b084 2262 *h++ = PL_uuemap[len];
a6ec74c1 2263 while (len > 2) {
f337b084
TH
2264 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2265 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2266 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2267 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2268 s += 3;
2269 len -= 3;
2270 }
2271 if (len > 0) {
f7fe979e 2272 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2273 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2274 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2275 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2276 *h++ = PL_uuemap[0];
a6ec74c1 2277 }
f337b084
TH
2278 *h++ = '\n';
2279 return h;
a6ec74c1
JH
2280}
2281
2282STATIC SV *
f7fe979e 2283S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 2284{
8b6e33c7
AL
2285 SV *result = newSVpvn(s, l);
2286 char *const result_c = SvPV_nolen(result); /* convenience */
2287 char *out = result_c;
2288 bool skip = 1;
2289 bool ignore = 0;
a6ec74c1
JH
2290
2291 while (*s) {
2292 switch (*s) {
2293 case ' ':
2294 break;
2295 case '+':
2296 if (!skip) {
2297 SvREFCNT_dec(result);
2298 return (NULL);
2299 }
2300 break;
2301 case '0':
2302 case '1':
2303 case '2':
2304 case '3':
2305 case '4':
2306 case '5':
2307 case '6':
2308 case '7':
2309 case '8':
2310 case '9':
2311 skip = 0;
2312 if (!ignore) {
2313 *(out++) = *s;
2314 }
2315 break;
2316 case '.':
2317 ignore = 1;
2318 break;
2319 default:
2320 SvREFCNT_dec(result);
2321 return (NULL);
2322 }
2323 s++;
2324 }
2325 *(out++) = '\0';
2326 SvCUR_set(result, out - result_c);
2327 return (result);
2328}
2329
2330/* pnum must be '\0' terminated */
2331STATIC int
2332S_div128(pTHX_ SV *pnum, bool *done)
2333{
8b6e33c7
AL
2334 STRLEN len;
2335 char * const s = SvPV(pnum, len);
2336 char *t = s;
2337 int m = 0;
2338
2339 *done = 1;
2340 while (*t) {
2341 const int i = m * 10 + (*t - '0');
2342 const int r = (i >> 7); /* r < 10 */
2343 m = i & 0x7F;
2344 if (r) {
2345 *done = 0;
2346 }
2347 *(t++) = '0' + r;
a6ec74c1 2348 }
8b6e33c7
AL
2349 *(t++) = '\0';
2350 SvCUR_set(pnum, (STRLEN) (t - s));
2351 return (m);
a6ec74c1
JH
2352}
2353
18529408 2354/*
7accc089
JH
2355=for apidoc packlist
2356
2357The engine implementing pack() Perl function.
2358
bfce84ec
AL
2359=cut
2360*/
7accc089
JH
2361
2362void
f7fe979e 2363Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
7accc089 2364{
97aff369 2365 dVAR;
aadb217d
JH
2366 tempsym_t sym;
2367
f7fe979e 2368 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 2369
f337b084
TH
2370 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2371 Also make sure any UTF8 flag is loaded */
56eb0262 2372 SvPV_force_nolen(cat);
bfce84ec
AL
2373 if (DO_UTF8(cat))
2374 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 2375
49704364
WL
2376 (void)pack_rec( cat, &sym, beglist, endlist );
2377}
2378
f337b084
TH
2379/* like sv_utf8_upgrade, but also repoint the group start markers */
2380STATIC void
2381marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2382 STRLEN len;
2383 tempsym_t *group;
f7fe979e
AL
2384 const char *from_ptr, *from_start, *from_end, **marks, **m;
2385 char *to_start, *to_ptr;
f337b084
TH
2386
2387 if (SvUTF8(sv)) return;
2388
aa07b2f6 2389 from_start = SvPVX_const(sv);
f337b084
TH
2390 from_end = from_start + SvCUR(sv);
2391 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2392 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2393 if (from_ptr == from_end) {
2394 /* Simple case: no character needs to be changed */
2395 SvUTF8_on(sv);
2396 return;
2397 }
2398
3473cf63 2399 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2400 Newx(to_start, len, char);
f337b084
TH
2401 Copy(from_start, to_start, from_ptr-from_start, char);
2402 to_ptr = to_start + (from_ptr-from_start);
2403
a02a5408 2404 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2405 for (group=sym_ptr; group; group = group->previous)
2406 marks[group->level] = from_start + group->strbeg;
2407 marks[sym_ptr->level+1] = from_end+1;
2408 for (m = marks; *m < from_ptr; m++)
2409 *m = to_start + (*m-from_start);
2410
2411 for (;from_ptr < from_end; from_ptr++) {
2412 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2413 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2414 }
2415 *to_ptr = 0;
2416
2417 while (*m == from_ptr) *m++ = to_ptr;
2418 if (m != marks + sym_ptr->level+1) {
2419 Safefree(marks);
2420 Safefree(to_start);
2421 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2422 }
2423 for (group=sym_ptr; group; group = group->previous)
2424 group->strbeg = marks[group->level] - to_start;
2425 Safefree(marks);
2426
2427 if (SvOOK(sv)) {
2428 if (SvIVX(sv)) {
b162af07 2429 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2430 from_start -= SvIVX(sv);
2431 SvIV_set(sv, 0);
2432 }
2433 SvFLAGS(sv) &= ~SVf_OOK;
2434 }
2435 if (SvLEN(sv) != 0)
2436 Safefree(from_start);
f880fe2f 2437 SvPV_set(sv, to_start);
b162af07
SP
2438 SvCUR_set(sv, to_ptr - to_start);
2439 SvLEN_set(sv, len);
f337b084
TH
2440 SvUTF8_on(sv);
2441}
2442
2443/* Exponential string grower. Makes string extension effectively O(n)
2444 needed says how many extra bytes we need (not counting the final '\0')
2445 Only grows the string if there is an actual lack of space
2446*/
2447STATIC char *
0bd48802 2448S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2449 const STRLEN cur = SvCUR(sv);
2450 const STRLEN len = SvLEN(sv);
f337b084
TH
2451 STRLEN extend;
2452 if (len - cur > needed) return SvPVX(sv);
2453 extend = needed > len ? needed : len;
2454 return SvGROW(sv, len+extend+1);
2455}
49704364
WL
2456
2457STATIC
2458SV **
f337b084 2459S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2460{
97aff369 2461 dVAR;
49704364 2462 tempsym_t lookahead;
f337b084
TH
2463 I32 items = endlist - beglist;
2464 bool found = next_symbol(symptr);
2465 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2466 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084
TH
2467
2468 if (symptr->level == 0 && found && symptr->code == 'U') {
2469 marked_upgrade(aTHX_ cat, symptr);
2470 symptr->flags |= FLAG_DO_UTF8;
2471 utf8 = 0;
49704364 2472 }
f337b084 2473 symptr->strbeg = SvCUR(cat);
49704364
WL
2474
2475 while (found) {
f337b084
TH
2476 SV *fromstr;
2477 STRLEN fromlen;
2478 I32 len;
a0714e2c 2479 SV *lengthcode = NULL;
49704364 2480 I32 datumtype = symptr->code;
f337b084
TH
2481 howlen_t howlen = symptr->howlen;
2482 char *start = SvPVX(cat);
2483 char *cur = start + SvCUR(cat);
49704364 2484
f337b084
TH
2485#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2486
2487 switch (howlen) {
fc241834 2488 case e_star:
f337b084
TH
2489 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2490 0 : items;
2491 break;
2492 default:
2493 /* e_no_len and e_number */
2494 len = symptr->length;
49704364
WL
2495 break;
2496 }
2497
f337b084 2498 if (len) {
a7a3cfaa 2499 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2500
a7a3cfaa
TH
2501 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2502 /* We can process this letter. */
2503 STRLEN size = props & PACK_SIZE_MASK;
2504 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2505 }
f337b084
TH
2506 }
2507
49704364
WL
2508 /* Look ahead for next symbol. Do we have code/code? */
2509 lookahead = *symptr;
2510 found = next_symbol(&lookahead);
246f24af
TH
2511 if (symptr->flags & FLAG_SLASH) {
2512 IV count;
f337b084 2513 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2514 if (strchr("aAZ", lookahead.code)) {
2515 if (lookahead.howlen == e_number) count = lookahead.length;
2516 else {
ce399ba6
NC
2517 if (items > 0) {
2518 if (SvGAMAGIC(*beglist)) {
2519 /* Avoid reading the active data more than once
2520 by copying it to a temporary. */
2521 STRLEN len;
2522 const char *const pv = SvPV_const(*beglist, len);
2523 SV *const temp = sv_2mortal(newSVpvn(pv, len));
2524 if (SvUTF8(*beglist))
2525 SvUTF8_on(temp);
2526 *beglist = temp;
2527 }
246f24af
TH
2528 count = DO_UTF8(*beglist) ?
2529 sv_len_utf8(*beglist) : sv_len(*beglist);
ce399ba6 2530 }
246f24af
TH
2531 else count = 0;
2532 if (lookahead.code == 'Z') count++;
2533 }
2534 } else {
2535 if (lookahead.howlen == e_number && lookahead.length < items)
2536 count = lookahead.length;
2537 else count = items;
2538 }
2539 lookahead.howlen = e_number;
2540 lookahead.length = count;
2541 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2542 }
49704364 2543
fc241834
RGS
2544 /* Code inside the switch must take care to properly update
2545 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2546 doesn't simply leave using break */
1109a392 2547 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2548 default:
f337b084
TH
2549 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2550 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2551 case '%':
49704364 2552 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2553 {
2554 char *from;
2555#ifdef PERL_PACK_CAN_SHRIEKSIGN
2556 case '.' | TYPE_IS_SHRIEKING:
2557#endif
2558 case '.':
2559 if (howlen == e_star) from = start;
2560 else if (len == 0) from = cur;
2561 else {
2562 tempsym_t *group = symptr;
2563
2564 while (--len && group) group = group->previous;
2565 from = group ? start + group->strbeg : start;
2566 }
2567 fromstr = NEXTFROM;
2568 len = SvIV(fromstr);
2569 goto resize;
2570#ifdef PERL_PACK_CAN_SHRIEKSIGN
2571 case '@' | TYPE_IS_SHRIEKING:
2572#endif
a6ec74c1 2573 case '@':
28be1210
TH
2574 from = start + symptr->strbeg;
2575 resize:
2576#ifdef PERL_PACK_CAN_SHRIEKSIGN
2577 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2578#else /* PERL_PACK_CAN_SHRIEKSIGN */
2579 if (utf8)
2580#endif
2581 if (len >= 0) {
2582 while (len && from < cur) {
2583 from += UTF8SKIP(from);
2584 len--;
2585 }
2586 if (from > cur)
2587 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2588 if (len) {
2589 /* Here we know from == cur */
2590 grow:
2591 GROWING(0, cat, start, cur, len);
2592 Zero(cur, len, char);
2593 cur += len;
2594 } else if (from < cur) {
2595 len = cur - from;
2596 goto shrink;
2597 } else goto no_change;
2598 } else {
2599 cur = from;
2600 len = -len;
2601 goto utf8_shrink;
f337b084 2602 }
28be1210
TH
2603 else {
2604 len -= cur - from;
f337b084 2605 if (len > 0) goto grow;
28be1210 2606 if (len == 0) goto no_change;
fc241834 2607 len = -len;
28be1210 2608 goto shrink;
f337b084 2609 }
a6ec74c1 2610 break;
28be1210 2611 }
fc241834 2612 case '(': {
49704364 2613 tempsym_t savsym = *symptr;
66c611c5
MHM
2614 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2615 symptr->flags |= group_modifiers;
49704364
WL
2616 symptr->patend = savsym.grpend;
2617 symptr->level++;
f337b084 2618 symptr->previous = &lookahead;
18529408 2619 while (len--) {
f337b084
TH
2620 U32 was_utf8;
2621 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2622 else symptr->flags &= ~FLAG_PARSE_UTF8;
2623 was_utf8 = SvUTF8(cat);
49704364 2624 symptr->patptr = savsym.grpbeg;
f337b084
TH
2625 beglist = pack_rec(cat, symptr, beglist, endlist);
2626 if (SvUTF8(cat) != was_utf8)
2627 /* This had better be an upgrade while in utf8==0 mode */
2628 utf8 = 1;
2629
49704364 2630 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2631 break; /* No way to continue */
2632 }
ee790063 2633 items = endlist - beglist;
f337b084
TH
2634 lookahead.flags = symptr->flags & ~group_modifiers;
2635 goto no_change;
18529408 2636 }
62f95557
IZ
2637 case 'X' | TYPE_IS_SHRIEKING:
2638 if (!len) /* Avoid division by 0 */
2639 len = 1;
f337b084
TH
2640 if (utf8) {
2641 char *hop, *last;
2642 I32 l = len;
2643 hop = last = start;
2644 while (hop < cur) {
2645 hop += UTF8SKIP(hop);
2646 if (--l == 0) {
2647 last = hop;
2648 l = len;
2649 }
2650 }
2651 if (last > cur)
2652 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2653 cur = last;
2654 break;
2655 }
2656 len = (cur-start) % len;
62f95557 2657 /* FALL THROUGH */
a6ec74c1 2658 case 'X':
f337b084
TH
2659 if (utf8) {
2660 if (len < 1) goto no_change;
28be1210 2661 utf8_shrink:
f337b084
TH
2662 while (len > 0) {
2663 if (cur <= start)
28be1210
TH
2664 Perl_croak(aTHX_ "'%c' outside of string in pack",
2665 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2666 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2667 if (cur <= start)
28be1210
TH
2668 Perl_croak(aTHX_ "'%c' outside of string in pack",
2669 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2670 }
2671 len--;
2672 }
2673 } else {
fc241834 2674 shrink:
f337b084 2675 if (cur - start < len)
28be1210
TH
2676 Perl_croak(aTHX_ "'%c' outside of string in pack",
2677 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2678 cur -= len;
2679 }
2680 if (cur < start+symptr->strbeg) {
2681 /* Make sure group starts don't point into the void */
2682 tempsym_t *group;
9e27e96a 2683 const STRLEN length = cur-start;
f337b084
TH
2684 for (group = symptr;
2685 group && length < group->strbeg;
2686 group = group->previous) group->strbeg = length;
2687 lookahead.strbeg = length;
2688 }
a6ec74c1 2689 break;
fc241834
RGS
2690 case 'x' | TYPE_IS_SHRIEKING: {
2691 I32 ai32;
62f95557
IZ
2692 if (!len) /* Avoid division by 0 */
2693 len = 1;
230e1fce 2694 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2695 else ai32 = (cur - start) % len;
2696 if (ai32 == 0) goto no_change;
2697 len -= ai32;
2698 }
2699 /* FALL THROUGH */
a6ec74c1 2700 case 'x':
f337b084 2701 goto grow;
a6ec74c1
JH
2702 case 'A':
2703 case 'Z':
f337b084 2704 case 'a': {
f7fe979e 2705 const char *aptr;
f337b084 2706
a6ec74c1 2707 fromstr = NEXTFROM;
e62f0680 2708 aptr = SvPV_const(fromstr, fromlen);
f337b084 2709 if (DO_UTF8(fromstr)) {
f7fe979e 2710 const char *end, *s;
f337b084
TH
2711
2712 if (!utf8 && !SvUTF8(cat)) {
2713 marked_upgrade(aTHX_ cat, symptr);
2714 lookahead.flags |= FLAG_DO_UTF8;
2715 lookahead.strbeg = symptr->strbeg;
2716 utf8 = 1;
2717 start = SvPVX(cat);
2718 cur = start + SvCUR(cat);
2719 }
fc241834 2720 if (howlen == e_star) {
f337b084
TH
2721 if (utf8) goto string_copy;
2722 len = fromlen+1;
2723 }
2724 s = aptr;
2725 end = aptr + fromlen;
2726 fromlen = datumtype == 'Z' ? len-1 : len;
2727 while ((I32) fromlen > 0 && s < end) {
2728 s += UTF8SKIP(s);
2729 fromlen--;
2730 }
2731 if (s > end)
2732 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2733 if (utf8) {
fc241834 2734 len = fromlen;
f337b084
TH
2735 if (datumtype == 'Z') len++;
2736 fromlen = s-aptr;
2737 len += fromlen;
fc241834 2738
f337b084 2739 goto string_copy;
fc241834 2740 }
f337b084
TH
2741 fromlen = len - fromlen;
2742 if (datumtype == 'Z') fromlen--;
2743 if (howlen == e_star) {
2744 len = fromlen;
2745 if (datumtype == 'Z') len++;
fc241834 2746 }
f337b084 2747 GROWING(0, cat, start, cur, len);
fc241834 2748 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084
TH
2749 datumtype | TYPE_IS_PACK))
2750 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2751 cur += fromlen;
a6ec74c1 2752 len -= fromlen;
f337b084
TH
2753 } else if (utf8) {
2754 if (howlen == e_star) {
2755 len = fromlen;
2756 if (datumtype == 'Z') len++;
a6ec74c1 2757 }
f337b084
TH
2758 if (len <= (I32) fromlen) {
2759 fromlen = len;
2760 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2761 }
fc241834 2762 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2763 upgrade, so:
2764 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2765 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2766 len -= fromlen;
2767 while (fromlen > 0) {
230e1fce 2768 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2769 aptr++;
2770 fromlen--;
fc241834 2771 }
f337b084
TH
2772 } else {
2773 string_copy:
2774 if (howlen == e_star) {
2775 len = fromlen;
2776 if (datumtype == 'Z') len++;
2777 }
2778 if (len <= (I32) fromlen) {
2779 fromlen = len;
2780 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2781 }
f337b084
TH
2782 GROWING(0, cat, start, cur, len);
2783 Copy(aptr, cur, fromlen, char);
2784 cur += fromlen;
2785 len -= fromlen;
a6ec74c1 2786 }
f337b084
TH
2787 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2788 cur += len;
a6ec74c1 2789 break;
f337b084 2790 }
a6ec74c1 2791 case 'B':
f337b084 2792 case 'b': {
b83604b4 2793 const char *str, *end;
f337b084
TH
2794 I32 l, field_len;
2795 U8 bits;
2796 bool utf8_source;
2797 U32 utf8_flags;
a6ec74c1 2798
fc241834 2799 fromstr = NEXTFROM;
b83604b4 2800 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2801 end = str + fromlen;
2802 if (DO_UTF8(fromstr)) {
2803 utf8_source = TRUE;
041457d9 2804 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2805 } else {
2806 utf8_source = FALSE;
2807 utf8_flags = 0; /* Unused, but keep compilers happy */
2808 }
2809 if (howlen == e_star) len = fromlen;
2810 field_len = (len+7)/8;
2811 GROWING(utf8, cat, start, cur, field_len);
2812 if (len > (I32)fromlen) len = fromlen;
2813 bits = 0;
2814 l = 0;
2815 if (datumtype == 'B')
2816 while (l++ < len) {
2817 if (utf8_source) {
95b63a38 2818 UV val = 0;
f337b084
TH
2819 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2820 bits |= val & 1;
2821 } else bits |= *str++ & 1;
2822 if (l & 7) bits <<= 1;
fc241834 2823 else {
f337b084
TH
2824 PUSH_BYTE(utf8, cur, bits);
2825 bits = 0;
a6ec74c1
JH
2826 }
2827 }
f337b084
TH
2828 else
2829 /* datumtype == 'b' */
2830 while (l++ < len) {
2831 if (utf8_source) {
95b63a38 2832 UV val = 0;
f337b084
TH
2833 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2834 if (val & 1) bits |= 0x80;
2835 } else if (*str++ & 1)
2836 bits |= 0x80;
2837 if (l & 7) bits >>= 1;
fc241834 2838 else {
f337b084
TH
2839 PUSH_BYTE(utf8, cur, bits);
2840 bits = 0;
a6ec74c1
JH
2841 }
2842 }
f337b084
TH
2843 l--;
2844 if (l & 7) {
fc241834 2845 if (datumtype == 'B')
f337b084 2846 bits <<= 7 - (l & 7);
fc241834 2847 else
f337b084
TH
2848 bits >>= 7 - (l & 7);
2849 PUSH_BYTE(utf8, cur, bits);
2850 l += 7;
a6ec74c1 2851 }
f337b084
TH
2852 /* Determine how many chars are left in the requested field */
2853 l /= 8;
2854 if (howlen == e_star) field_len = 0;
2855 else field_len -= l;
2856 Zero(cur, field_len, char);
2857 cur += field_len;
a6ec74c1 2858 break;
f337b084 2859 }
a6ec74c1 2860 case 'H':
f337b084 2861 case 'h': {
10516c54 2862 const char *str, *end;
f337b084
TH
2863 I32 l, field_len;
2864 U8 bits;
2865 bool utf8_source;
2866 U32 utf8_flags;
a6ec74c1 2867
fc241834 2868 fromstr = NEXTFROM;
10516c54 2869 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2870 end = str + fromlen;
2871 if (DO_UTF8(fromstr)) {
2872 utf8_source = TRUE;
041457d9 2873 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2874 } else {
2875 utf8_source = FALSE;
2876 utf8_flags = 0; /* Unused, but keep compilers happy */
2877 }
2878 if (howlen == e_star) len = fromlen;
2879 field_len = (len+1)/2;
2880 GROWING(utf8, cat, start, cur, field_len);
2881 if (!utf8 && len > (I32)fromlen) len = fromlen;
2882 bits = 0;
2883 l = 0;
2884 if (datumtype == 'H')
2885 while (l++ < len) {
2886 if (utf8_source) {
95b63a38 2887 UV val = 0;
f337b084
TH
2888 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2889 if (val < 256 && isALPHA(val))
2890 bits |= (val + 9) & 0xf;
a6ec74c1 2891 else
f337b084
TH
2892 bits |= val & 0xf;
2893 } else if (isALPHA(*str))
2894 bits |= (*str++ + 9) & 0xf;
2895 else
2896 bits |= *str++ & 0xf;
2897 if (l & 1) bits <<= 4;
fc241834 2898 else {
f337b084
TH
2899 PUSH_BYTE(utf8, cur, bits);
2900 bits = 0;
a6ec74c1
JH
2901 }
2902 }
f337b084
TH
2903 else
2904 while (l++ < len) {
2905 if (utf8_source) {
95b63a38 2906 UV val = 0;
f337b084
TH
2907 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2908 if (val < 256 && isALPHA(val))
2909 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2910 else
f337b084
TH
2911 bits |= (val & 0xf) << 4;
2912 } else if (isALPHA(*str))
2913 bits |= ((*str++ + 9) & 0xf) << 4;
2914 else
2915 bits |= (*str++ & 0xf) << 4;
2916 if (l & 1) bits >>= 4;
fc241834 2917 else {
f337b084
TH
2918 PUSH_BYTE(utf8, cur, bits);
2919 bits = 0;
a6ec74c1 2920 }
fc241834 2921 }
f337b084
TH
2922 l--;
2923 if (l & 1) {
2924 PUSH_BYTE(utf8, cur, bits);
2925 l++;
2926 }
2927 /* Determine how many chars are left in the requested field */
2928 l /= 2;
2929 if (howlen == e_star) field_len = 0;
2930 else field_len -= l;
2931 Zero(cur, field_len, char);
2932 cur += field_len;
2933 break;
fc241834
RGS
2934 }
2935 case 'c':
f337b084
TH
2936 while (len-- > 0) {
2937 IV aiv;
2938 fromstr = NEXTFROM;
2939 aiv = SvIV(fromstr);
2940 if ((-128 > aiv || aiv > 127) &&
2941 ckWARN(WARN_PACK))
2942 Perl_warner(aTHX_ packWARN(WARN_PACK),
2943 "Character in 'c' format wrapped in pack");
585ec06d 2944 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2945 }
2946 break;
2947 case 'C':
f337b084
TH
2948 if (len == 0) {
2949 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2950 break;
2951 }
2952 GROWING(0, cat, start, cur, len);
a6ec74c1 2953 while (len-- > 0) {
f337b084 2954 IV aiv;
a6ec74c1 2955 fromstr = NEXTFROM;
f337b084
TH
2956 aiv = SvIV(fromstr);
2957 if ((0 > aiv || aiv > 0xff) &&
fc241834
RGS
2958 ckWARN(WARN_PACK))
2959 Perl_warner(aTHX_ packWARN(WARN_PACK),
2960 "Character in 'C' format wrapped in pack");
585ec06d 2961 *cur++ = (char)(aiv & 0xff);
f337b084 2962 }
fc241834
RGS
2963 break;
2964 case 'W': {
2965 char *end;
670f1322 2966 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2967
2968 end = start+SvLEN(cat)-1;
2969 if (utf8) end -= UTF8_MAXLEN-1;
2970 while (len-- > 0) {
2971 UV auv;
2972 fromstr = NEXTFROM;
2973 auv = SvUV(fromstr);
2974 if (in_bytes) auv = auv % 0x100;
2975 if (utf8) {
2976 W_utf8:
2977 if (cur > end) {
2978 *cur = '\0';
b162af07 2979 SvCUR_set(cat, cur - start);
fc241834
RGS
2980
2981 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2982 end = start+SvLEN(cat)-UTF8_MAXLEN;
2983 }
230e1fce
NC
2984 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2985 NATIVE_TO_UNI(auv),
041457d9 2986 warn_utf8 ?
230e1fce 2987 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
2988 } else {
2989 if (auv >= 0x100) {
2990 if (!SvUTF8(cat)) {
2991 *cur = '\0';
b162af07 2992 SvCUR_set(cat, cur - start);
fc241834
RGS
2993 marked_upgrade(aTHX_ cat, symptr);
2994 lookahead.flags |= FLAG_DO_UTF8;
2995 lookahead.strbeg = symptr->strbeg;
2996 utf8 = 1;
2997 start = SvPVX(cat);
2998 cur = start + SvCUR(cat);
2999 end = start+SvLEN(cat)-UTF8_MAXLEN;
3000 goto W_utf8;
3001 }
3002 if (ckWARN(WARN_PACK))
3003 Perl_warner(aTHX_ packWARN(WARN_PACK),
3004 "Character in 'W' format wrapped in pack");
3005 auv &= 0xff;
3006 }
3007 if (cur >= end) {
3008 *cur = '\0';
b162af07 3009 SvCUR_set(cat, cur - start);
fc241834
RGS
3010 GROWING(0, cat, start, cur, len+1);
3011 end = start+SvLEN(cat)-1;
3012 }
fe2774ed 3013 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
3014 }
3015 }
3016 break;
fc241834
RGS
3017 }
3018 case 'U': {
3019 char *end;
3020
3021 if (len == 0) {
3022 if (!(symptr->flags & FLAG_DO_UTF8)) {
3023 marked_upgrade(aTHX_ cat, symptr);
3024 lookahead.flags |= FLAG_DO_UTF8;
3025 lookahead.strbeg = symptr->strbeg;
3026 }
3027 utf8 = 0;
3028 goto no_change;
3029 }
3030
3031 end = start+SvLEN(cat);
3032 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 3033 while (len-- > 0) {
fc241834 3034 UV auv;
a6ec74c1 3035 fromstr = NEXTFROM;
fc241834
RGS
3036 auv = SvUV(fromstr);
3037 if (utf8) {
230e1fce 3038 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 3039 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 3040 warn_utf8 ?
fc241834
RGS
3041 0 : UNICODE_ALLOW_ANY);
3042 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3043 *cur = '\0';
b162af07 3044 SvCUR_set(cat, cur - start);
fc241834
RGS
3045 GROWING(0, cat, start, cur,
3046 len+(endb-buffer)*UTF8_EXPAND);
3047 end = start+SvLEN(cat);
3048 }
64844641 3049 cur = bytes_to_uni(buffer, endb-buffer, cur);
fc241834
RGS
3050 } else {
3051 if (cur >= end) {
3052 *cur = '\0';
b162af07 3053 SvCUR_set(cat, cur - start);
fc241834
RGS
3054 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3055 end = start+SvLEN(cat)-UTF8_MAXLEN;
3056 }
230e1fce 3057 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 3058 warn_utf8 ?
230e1fce 3059 0 : UNICODE_ALLOW_ANY);
fc241834 3060 }
a6ec74c1 3061 }
a6ec74c1 3062 break;
fc241834 3063 }
a6ec74c1
JH
3064 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3065 case 'f':
a6ec74c1 3066 while (len-- > 0) {
f337b084
TH
3067 float afloat;
3068 NV anv;
a6ec74c1 3069 fromstr = NEXTFROM;
f337b084 3070 anv = SvNV(fromstr);
5cdb9e01 3071#ifdef __VOS__
f337b084 3072 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3073 during conversion from double to float into infinity, so we
3074 do it by hand. This code should either be generalized for
3075 any OS that needs it, or removed if and when VOS implements
3076 posix-976 (suggestion to support mapping to infinity).
3077 Paul.Green@stratus.com 02-04-02. */
f337b084 3078 if (anv > FLT_MAX)
fc241834 3079 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3080 else if (anv < -FLT_MAX)
fc241834 3081 afloat = _float_constants[0]; /* single prec. inf. */
f337b084
TH
3082 else afloat = (float) anv;
3083#else /* __VOS__ */
baf3cf9c 3084# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3085 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3086 * on Alpha; fake it if we don't have them.
3087 */
f337b084 3088 if (anv > FLT_MAX)
fc241834 3089 afloat = FLT_MAX;
f337b084 3090 else if (anv < -FLT_MAX)
fc241834 3091 afloat = -FLT_MAX;
f337b084 3092 else afloat = (float)anv;
baf3cf9c 3093# else
f337b084 3094 afloat = (float)anv;
baf3cf9c 3095# endif
f337b084 3096#endif /* __VOS__ */
1109a392 3097 DO_BO_PACK_N(afloat, float);
f337b084 3098 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
3099 }
3100 break;
3101 case 'd':
a6ec74c1 3102 while (len-- > 0) {
f337b084
TH
3103 double adouble;
3104 NV anv;
a6ec74c1 3105 fromstr = NEXTFROM;
f337b084 3106 anv = SvNV(fromstr);
5cdb9e01 3107#ifdef __VOS__
f337b084 3108 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3109 during conversion from long double to double into infinity,
3110 so we do it by hand. This code should either be generalized
3111 for any OS that needs it, or removed if and when VOS
3112 implements posix-976 (suggestion to support mapping to
3113 infinity). Paul.Green@stratus.com 02-04-02. */
f337b084 3114 if (anv > DBL_MAX)
fc241834 3115 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3116 else if (anv < -DBL_MAX)
fc241834 3117 adouble = _double_constants[0]; /* double prec. inf. */
f337b084
TH
3118 else adouble = (double) anv;
3119#else /* __VOS__ */
baf3cf9c 3120# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3121 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3122 * on Alpha; fake it if we don't have them.
3123 */
f337b084 3124 if (anv > DBL_MAX)
fc241834 3125 adouble = DBL_MAX;
f337b084 3126 else if (anv < -DBL_MAX)
fc241834 3127 adouble = -DBL_MAX;
f337b084 3128 else adouble = (double)anv;
baf3cf9c 3129# else
f337b084 3130 adouble = (double)anv;
baf3cf9c 3131# endif
f337b084 3132#endif /* __VOS__ */
1109a392 3133 DO_BO_PACK_N(adouble, double);
f337b084 3134 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3135 }
3136 break;
fc241834
RGS
3137 case 'F': {
3138 NV anv;
1109a392 3139 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3140 while (len-- > 0) {
3141 fromstr = NEXTFROM;
3142 anv = SvNV(fromstr);
1109a392 3143 DO_BO_PACK_N(anv, NV);
fc241834 3144 PUSH_VAR(utf8, cur, anv);
92d41999
JH
3145 }
3146 break;
fc241834 3147 }
92d41999 3148#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834
RGS
3149 case 'D': {
3150 long double aldouble;
1109a392
MHM
3151 /* long doubles can have unused bits, which may be nonzero */
3152 Zero(&aldouble, 1, long double);
92d41999
JH
3153 while (len-- > 0) {
3154 fromstr = NEXTFROM;
3155 aldouble = (long double)SvNV(fromstr);
1109a392 3156 DO_BO_PACK_N(aldouble, long double);
fc241834 3157 PUSH_VAR(utf8, cur, aldouble);
92d41999
JH
3158 }
3159 break;
fc241834 3160 }
92d41999 3161#endif
7212898e 3162#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3163 case 'n' | TYPE_IS_SHRIEKING:
7212898e 3164#endif
a6ec74c1
JH
3165 case 'n':
3166 while (len-- > 0) {
f337b084 3167 I16 ai16;
a6ec74c1 3168 fromstr = NEXTFROM;
ef108786 3169 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3170#ifdef HAS_HTONS
ef108786 3171 ai16 = PerlSock_htons(ai16);
a6ec74c1 3172#endif
f337b084 3173 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3174 }
3175 break;
7212898e 3176#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3177 case 'v' | TYPE_IS_SHRIEKING:
7212898e 3178#endif
a6ec74c1
JH
3179 case 'v':
3180 while (len-- > 0) {
f337b084 3181 I16 ai16;
a6ec74c1 3182 fromstr = NEXTFROM;
ef108786 3183 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3184#ifdef HAS_HTOVS
ef108786 3185 ai16 = htovs(ai16);
a6ec74c1 3186#endif
f337b084 3187 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3188 }
3189 break;
49704364 3190 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3191#if SHORTSIZE != SIZE16
fc241834 3192 while (len-- > 0) {
f337b084 3193 unsigned short aushort;
fc241834
RGS
3194 fromstr = NEXTFROM;
3195 aushort = SvUV(fromstr);
3196 DO_BO_PACK(aushort, s);
f337b084 3197 PUSH_VAR(utf8, cur, aushort);
fc241834 3198 }
49704364
WL
3199 break;
3200#else
3201 /* Fall through! */
a6ec74c1 3202#endif
49704364 3203 case 'S':
fc241834 3204 while (len-- > 0) {
f337b084 3205 U16 au16;
fc241834
RGS
3206 fromstr = NEXTFROM;
3207 au16 = (U16)SvUV(fromstr);
3208 DO_BO_PACK(au16, 16);
f337b084 3209 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3210 }
3211 break;
49704364 3212 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3213#if SHORTSIZE != SIZE16
fc241834 3214 while (len-- > 0) {
f337b084 3215 short ashort;
fc241834
RGS
3216 fromstr = NEXTFROM;
3217 ashort = SvIV(fromstr);
3218 DO_BO_PACK(ashort, s);
f337b084 3219 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3220 }
49704364
WL
3221 break;
3222#else
3223 /* Fall through! */
a6ec74c1 3224#endif
49704364
WL
3225 case 's':
3226 while (len-- > 0) {
f337b084 3227 I16 ai16;
49704364 3228 fromstr = NEXTFROM;
ef108786
MHM
3229 ai16 = (I16)SvIV(fromstr);
3230 DO_BO_PACK(ai16, 16);
f337b084 3231 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3232 }
3233 break;
3234 case 'I':
49704364 3235 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3236 while (len-- > 0) {
f337b084 3237 unsigned int auint;
a6ec74c1
JH
3238 fromstr = NEXTFROM;
3239 auint = SvUV(fromstr);
1109a392 3240 DO_BO_PACK(auint, i);
f337b084 3241 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3242 }
3243 break;
92d41999
JH
3244 case 'j':
3245 while (len-- > 0) {
f337b084 3246 IV aiv;
92d41999
JH
3247 fromstr = NEXTFROM;
3248 aiv = SvIV(fromstr);
1109a392
MHM
3249#if IVSIZE == INTSIZE
3250 DO_BO_PACK(aiv, i);
3251#elif IVSIZE == LONGSIZE
3252 DO_BO_PACK(aiv, l);
3253#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3254 DO_BO_PACK(aiv, 64);
f337b084
TH
3255#else
3256 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3257#endif
f337b084 3258 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3259 }
3260 break;
3261 case 'J':
3262 while (len-- > 0) {
f337b084 3263 UV auv;
92d41999
JH
3264 fromstr = NEXTFROM;
3265 auv = SvUV(fromstr);
1109a392
MHM
3266#if UVSIZE == INTSIZE
3267 DO_BO_PACK(auv, i);
3268#elif UVSIZE == LONGSIZE
3269 DO_BO_PACK(auv, l);
3270#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3271 DO_BO_PACK(auv, 64);
f337b084
TH
3272#else
3273 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3274#endif
f337b084 3275 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3276 }
3277 break;
a6ec74c1
JH
3278 case 'w':
3279 while (len-- > 0) {
f337b084 3280 NV anv;
a6ec74c1 3281 fromstr = NEXTFROM;
15e9f109 3282 anv = SvNV(fromstr);
a6ec74c1 3283
f337b084
TH
3284 if (anv < 0) {
3285 *cur = '\0';
b162af07 3286 SvCUR_set(cat, cur - start);
49704364 3287 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3288 }
a6ec74c1 3289
196b62db
NC
3290 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3291 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3292 any negative IVs will have already been got by the croak()
3293 above. IOK is untrue for fractions, so we test them
3294 against UV_MAX_P1. */
f337b084
TH
3295 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3296 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3297 char *in = buf + sizeof(buf);
196b62db 3298 UV auv = SvUV(fromstr);
a6ec74c1
JH
3299
3300 do {
eb160463 3301 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3302 auv >>= 7;
3303 } while (auv);
3304 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3305 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3306 in, (buf + sizeof(buf)) - in);
3307 } else if (SvPOKp(fromstr))
3308 goto w_string;
a6ec74c1 3309 else if (SvNOKp(fromstr)) {
0258719b
NC
3310 /* 10**NV_MAX_10_EXP is the largest power of 10
3311 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3312 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3313 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3314 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3315 Some C compilers are strict about integral constant
3316 expressions so we conservatively divide by a slightly
3317 smaller integer instead of multiplying by the exact
3318 floating-point value.
0258719b
NC
3319 */
3320#ifdef NV_MAX_10_EXP
f337b084 3321 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3322 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3323#else
f337b084 3324 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3325 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3326#endif
a6ec74c1
JH
3327 char *in = buf + sizeof(buf);
3328
8b6e33c7 3329 anv = Perl_floor(anv);
a6ec74c1 3330 do {
8b6e33c7 3331 const NV next = Perl_floor(anv / 128);
a6ec74c1 3332 if (in <= buf) /* this cannot happen ;-) */
49704364 3333 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3334 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3335 anv = next;
3336 } while (anv > 0);
a6ec74c1 3337 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3338 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3339 in, (buf + sizeof(buf)) - in);
3340 } else {
8b6e33c7
AL
3341 const char *from;
3342 char *result, *in;
735b914b
JH
3343 SV *norm;
3344 STRLEN len;
3345 bool done;
3346
f337b084 3347 w_string:
735b914b 3348 /* Copy string and check for compliance */
349d4f2f 3349 from = SvPV_const(fromstr, len);
735b914b 3350 if ((norm = is_an_int(from, len)) == NULL)
49704364 3351 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 3352
a02a5408 3353 Newx(result, len, char);
735b914b
JH
3354 in = result + len;
3355 done = FALSE;
f337b084 3356 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3357 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3358 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3359 in, (result + len) - in);
735b914b
JH
3360 Safefree(result);
3361 SvREFCNT_dec(norm); /* free norm */
fc241834 3362 }
a6ec74c1
JH
3363 }
3364 break;
3365 case 'i':
49704364 3366 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3367 while (len-- > 0) {
f337b084 3368 int aint;
a6ec74c1
JH
3369 fromstr = NEXTFROM;
3370 aint = SvIV(fromstr);
1109a392 3371 DO_BO_PACK(aint, i);
f337b084 3372 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3373 }
3374 break;
7212898e 3375#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3376 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3377#endif
a6ec74c1
JH
3378 case 'N':
3379 while (len-- > 0) {
f337b084 3380 U32 au32;
a6ec74c1 3381 fromstr = NEXTFROM;
ef108786 3382 au32 = SvUV(fromstr);
a6ec74c1 3383#ifdef HAS_HTONL
ef108786 3384 au32 = PerlSock_htonl(au32);
a6ec74c1 3385#endif
f337b084 3386 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3387 }
3388 break;
7212898e 3389#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3390 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3391#endif
a6ec74c1
JH
3392 case 'V':
3393 while (len-- > 0) {
f337b084 3394 U32 au32;
a6ec74c1 3395 fromstr = NEXTFROM;
ef108786 3396 au32 = SvUV(fromstr);
a6ec74c1 3397#ifdef HAS_HTOVL
ef108786 3398 au32 = htovl(au32);
a6ec74c1 3399#endif
f337b084 3400 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3401 }
3402 break;
49704364 3403 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3404#if LONGSIZE != SIZE32
fc241834 3405 while (len-- > 0) {
f337b084 3406 unsigned long aulong;
fc241834
RGS
3407 fromstr = NEXTFROM;
3408 aulong = SvUV(fromstr);
3409 DO_BO_PACK(aulong, l);
f337b084 3410 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3411 }
49704364
WL
3412 break;
3413#else
3414 /* Fall though! */
a6ec74c1 3415#endif
49704364 3416 case 'L':
fc241834 3417 while (len-- > 0) {
f337b084 3418 U32 au32;
fc241834
RGS
3419 fromstr = NEXTFROM;
3420 au32 = SvUV(fromstr);
3421 DO_BO_PACK(au32, 32);
f337b084 3422 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3423 }
3424 break;
49704364 3425 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3426#if LONGSIZE != SIZE32
fc241834 3427 while (len-- > 0) {
f337b084 3428 long along;
fc241834
RGS
3429 fromstr = NEXTFROM;
3430 along = SvIV(fromstr);
3431 DO_BO_PACK(along, l);
f337b084 3432 PUSH_VAR(utf8, cur, along);
a6ec74c1 3433 }
49704364
WL
3434 break;
3435#else
3436 /* Fall though! */
a6ec74c1 3437#endif
49704364
WL
3438 case 'l':
3439 while (len-- > 0) {
f337b084 3440 I32 ai32;
49704364 3441 fromstr = NEXTFROM;
ef108786
MHM
3442 ai32 = SvIV(fromstr);
3443 DO_BO_PACK(ai32, 32);
f337b084 3444 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3445 }
3446 break;
3447#ifdef HAS_QUAD
3448 case 'Q':
3449 while (len-- > 0) {
f337b084 3450 Uquad_t auquad;
a6ec74c1 3451 fromstr = NEXTFROM;
f337b084 3452 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3453 DO_BO_PACK(auquad, 64);
f337b084 3454 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3455 }
3456 break;
3457 case 'q':
3458 while (len-- > 0) {
f337b084 3459 Quad_t aquad;
a6ec74c1
JH
3460 fromstr = NEXTFROM;
3461 aquad = (Quad_t)SvIV(fromstr);
1109a392 3462 DO_BO_PACK(aquad, 64);
f337b084 3463 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3464 }
3465 break;
f337b084 3466#endif /* HAS_QUAD */
a6ec74c1
JH
3467 case 'P':
3468 len = 1; /* assume SV is correct length */
f337b084 3469 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3470 /* Fall through! */
a6ec74c1
JH
3471 case 'p':
3472 while (len-- > 0) {
83003860 3473 const char *aptr;
f337b084 3474
a6ec74c1 3475 fromstr = NEXTFROM;
28a4f200
TH
3476 SvGETMAGIC(fromstr);
3477 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3478 else {
a6ec74c1
JH
3479 /* XXX better yet, could spirit away the string to
3480 * a safe spot and hang on to it until the result
3481 * of pack() (and all copies of the result) are
3482 * gone.
3483 */
041457d9
DM
3484 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3485 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
9014280d 3486 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834 3487 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3488 }
3489 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3490 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3491 else
2596d9fe 3492 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3493 }
07409e01 3494 DO_BO_PACK_PC(aptr);
f337b084 3495 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3496 }
3497 break;
fc241834 3498 case 'u': {
f7fe979e 3499 const char *aptr, *aend;
fc241834 3500 bool from_utf8;
f337b084 3501
a6ec74c1 3502 fromstr = NEXTFROM;
fc241834
RGS
3503 if (len <= 2) len = 45;
3504 else len = len / 3 * 3;
3505 if (len >= 64) {
0fdc08b6
RGS
3506 if (ckWARN(WARN_PACK))
3507 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834
RGS
3508 "Field too wide in 'u' format in pack");
3509 len = 63;
3510 }
83003860 3511 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3512 from_utf8 = DO_UTF8(fromstr);
3513 if (from_utf8) {
3514 aend = aptr + fromlen;
3515 fromlen = sv_len_utf8(fromstr);
3516 } else aend = NULL; /* Unused, but keep compilers happy */
3517 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);