This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Normalise 3 fatal pack/unpack error messages to "panic: %s"
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
7212898e 73#if PERL_VERSION >= 9
f337b084
TH
74# define PERL_PACK_CAN_BYTEORDER
75# define PERL_PACK_CAN_SHRIEKSIGN
76#endif
77
78#ifndef CHAR_BIT
79# define CHAR_BIT 8
7212898e 80#endif
3473cf63
RGS
81/* Maximum number of bytes to which a byte can grow due to upgrade */
82#define UTF8_EXPAND 2
7212898e 83
a6ec74c1 84/*
a6ec74c1
JH
85 * Offset for integer pack/unpack.
86 *
87 * On architectures where I16 and I32 aren't really 16 and 32 bits,
88 * which for now are all Crays, pack and unpack have to play games.
89 */
90
91/*
92 * These values are required for portability of pack() output.
93 * If they're not right on your machine, then pack() and unpack()
94 * wouldn't work right anyway; you'll need to apply the Cray hack.
95 * (I'd like to check them with #if, but you can't use sizeof() in
96 * the preprocessor.) --???
97 */
98/*
99 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
100 defines are now in config.h. --Andy Dougherty April 1998
101 */
102#define SIZE16 2
103#define SIZE32 4
104
105/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
106 --jhi Feb 1999 */
107
1109a392
MHM
108#if U16SIZE > SIZE16 || U32SIZE > SIZE32
109# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
110# define OFF16(p) ((char*)(p))
111# define OFF32(p) ((char*)(p))
a6ec74c1 112# else
1109a392 113# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
114# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
115# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
116# else
08ca2aa3 117 ++++ bad cray byte order
a6ec74c1
JH
118# endif
119# endif
a6ec74c1 120#else
08ca2aa3
TH
121# define OFF16(p) ((char *) (p))
122# define OFF32(p) ((char *) (p))
a6ec74c1
JH
123#endif
124
f337b084
TH
125/* Only to be used inside a loop (see the break) */
126#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
127 if (utf8) { \
128 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
129 } else { \
130 Copy(s, OFF16(p), SIZE16, char); \
131 (s) += SIZE16; \
132 } \
133} STMT_END
134
135/* Only to be used inside a loop (see the break) */
136#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
137 if (utf8) { \
138 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
139 } else { \
140 Copy(s, OFF32(p), SIZE32, char); \
141 (s) += SIZE32; \
142 } \
143} STMT_END
144
145#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
146#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
147
148/* Only to be used inside a loop (see the break) */
f337b084 149#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
08ca2aa3
TH
150STMT_START { \
151 if (utf8) { \
f337b084
TH
152 if (!uni_to_bytes(aTHX_ &s, strend, \
153 (char *) &var, sizeof(var), datumtype)) break;\
08ca2aa3
TH
154 } else { \
155 Copy(s, (char *) &var, sizeof(var), char); \
156 s += sizeof(var); \
157 } \
08ca2aa3
TH
158} STMT_END
159
f337b084 160#define PUSH_VAR(utf8, aptr, var) \
230e1fce 161 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 162
49704364
LW
163/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
164#define MAX_SUB_TEMPLATE_LEVEL 100
165
66c611c5 166/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
167#define FLAG_WAS_UTF8 0x40
168#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 169#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 170#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
LW
171#define FLAG_SLASH 0x04
172#define FLAG_COMMA 0x02
173#define FLAG_PACK 0x01
174
a6ec74c1
JH
175STATIC SV *
176S_mul128(pTHX_ SV *sv, U8 m)
177{
178 STRLEN len;
179 char *s = SvPV(sv, len);
180 char *t;
a6ec74c1 181
7918f24d
NC
182 PERL_ARGS_ASSERT_MUL128;
183
a6ec74c1 184 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 185 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
186
187 sv_catsv(tmpNew, sv);
188 SvREFCNT_dec(sv); /* free old sv */
189 sv = tmpNew;
190 s = SvPV(sv, len);
191 }
192 t = s + len - 1;
193 while (!*t) /* trailing '\0'? */
194 t--;
195 while (t > s) {
f7fe979e 196 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
197 *(t--) = '0' + (char)(i % 10);
198 m = (char)(i / 10);
a6ec74c1
JH
199 }
200 return (sv);
201}
202
203/* Explosives and implosives. */
204
205#if 'I' == 73 && 'J' == 74
206/* On an ASCII/ISO kind of system */
207#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
208#else
209/*
210 Some other sort of character set - use memchr() so we don't match
211 the null byte.
212 */
213#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
214#endif
215
66c611c5 216/* type modifiers */
62f95557 217#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
218#define TYPE_IS_BIG_ENDIAN 0x200
219#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 220#define TYPE_IS_PACK 0x800
1109a392 221#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 222#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
223#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
224
7212898e 225#ifdef PERL_PACK_CAN_SHRIEKSIGN
28be1210 226# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
7212898e 227#else
28be1210 228# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
7212898e
NC
229#endif
230
231#ifndef PERL_PACK_CAN_BYTEORDER
232/* Put "can't" first because it is shorter */
233# define TYPE_ENDIANNESS(t) 0
234# define TYPE_NO_ENDIANNESS(t) (t)
235
236# define ENDIANNESS_ALLOWED_TYPES ""
237
238# define DO_BO_UNPACK(var, type)
239# define DO_BO_PACK(var, type)
07409e01
NC
240# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
241# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
7212898e
NC
242# define DO_BO_UNPACK_N(var, type)
243# define DO_BO_PACK_N(var, type)
244# define DO_BO_UNPACK_P(var)
245# define DO_BO_PACK_P(var)
45f723e1
NC
246# define DO_BO_UNPACK_PC(var)
247# define DO_BO_PACK_PC(var)
66c611c5 248
f337b084 249#else /* PERL_PACK_CAN_BYTEORDER */
7212898e
NC
250
251# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
252# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
253
254# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
255
256# define DO_BO_UNPACK(var, type) \
1109a392 257 STMT_START { \
66c611c5 258 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
259 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
260 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
261 default: break; \
262 } \
263 } STMT_END
264
7212898e 265# define DO_BO_PACK(var, type) \
1109a392 266 STMT_START { \
66c611c5 267 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
268 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
269 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
270 default: break; \
271 } \
272 } STMT_END
273
07409e01 274# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
1109a392 275 STMT_START { \
66c611c5 276 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 277 case TYPE_IS_BIG_ENDIAN: \
07409e01 278 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
1109a392
MHM
279 break; \
280 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 281 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
1109a392
MHM
282 break; \
283 default: \
284 break; \
285 } \
286 } STMT_END
287
07409e01 288# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
1109a392 289 STMT_START { \
66c611c5 290 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 291 case TYPE_IS_BIG_ENDIAN: \
07409e01 292 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
1109a392
MHM
293 break; \
294 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 295 var = (post_cast *) my_htole ## type ((pre_cast) var); \
1109a392
MHM
296 break; \
297 default: \
298 break; \
299 } \
300 } STMT_END
301
7212898e 302# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
303 STMT_START { \
304 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
305 case TYPE_IS_BIG_ENDIAN: \
306 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
307 "platform", #action, #type); \
308 break; \
309 case TYPE_IS_LITTLE_ENDIAN: \
310 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
311 "platform", #action, #type); \
312 break; \
313 default: \
314 break; \
315 } \
316 } STMT_END
317
7212898e 318# if PTRSIZE == INTSIZE
07409e01
NC
319# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
320# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
321# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
322# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
7212898e 323# elif PTRSIZE == LONGSIZE
07409e01
NC
324# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
325# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
326# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
327# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
2b00a750
JD
328# elif PTRSIZE == IVSIZE
329# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
330# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
331# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
332# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
7212898e
NC
333# else
334# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
335# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
45f723e1
NC
336# define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
337# define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
7212898e 338# endif
1109a392 339
7212898e 340# if defined(my_htolen) && defined(my_letohn) && \
1109a392 341 defined(my_htoben) && defined(my_betohn)
7212898e 342# define DO_BO_UNPACK_N(var, type) \
1109a392 343 STMT_START { \
66c611c5 344 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
345 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
346 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
347 default: break; \
348 } \
349 } STMT_END
350
7212898e 351# define DO_BO_PACK_N(var, type) \
1109a392 352 STMT_START { \
66c611c5 353 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
354 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
355 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
356 default: break; \
357 } \
358 } STMT_END
7212898e
NC
359# else
360# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
361# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
362# endif
363
f337b084 364#endif /* PERL_PACK_CAN_BYTEORDER */
62f95557 365
78d46eaa 366#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 367#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
368#define PACK_SIZE_MASK 0x3F
369
78d46eaa
NC
370/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
371 in). You're unlikely ever to need to regenerate them. */
a7a3cfaa
TH
372
373#if TYPE_IS_SHRIEKING != 0x100
374 ++++shriek offset should be 256
375#endif
376
377typedef U8 packprops_t;
78d46eaa
NC
378#if 'J'-'I' == 1
379/* ASCII */
f43a9a31 380STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
381 /* normal */
382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
383 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
384 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
385 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
386 0, 0, 0,
1651fc44 387 /* C */ sizeof(unsigned char),
80a13697 388#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 389 /* D */ LONG_DOUBLESIZE,
80a13697 390#else
a7a3cfaa 391 0,
80a13697 392#endif
a7a3cfaa
TH
393 0,
394 /* F */ NVSIZE,
395 0, 0,
396 /* I */ sizeof(unsigned int),
397 /* J */ UVSIZE,
398 0,
399 /* L */ SIZE32,
400 0,
401 /* N */ SIZE32,
402 0, 0,
80a13697 403#if defined(HAS_QUAD)
a7a3cfaa 404 /* Q */ sizeof(Uquad_t),
80a13697 405#else
a7a3cfaa 406 0,
80a13697 407#endif
a7a3cfaa
TH
408 0,
409 /* S */ SIZE16,
410 0,
411 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
412 /* V */ SIZE32,
413 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
414 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
415 /* c */ sizeof(char),
416 /* d */ sizeof(double),
417 0,
418 /* f */ sizeof(float),
419 0, 0,
420 /* i */ sizeof(int),
421 /* j */ IVSIZE,
422 0,
423 /* l */ SIZE32,
424 0,
425 /* n */ SIZE16,
426 0,
427 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 428#if defined(HAS_QUAD)
a7a3cfaa 429 /* q */ sizeof(Quad_t),
80a13697 430#else
a7a3cfaa 431 0,
80a13697 432#endif
a7a3cfaa
TH
433 0,
434 /* s */ SIZE16,
435 0, 0,
436 /* v */ SIZE16,
437 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
444 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446 0, 0, 0, 0, 0, 0, 0, 0,
447 /* shrieking */
448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
450 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
451 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
452 0, 0, 0, 0, 0, 0, 0, 0, 0,
453 /* I */ sizeof(unsigned int),
454 0, 0,
455 /* L */ sizeof(unsigned long),
456 0,
7212898e 457#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 458 /* N */ SIZE32,
7212898e 459#else
a7a3cfaa 460 0,
7212898e 461#endif
a7a3cfaa
TH
462 0, 0, 0, 0,
463 /* S */ sizeof(unsigned short),
464 0, 0,
7212898e 465#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 466 /* V */ SIZE32,
7212898e 467#else
a7a3cfaa 468 0,
7212898e 469#endif
a7a3cfaa
TH
470 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
471 0, 0,
472 /* i */ sizeof(int),
473 0, 0,
474 /* l */ sizeof(long),
475 0,
7212898e 476#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 477 /* n */ SIZE16,
7212898e 478#else
a7a3cfaa 479 0,
7212898e 480#endif
a7a3cfaa
TH
481 0, 0, 0, 0,
482 /* s */ sizeof(short),
483 0, 0,
7212898e 484#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 485 /* v */ SIZE16,
7212898e 486#else
a7a3cfaa 487 0,
7212898e 488#endif
a7a3cfaa
TH
489 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
490 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
495 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
496 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
497 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
498};
499#else
500/* EBCDIC (or bust) */
f43a9a31 501STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
502 /* normal */
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
506 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
507 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
508 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
509 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
510 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
511 0, 0, 0,
512 /* c */ sizeof(char),
513 /* d */ sizeof(double),
514 0,
515 /* f */ sizeof(float),
516 0, 0,
517 /* i */ sizeof(int),
518 0, 0, 0, 0, 0, 0, 0,
519 /* j */ IVSIZE,
520 0,
521 /* l */ SIZE32,
522 0,
523 /* n */ SIZE16,
524 0,
525 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 526#if defined(HAS_QUAD)
a7a3cfaa 527 /* q */ sizeof(Quad_t),
80a13697 528#else
a7a3cfaa 529 0,
80a13697 530#endif
a7a3cfaa
TH
531 0, 0, 0, 0, 0, 0, 0, 0, 0,
532 /* s */ SIZE16,
533 0, 0,
534 /* v */ SIZE16,
535 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
536 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
537 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1651fc44 538 /* C */ sizeof(unsigned char),
80a13697 539#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 540 /* D */ LONG_DOUBLESIZE,
80a13697 541#else
a7a3cfaa 542 0,
80a13697 543#endif
a7a3cfaa
TH
544 0,
545 /* F */ NVSIZE,
546 0, 0,
547 /* I */ sizeof(unsigned int),
548 0, 0, 0, 0, 0, 0, 0,
549 /* J */ UVSIZE,
550 0,
551 /* L */ SIZE32,
552 0,
553 /* N */ SIZE32,
554 0, 0,
80a13697 555#if defined(HAS_QUAD)
a7a3cfaa 556 /* Q */ sizeof(Uquad_t),
80a13697 557#else
a7a3cfaa 558 0,
80a13697 559#endif
a7a3cfaa
TH
560 0, 0, 0, 0, 0, 0, 0, 0, 0,
561 /* S */ SIZE16,
562 0,
563 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
564 /* V */ SIZE32,
565 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 /* shrieking */
569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
572 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
573 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
574 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
575 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
576 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
577 0, 0, 0, 0, 0, 0, 0, 0, 0,
578 /* i */ sizeof(int),
579 0, 0, 0, 0, 0, 0, 0, 0, 0,
580 /* l */ sizeof(long),
581 0,
7212898e 582#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 583 /* n */ SIZE16,
7212898e 584#else
a7a3cfaa 585 0,
7212898e 586#endif
a7a3cfaa
TH
587 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
588 /* s */ sizeof(short),
589 0, 0,
7212898e 590#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 591 /* v */ SIZE16,
7212898e 592#else
a7a3cfaa 593 0,
7212898e 594#endif
a7a3cfaa
TH
595 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
596 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
597 0, 0, 0,
598 /* I */ sizeof(unsigned int),
599 0, 0, 0, 0, 0, 0, 0, 0, 0,
600 /* L */ sizeof(unsigned long),
601 0,
7212898e 602#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 603 /* N */ SIZE32,
7212898e 604#else
a7a3cfaa 605 0,
7212898e 606#endif
a7a3cfaa
TH
607 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
608 /* S */ sizeof(unsigned short),
609 0, 0,
7212898e 610#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 611 /* V */ SIZE32,
7212898e 612#else
a7a3cfaa 613 0,
7212898e 614#endif
a7a3cfaa
TH
615 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
616 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
617};
618#endif
619
08ca2aa3 620STATIC U8
f7fe979e 621uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 622{
08ca2aa3 623 STRLEN retlen;
0bcc34c2 624 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 625 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
626 /* We try to process malformed UTF-8 as much as possible (preferrably with
627 warnings), but these two mean we make no progress in the string and
628 might enter an infinite loop */
629 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
630 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
631 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 632 if (val >= 0x100) {
f337b084 633 if (ckWARN(WARN_UNPACK))
08ca2aa3
TH
634 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
635 "Character in '%c' format wrapped in unpack",
f337b084 636 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
637 val &= 0xff;
638 }
639 *s += retlen;
fe2774ed 640 return (U8)val;
08ca2aa3
TH
641}
642
f337b084
TH
643#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
644 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
645 *(U8 *)(s)++)
646
647STATIC bool
f7fe979e 648uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
649{
650 UV val;
651 STRLEN retlen;
f7fe979e 652 const char *from = *s;
08ca2aa3 653 int bad = 0;
f7fe979e 654 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3
TH
655 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
656 for (;buf_len > 0; buf_len--) {
657 if (from >= end) return FALSE;
f337b084 658 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
659 if (retlen == (STRLEN) -1 || retlen == 0) {
660 from += UTF8SKIP(from);
661 bad |= 1;
662 } else from += retlen;
663 if (val >= 0x100) {
664 bad |= 2;
665 val &= 0xff;
666 }
fe2774ed 667 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
668 }
669 /* We have enough characters for the buffer. Did we have problems ? */
670 if (bad) {
671 if (bad & 1) {
672 /* Rewalk the string fragment while warning */
f7fe979e 673 const char *ptr;
9e27e96a 674 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
675 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
676 if (ptr >= end) break;
f337b084 677 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
678 }
679 if (from > end) from = end;
680 }
64332b6b
NC
681 if ((bad & 2) && ((datumtype & TYPE_IS_PACK)
682 ? ckWARN(WARN_PACK) : ckWARN(WARN_UNPACK)))
fc241834 683 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084
TH
684 WARN_PACK : WARN_UNPACK),
685 "Character(s) in '%c' format wrapped in %s",
fc241834 686 (int) TYPE_NO_MODIFIERS(datumtype),
f337b084 687 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
688 }
689 *s = from;
690 return TRUE;
691}
692
693STATIC bool
f7fe979e 694next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 695{
97aff369 696 dVAR;
08ca2aa3 697 STRLEN retlen;
0bcc34c2 698 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
699 if (val >= 0x100 || !ISUUCHAR(val) ||
700 retlen == (STRLEN) -1 || retlen == 0) {
701 *out = 0;
702 return FALSE;
703 }
704 *out = PL_uudmap[val] & 077;
f337b084 705 *s += retlen;
08ca2aa3
TH
706 return TRUE;
707}
78d46eaa 708
64844641 709STATIC char *
14333449 710S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
0bcc34c2 711 const U8 * const end = start + len;
64844641 712
7918f24d
NC
713 PERL_ARGS_ASSERT_BYTES_TO_UNI;
714
f337b084 715 while (start < end) {
48fa4626
ST
716 const UV uv = NATIVE_TO_ASCII(*start);
717 if (UNI_IS_INVARIANT(uv))
718 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
719 else {
720 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
721 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
f337b084
TH
722 }
723 start++;
724 }
64844641 725 return dest;
f337b084
TH
726}
727
230e1fce
NC
728#define PUSH_BYTES(utf8, cur, buf, len) \
729STMT_START { \
64844641
AL
730 if (utf8) \
731 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
230e1fce
NC
732 else { \
733 Copy(buf, cur, len, char); \
734 (cur) += (len); \
735 } \
f337b084
TH
736} STMT_END
737
738#define GROWING(utf8, cat, start, cur, in_len) \
739STMT_START { \
740 STRLEN glen = (in_len); \
3473cf63 741 if (utf8) glen *= UTF8_EXPAND; \
f337b084 742 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 743 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
744 (cur) = (start) + SvCUR(cat); \
745 } \
746} STMT_END
747
748#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
749STMT_START { \
f7fe979e 750 const STRLEN glen = (in_len); \
f337b084 751 STRLEN gl = glen; \
3473cf63 752 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
753 if ((cur) + gl >= (start) + SvLEN(cat)) { \
754 *cur = '\0'; \
b162af07 755 SvCUR_set((cat), (cur) - (start)); \
0bd48802 756 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
757 (cur) = (start) + SvCUR(cat); \
758 } \
759 PUSH_BYTES(utf8, cur, buf, glen); \
760} STMT_END
761
762#define PUSH_BYTE(utf8, s, byte) \
763STMT_START { \
764 if (utf8) { \
f7fe979e 765 const U8 au8 = (byte); \
64844641 766 (s) = bytes_to_uni(&au8, 1, (s)); \
f337b084
TH
767 } else *(U8 *)(s)++ = (byte); \
768} STMT_END
769
770/* Only to be used inside a loop (see the break) */
771#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
772STMT_START { \
773 STRLEN retlen; \
774 if (str >= end) break; \
775 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
776 if (retlen == (STRLEN) -1 || retlen == 0) { \
777 *cur = '\0'; \
778 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
779 } \
780 str += retlen; \
781} STMT_END
782
f7fe979e
AL
783static const char *_action( const tempsym_t* symptr )
784{
10edeb5d 785 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
786}
787
206947d2 788/* Returns the sizeof() struct described by pat */
028d1f6d 789STATIC I32
f337b084 790S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 791{
f337b084 792 I32 total = 0;
206947d2 793
7918f24d
NC
794 PERL_ARGS_ASSERT_MEASURE_STRUCT;
795
49704364 796 while (next_symbol(symptr)) {
f337b084 797 I32 len;
f7fe979e 798 int size;
f337b084
TH
799
800 switch (symptr->howlen) {
fc241834 801 case e_star:
49704364 802 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 803 _action( symptr ) );
49704364 804 break;
f337b084
TH
805 default:
806 /* e_no_len and e_number */
807 len = symptr->length;
808 break;
49704364
LW
809 }
810
a7a3cfaa 811 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 812 if (!size) {
f7fe979e 813 int star;
80a13697
NC
814 /* endianness doesn't influence the size of a type */
815 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
816 default:
817 Perl_croak(aTHX_ "Invalid type '%c' in %s",
818 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 819 _action( symptr ) );
28be1210
TH
820#ifdef PERL_PACK_CAN_SHRIEKSIGN
821 case '.' | TYPE_IS_SHRIEKING:
822 case '@' | TYPE_IS_SHRIEKING:
823#endif
80a13697 824 case '@':
28be1210 825 case '.':
80a13697
NC
826 case '/':
827 case 'U': /* XXXX Is it correct? */
828 case 'w':
829 case 'u':
830 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 831 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 832 _action( symptr ) );
80a13697
NC
833 case '%':
834 size = 0;
835 break;
836 case '(':
fc241834
RGS
837 {
838 tempsym_t savsym = *symptr;
839 symptr->patptr = savsym.grpbeg;
840 symptr->patend = savsym.grpend;
841 /* XXXX Theoretically, we need to measure many times at
842 different positions, since the subexpression may contain
843 alignment commands, but be not of aligned length.
844 Need to detect this and croak(). */
845 size = measure_struct(symptr);
846 *symptr = savsym;
847 break;
848 }
80a13697
NC
849 case 'X' | TYPE_IS_SHRIEKING:
850 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
851 */
852 if (!len) /* Avoid division by 0 */
853 len = 1;
854 len = total % len; /* Assumed: the start is aligned. */
855 /* FALL THROUGH */
856 case 'X':
857 size = -1;
858 if (total < len)
f7fe979e 859 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
860 break;
861 case 'x' | TYPE_IS_SHRIEKING:
862 if (!len) /* Avoid division by 0 */
863 len = 1;
864 star = total % len; /* Assumed: the start is aligned. */
865 if (star) /* Other portable ways? */
866 len = len - star;
867 else
868 len = 0;
869 /* FALL THROUGH */
870 case 'x':
871 case 'A':
872 case 'Z':
873 case 'a':
80a13697
NC
874 size = 1;
875 break;
876 case 'B':
877 case 'b':
878 len = (len + 7)/8;
879 size = 1;
880 break;
881 case 'H':
882 case 'h':
883 len = (len + 1)/2;
884 size = 1;
885 break;
78d46eaa 886
80a13697
NC
887 case 'P':
888 len = 1;
889 size = sizeof(char*);
78d46eaa
NC
890 break;
891 }
206947d2
IZ
892 }
893 total += len * size;
894 }
895 return total;
896}
897
49704364
LW
898
899/* locate matching closing parenthesis or bracket
900 * returns char pointer to char after match, or NULL
901 */
f7fe979e
AL
902STATIC const char *
903S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
18529408 904{
7918f24d
NC
905 PERL_ARGS_ASSERT_GROUP_END;
906
49704364 907 while (patptr < patend) {
f7fe979e 908 const char c = *patptr++;
49704364
LW
909
910 if (isSPACE(c))
911 continue;
912 else if (c == ender)
913 return patptr-1;
914 else if (c == '#') {
915 while (patptr < patend && *patptr != '\n')
916 patptr++;
917 continue;
918 } else if (c == '(')
919 patptr = group_end(patptr, patend, ')') + 1;
920 else if (c == '[')
921 patptr = group_end(patptr, patend, ']') + 1;
18529408 922 }
49704364
LW
923 Perl_croak(aTHX_ "No group ending character '%c' found in template",
924 ender);
925 return 0;
18529408
IZ
926}
927
49704364
LW
928
929/* Convert unsigned decimal number to binary.
930 * Expects a pointer to the first digit and address of length variable
931 * Advances char pointer to 1st non-digit char and returns number
fc241834 932 */
f7fe979e
AL
933STATIC const char *
934S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
49704364
LW
935{
936 I32 len = *patptr++ - '0';
7918f24d
NC
937
938 PERL_ARGS_ASSERT_GET_NUM;
939
49704364
LW
940 while (isDIGIT(*patptr)) {
941 if (len >= 0x7FFFFFFF/10)
942 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
943 len = (len * 10) + (*patptr++ - '0');
944 }
945 *lenptr = len;
946 return patptr;
947}
948
949/* The marvellous template parsing routine: Using state stored in *symptr,
950 * locates next template code and count
951 */
952STATIC bool
f337b084 953S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 954{
f7fe979e 955 const char* patptr = symptr->patptr;
0bcc34c2 956 const char* const patend = symptr->patend;
49704364 957
7918f24d
NC
958 PERL_ARGS_ASSERT_NEXT_SYMBOL;
959
49704364
LW
960 symptr->flags &= ~FLAG_SLASH;
961
962 while (patptr < patend) {
963 if (isSPACE(*patptr))
964 patptr++;
965 else if (*patptr == '#') {
966 patptr++;
967 while (patptr < patend && *patptr != '\n')
968 patptr++;
969 if (patptr < patend)
970 patptr++;
971 } else {
fc241834 972 /* We should have found a template code */
49704364 973 I32 code = *patptr++ & 0xFF;
66c611c5 974 U32 inherited_modifiers = 0;
49704364
LW
975
976 if (code == ','){ /* grandfather in commas but with a warning */
977 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
978 symptr->flags |= FLAG_COMMA;
979 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 980 "Invalid type ',' in %s", _action( symptr ) );
49704364
LW
981 }
982 continue;
983 }
fc241834 984
49704364 985 /* for '(', skip to ')' */
fc241834 986 if (code == '(') {
49704364
LW
987 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
988 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 989 _action( symptr ) );
49704364
LW
990 symptr->grpbeg = patptr;
991 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
992 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
993 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 994 _action( symptr ) );
49704364
LW
995 }
996
66c611c5
MHM
997 /* look for group modifiers to inherit */
998 if (TYPE_ENDIANNESS(symptr->flags)) {
999 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
1000 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
1001 }
1002
1109a392
MHM
1003 /* look for modifiers */
1004 while (patptr < patend) {
b7787f18
AL
1005 const char *allowed;
1006 I32 modifier;
1109a392
MHM
1007 switch (*patptr) {
1008 case '!':
1009 modifier = TYPE_IS_SHRIEKING;
7212898e 1010 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 1011 break;
7212898e 1012#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
1013 case '>':
1014 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 1015 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
1016 break;
1017 case '<':
1018 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 1019 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 1020 break;
f337b084 1021#endif /* PERL_PACK_CAN_BYTEORDER */
1109a392 1022 default:
b7787f18
AL
1023 allowed = "";
1024 modifier = 0;
1109a392
MHM
1025 break;
1026 }
66c611c5 1027
1109a392
MHM
1028 if (modifier == 0)
1029 break;
66c611c5 1030
1109a392
MHM
1031 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1032 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 1033 allowed, _action( symptr ) );
66c611c5
MHM
1034
1035 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 1036 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 1037 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
1038 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1039 TYPE_ENDIANNESS_MASK)
1040 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 1041 *patptr, _action( symptr ) );
66c611c5 1042
041457d9 1043 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1109a392
MHM
1044 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1045 "Duplicate modifier '%c' after '%c' in %s",
1046 *patptr, (int) TYPE_NO_MODIFIERS(code),
f7fe979e 1047 _action( symptr ) );
1109a392 1048 }
66c611c5 1049
1109a392
MHM
1050 code |= modifier;
1051 patptr++;
49704364
LW
1052 }
1053
66c611c5
MHM
1054 /* inherit modifiers */
1055 code |= inherited_modifiers;
1056
fc241834 1057 /* look for count and/or / */
49704364
LW
1058 if (patptr < patend) {
1059 if (isDIGIT(*patptr)) {
1060 patptr = get_num( patptr, &symptr->length );
1061 symptr->howlen = e_number;
1062
1063 } else if (*patptr == '*') {
1064 patptr++;
1065 symptr->howlen = e_star;
1066
1067 } else if (*patptr == '[') {
f7fe979e 1068 const char* lenptr = ++patptr;
49704364
LW
1069 symptr->howlen = e_number;
1070 patptr = group_end( patptr, patend, ']' ) + 1;
1071 /* what kind of [] is it? */
1072 if (isDIGIT(*lenptr)) {
1073 lenptr = get_num( lenptr, &symptr->length );
1074 if( *lenptr != ']' )
1075 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 1076 _action( symptr ) );
49704364
LW
1077 } else {
1078 tempsym_t savsym = *symptr;
1079 symptr->patend = patptr-1;
1080 symptr->patptr = lenptr;
1081 savsym.length = measure_struct(symptr);
1082 *symptr = savsym;
1083 }
1084 } else {
1085 symptr->howlen = e_no_len;
1086 symptr->length = 1;
1087 }
1088
1089 /* try to find / */
1090 while (patptr < patend) {
1091 if (isSPACE(*patptr))
1092 patptr++;
1093 else if (*patptr == '#') {
1094 patptr++;
1095 while (patptr < patend && *patptr != '\n')
1096 patptr++;
1097 if (patptr < patend)
1098 patptr++;
1099 } else {
66c611c5 1100 if (*patptr == '/') {
49704364
LW
1101 symptr->flags |= FLAG_SLASH;
1102 patptr++;
66c611c5
MHM
1103 if (patptr < patend &&
1104 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 1105 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 1106 _action( symptr ) );
49704364
LW
1107 }
1108 break;
1109 }
18529408 1110 }
49704364
LW
1111 } else {
1112 /* at end - no count, no / */
1113 symptr->howlen = e_no_len;
1114 symptr->length = 1;
1115 }
1116
1117 symptr->code = code;
fc241834 1118 symptr->patptr = patptr;
49704364 1119 return TRUE;
18529408 1120 }
49704364 1121 }
fc241834 1122 symptr->patptr = patptr;
49704364 1123 return FALSE;
18529408
IZ
1124}
1125
18529408 1126/*
fc241834 1127 There is no way to cleanly handle the case where we should process the
08ca2aa3 1128 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
1129 (e.g. estimates like strend-s as an upper bound for the number of
1130 characters left wouldn't work). So if we foresee the need of this
1131 (pattern starts with U or contains U0), we want to work on the encoded
1132 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
1133 themselves if they need to do a lot of unpacks like this on it
1134*/
fc241834 1135STATIC bool
08ca2aa3
TH
1136need_utf8(const char *pat, const char *patend)
1137{
1138 bool first = TRUE;
7918f24d
NC
1139
1140 PERL_ARGS_ASSERT_NEED_UTF8;
1141
08ca2aa3
TH
1142 while (pat < patend) {
1143 if (pat[0] == '#') {
1144 pat++;
f7fe979e 1145 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1146 if (!pat) return FALSE;
1147 } else if (pat[0] == 'U') {
1148 if (first || pat[1] == '0') return TRUE;
1149 } else first = FALSE;
1150 pat++;
1151 }
1152 return FALSE;
1153}
1154
1155STATIC char
1156first_symbol(const char *pat, const char *patend) {
7918f24d
NC
1157 PERL_ARGS_ASSERT_FIRST_SYMBOL;
1158
08ca2aa3
TH
1159 while (pat < patend) {
1160 if (pat[0] != '#') return pat[0];
1161 pat++;
f7fe979e 1162 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1163 if (!pat) return 0;
1164 pat++;
1165 }
1166 return 0;
1167}
1168
1169/*
7accc089
JH
1170=for apidoc unpackstring
1171
608d3aed
LW
1172The engine implementing unpack() Perl function. C<unpackstring> puts the
1173extracted list items on the stack and returns the number of elements.
1174Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
7accc089
JH
1175
1176=cut */
1177
1178I32
f7fe979e 1179Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 1180{
f7fe979e 1181 tempsym_t sym;
08ca2aa3 1182
7918f24d
NC
1183 PERL_ARGS_ASSERT_UNPACKSTRING;
1184
f337b084 1185 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1186 else if (need_utf8(pat, patend)) {
1187 /* We probably should try to avoid this in case a scalar context call
1188 wouldn't get to the "U0" */
1189 STRLEN len = strend - s;
230e1fce 1190 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1191 SAVEFREEPV(s);
1192 strend = s + len;
f337b084 1193 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1194 }
1195
f337b084
TH
1196 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1197 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1198
f7fe979e 1199 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
1200
1201 return unpack_rec(&sym, s, s, strend, NULL );
1202}
1203
4136a0f7 1204STATIC I32
f7fe979e 1205S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 1206{
27da23d5 1207 dVAR; dSP;
18529408 1208 SV *sv;
f7fe979e 1209 const I32 start_sp_offset = SP - PL_stack_base;
49704364 1210 howlen_t howlen;
a6ec74c1 1211 I32 checksum = 0;
92d41999 1212 UV cuv = 0;
a6ec74c1 1213 NV cdouble = 0.0;
f337b084 1214 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 1215 bool beyond = FALSE;
21c16052 1216 bool explicit_length;
9e27e96a 1217 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 1218 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
1219
1220 PERL_ARGS_ASSERT_UNPACK_REC;
1221
28be1210 1222 symptr->strbeg = s - strbeg;
49704364 1223
49704364 1224 while (next_symbol(symptr)) {
a7a3cfaa 1225 packprops_t props;
9e27e96a 1226 I32 len;
f337b084 1227 I32 datumtype = symptr->code;
206947d2 1228 /* do first one only unless in list context
08ca2aa3 1229 / is implemented by unpacking the count, then popping it from the
206947d2 1230 stack, so must check that we're not in the middle of a / */
49704364 1231 if ( unpack_only_one
206947d2 1232 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1233 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1234 break;
49704364 1235
f337b084 1236 switch (howlen = symptr->howlen) {
fc241834
RGS
1237 case e_star:
1238 len = strend - strbeg; /* long enough */
49704364 1239 break;
f337b084
TH
1240 default:
1241 /* e_no_len and e_number */
1242 len = symptr->length;
1243 break;
49704364 1244 }
18529408 1245
21c16052 1246 explicit_length = TRUE;
a6ec74c1 1247 redo_switch:
49704364 1248 beyond = s >= strend;
a7a3cfaa
TH
1249
1250 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1251 if (props) {
1252 /* props nonzero means we can process this letter. */
9e27e96a
AL
1253 const long size = props & PACK_SIZE_MASK;
1254 const long howmany = (strend - s) / size;
a7a3cfaa
TH
1255 if (len > howmany)
1256 len = howmany;
1257
1258 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1259 if (len && unpack_only_one) len = 1;
1260 EXTEND(SP, len);
1261 EXTEND_MORTAL(len);
78d46eaa
NC
1262 }
1263 }
a7a3cfaa 1264
1109a392 1265 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1266 default:
1109a392 1267 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1268
a6ec74c1 1269 case '%':
49704364 1270 if (howlen == e_no_len)
18529408 1271 len = 16; /* len is not specified */
a6ec74c1 1272 checksum = len;
92d41999 1273 cuv = 0;
a6ec74c1 1274 cdouble = 0;
18529408 1275 continue;
a6ec74c1 1276 break;
18529408
IZ
1277 case '(':
1278 {
49704364 1279 tempsym_t savsym = *symptr;
9e27e96a 1280 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 1281 symptr->flags |= group_modifiers;
49704364 1282 symptr->patend = savsym.grpend;
28be1210 1283 symptr->previous = &savsym;
49704364 1284 symptr->level++;
18529408 1285 PUTBACK;
c6f750d1 1286 if (len && unpack_only_one) len = 1;
18529408 1287 while (len--) {
49704364 1288 symptr->patptr = savsym.grpbeg;
f337b084
TH
1289 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1290 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1291 unpack_rec(symptr, s, strbeg, strend, &s);
1292 if (s == strend && savsym.howlen == e_star)
49704364 1293 break; /* No way to continue */
18529408
IZ
1294 }
1295 SPAGAIN;
28be1210 1296 savsym.flags = symptr->flags & ~group_modifiers;
49704364 1297 *symptr = savsym;
18529408
IZ
1298 break;
1299 }
28be1210
TH
1300#ifdef PERL_PACK_CAN_SHRIEKSIGN
1301 case '.' | TYPE_IS_SHRIEKING:
1302#endif
1303 case '.': {
9e27e96a 1304 const char *from;
28be1210
TH
1305 SV *sv;
1306#ifdef PERL_PACK_CAN_SHRIEKSIGN
9e27e96a 1307 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210 1308#else /* PERL_PACK_CAN_SHRIEKSIGN */
9e27e96a 1309 const bool u8 = utf8;
28be1210
TH
1310#endif
1311 if (howlen == e_star) from = strbeg;
1312 else if (len <= 0) from = s;
1313 else {
1314 tempsym_t *group = symptr;
1315
1316 while (--len && group) group = group->previous;
1317 from = group ? strbeg + group->strbeg : strbeg;
1318 }
1319 sv = from <= s ?
00646304
CB
1320 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1321 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 1322 mXPUSHs(sv);
28be1210
TH
1323 break;
1324 }
1325#ifdef PERL_PACK_CAN_SHRIEKSIGN
1326 case '@' | TYPE_IS_SHRIEKING:
1327#endif
a6ec74c1 1328 case '@':
28be1210
TH
1329 s = strbeg + symptr->strbeg;
1330#ifdef PERL_PACK_CAN_SHRIEKSIGN
1331 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1332#else /* PERL_PACK_CAN_SHRIEKSIGN */
1333 if (utf8)
1334#endif
1335 {
08ca2aa3
TH
1336 while (len > 0) {
1337 if (s >= strend)
1338 Perl_croak(aTHX_ "'@' outside of string in unpack");
1339 s += UTF8SKIP(s);
1340 len--;
1341 }
1342 if (s > strend)
1343 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1344 } else {
28be1210 1345 if (strend-s < len)
fc241834 1346 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1347 s += len;
08ca2aa3 1348 }
a6ec74c1 1349 break;
62f95557
IZ
1350 case 'X' | TYPE_IS_SHRIEKING:
1351 if (!len) /* Avoid division by 0 */
1352 len = 1;
08ca2aa3 1353 if (utf8) {
f7fe979e 1354 const char *hop, *last;
f337b084
TH
1355 I32 l = len;
1356 hop = last = strbeg;
1357 while (hop < s) {
1358 hop += UTF8SKIP(hop);
1359 if (--l == 0) {
08ca2aa3 1360 last = hop;
f337b084
TH
1361 l = len;
1362 }
fc241834 1363 }
f337b084
TH
1364 if (last > s)
1365 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1366 s = last;
1367 break;
f337b084
TH
1368 }
1369 len = (s - strbeg) % len;
62f95557 1370 /* FALL THROUGH */
a6ec74c1 1371 case 'X':
08ca2aa3
TH
1372 if (utf8) {
1373 while (len > 0) {
1374 if (s <= strbeg)
1375 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1376 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1377 if (s <= strbeg)
1378 Perl_croak(aTHX_ "'X' outside of string in unpack");
1379 }
1380 len--;
1381 }
1382 } else {
fc241834
RGS
1383 if (len > s - strbeg)
1384 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1385 s -= len;
08ca2aa3 1386 }
a6ec74c1 1387 break;
9e27e96a
AL
1388 case 'x' | TYPE_IS_SHRIEKING: {
1389 I32 ai32;
62f95557
IZ
1390 if (!len) /* Avoid division by 0 */
1391 len = 1;
230e1fce
NC
1392 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1393 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1394 if (ai32 == 0) break;
1395 len -= ai32;
9e27e96a 1396 }
62f95557 1397 /* FALL THROUGH */
a6ec74c1 1398 case 'x':
08ca2aa3
TH
1399 if (utf8) {
1400 while (len>0) {
1401 if (s >= strend)
1402 Perl_croak(aTHX_ "'x' outside of string in unpack");
1403 s += UTF8SKIP(s);
1404 len--;
1405 }
1406 } else {
fc241834
RGS
1407 if (len > strend - s)
1408 Perl_croak(aTHX_ "'x' outside of string in unpack");
1409 s += len;
f337b084 1410 }
a6ec74c1
JH
1411 break;
1412 case '/':
49704364
LW
1413 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1414 break;
a6ec74c1
JH
1415 case 'A':
1416 case 'Z':
1417 case 'a':
08ca2aa3
TH
1418 if (checksum) {
1419 /* Preliminary length estimate is assumed done in 'W' */
1420 if (len > strend - s) len = strend - s;
1421 goto W_checksum;
1422 }
1423 if (utf8) {
1424 I32 l;
f7fe979e 1425 const char *hop;
08ca2aa3
TH
1426 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1427 if (hop >= strend) {
1428 if (hop > strend)
1429 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1430 break;
fc241834 1431 }
a6ec74c1 1432 }
08ca2aa3
TH
1433 if (hop > strend)
1434 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1435 len = hop - s;
1436 } else if (len > strend - s)
1437 len = strend - s;
1438
1439 if (datumtype == 'Z') {
1440 /* 'Z' strips stuff after first null */
f7fe979e 1441 const char *ptr, *end;
f337b084
TH
1442 end = s + len;
1443 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1444 sv = newSVpvn(s, ptr-s);
1445 if (howlen == e_star) /* exact for 'Z*' */
1446 len = ptr-s + (ptr != strend ? 1 : 0);
1447 } else if (datumtype == 'A') {
1448 /* 'A' strips both nulls and spaces */
f7fe979e 1449 const char *ptr;
18bdf90a
TH
1450 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1451 for (ptr = s+len-1; ptr >= s; ptr--)
1452 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
230e1fce 1453 !is_utf8_space((U8 *) ptr)) break;
18bdf90a
TH
1454 if (ptr >= s) ptr += UTF8SKIP(ptr);
1455 else ptr++;
28be1210 1456 if (ptr > s+len)
18bdf90a
TH
1457 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1458 } else {
1459 for (ptr = s+len-1; ptr >= s; ptr--)
1460 if (*ptr != 0 && !isSPACE(*ptr)) break;
1461 ptr++;
1462 }
08ca2aa3
TH
1463 sv = newSVpvn(s, ptr-s);
1464 } else sv = newSVpvn(s, len);
1465
1466 if (utf8) {
1467 SvUTF8_on(sv);
1468 /* Undo any upgrade done due to need_utf8() */
f337b084 1469 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1470 sv_utf8_downgrade(sv, 0);
a6ec74c1 1471 }
6e449a3a 1472 mXPUSHs(sv);
08ca2aa3 1473 s += len;
a6ec74c1
JH
1474 break;
1475 case 'B':
08ca2aa3
TH
1476 case 'b': {
1477 char *str;
49704364 1478 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1479 len = (strend - s) * 8;
1480 if (checksum) {
f337b084 1481 if (utf8)
08ca2aa3 1482 while (len >= 8 && s < strend) {
f337b084 1483 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1484 len -= 8;
1485 }
f337b084 1486 else
fc241834 1487 while (len >= 8) {
08ca2aa3 1488 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1489 len -= 8;
1490 }
08ca2aa3
TH
1491 if (len && s < strend) {
1492 U8 bits;
f337b084
TH
1493 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1494 if (datumtype == 'b')
a6ec74c1 1495 while (len-- > 0) {
92d41999 1496 if (bits & 1) cuv++;
a6ec74c1
JH
1497 bits >>= 1;
1498 }
f337b084 1499 else
a6ec74c1 1500 while (len-- > 0) {
08ca2aa3 1501 if (bits & 0x80) cuv++;
a6ec74c1
JH
1502 bits <<= 1;
1503 }
fc241834 1504 }
a6ec74c1
JH
1505 break;
1506 }
08ca2aa3 1507
561b68a9 1508 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1509 SvPOK_on(sv);
1510 str = SvPVX(sv);
1511 if (datumtype == 'b') {
f337b084 1512 U8 bits = 0;
f7fe979e 1513 const I32 ai32 = len;
08ca2aa3
TH
1514 for (len = 0; len < ai32; len++) {
1515 if (len & 7) bits >>= 1;
1516 else if (utf8) {
1517 if (s >= strend) break;
f337b084 1518 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1519 } else bits = *(U8 *) s++;
1520 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1521 }
08ca2aa3 1522 } else {
f337b084 1523 U8 bits = 0;
f7fe979e 1524 const I32 ai32 = len;
08ca2aa3
TH
1525 for (len = 0; len < ai32; len++) {
1526 if (len & 7) bits <<= 1;
1527 else if (utf8) {
1528 if (s >= strend) break;
f337b084 1529 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1530 } else bits = *(U8 *) s++;
1531 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1532 }
1533 }
1534 *str = '\0';
aa07b2f6 1535 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1536 XPUSHs(sv);
a6ec74c1 1537 break;
08ca2aa3 1538 }
a6ec74c1 1539 case 'H':
08ca2aa3
TH
1540 case 'h': {
1541 char *str;
fc241834 1542 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1543 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1544 len = (strend - s) * 2;
561b68a9 1545 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1546 SvPOK_on(sv);
1547 str = SvPVX(sv);
1548 if (datumtype == 'h') {
f337b084 1549 U8 bits = 0;
9e27e96a 1550 I32 ai32 = len;
fc241834
RGS
1551 for (len = 0; len < ai32; len++) {
1552 if (len & 1) bits >>= 4;
1553 else if (utf8) {
1554 if (s >= strend) break;
f337b084 1555 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1556 } else bits = * (U8 *) s++;
a6ec74c1
JH
1557 *str++ = PL_hexdigit[bits & 15];
1558 }
08ca2aa3 1559 } else {
f337b084 1560 U8 bits = 0;
f7fe979e 1561 const I32 ai32 = len;
08ca2aa3
TH
1562 for (len = 0; len < ai32; len++) {
1563 if (len & 1) bits <<= 4;
1564 else if (utf8) {
1565 if (s >= strend) break;
f337b084 1566 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1567 } else bits = *(U8 *) s++;
a6ec74c1
JH
1568 *str++ = PL_hexdigit[(bits >> 4) & 15];
1569 }
1570 }
1571 *str = '\0';
aa07b2f6 1572 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1573 XPUSHs(sv);
a6ec74c1 1574 break;
08ca2aa3 1575 }
1651fc44
ML
1576 case 'C':
1577 if (len == 0) {
1578 if (explicit_length)
1579 /* Switch to "character" mode */
1580 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1581 break;
1582 }
1583 /* FALL THROUGH */
a6ec74c1 1584 case 'c':
1651fc44
ML
1585 while (len-- > 0 && s < strend) {
1586 int aint;
1587 if (utf8)
1588 {
1589 STRLEN retlen;
1590 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1591 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1592 if (retlen == (STRLEN) -1 || retlen == 0)
1593 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1594 s += retlen;
1595 }
1596 else
1597 aint = *(U8 *)(s)++;
1598 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1599 aint -= 256;
08ca2aa3 1600 if (!checksum)
6e449a3a 1601 mPUSHi(aint);
73cb7263
NC
1602 else if (checksum > bits_in_uv)
1603 cdouble += (NV)aint;
1604 else
1605 cuv += aint;
a6ec74c1
JH
1606 }
1607 break;
08ca2aa3
TH
1608 case 'W':
1609 W_checksum:
1651fc44 1610 if (utf8) {
08ca2aa3 1611 while (len-- > 0 && s < strend) {
08ca2aa3 1612 STRLEN retlen;
f7fe979e 1613 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1614 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1615 if (retlen == (STRLEN) -1 || retlen == 0)
1616 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1617 s += retlen;
1618 if (!checksum)
6e449a3a 1619 mPUSHu(val);
08ca2aa3
TH
1620 else if (checksum > bits_in_uv)
1621 cdouble += (NV) val;
d6d3e8bd 1622 else
08ca2aa3 1623 cuv += val;
fc241834 1624 }
08ca2aa3 1625 } else if (!checksum)
a6ec74c1 1626 while (len-- > 0) {
f7fe979e 1627 const U8 ch = *(U8 *) s++;
6e449a3a 1628 mPUSHu(ch);
a6ec74c1 1629 }
08ca2aa3
TH
1630 else if (checksum > bits_in_uv)
1631 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1632 else
1633 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1634 break;
1635 case 'U':
35bcd338 1636 if (len == 0) {
08ca2aa3
TH
1637 if (explicit_length) {
1638 /* Switch to "bytes in UTF-8" mode */
f337b084 1639 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1640 else
1641 /* Should be impossible due to the need_utf8() test */
1642 Perl_croak(aTHX_ "U0 mode on a byte string");
1643 }
35bcd338
JH
1644 break;
1645 }
08ca2aa3 1646 if (len > strend - s) len = strend - s;
fc241834 1647 if (!checksum) {
08ca2aa3
TH
1648 if (len && unpack_only_one) len = 1;
1649 EXTEND(SP, len);
1650 EXTEND_MORTAL(len);
fc241834 1651 }
08ca2aa3
TH
1652 while (len-- > 0 && s < strend) {
1653 STRLEN retlen;
1654 UV auv;
1655 if (utf8) {
1656 U8 result[UTF8_MAXLEN];
f7fe979e 1657 const char *ptr = s;
08ca2aa3 1658 STRLEN len;
08ca2aa3
TH
1659 /* Bug: warns about bad utf8 even if we are short on bytes
1660 and will break out of the loop */
230e1fce
NC
1661 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1662 'U'))
08ca2aa3
TH
1663 break;
1664 len = UTF8SKIP(result);
fc241834 1665 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1666 (char *) &result[1], len-1, 'U')) break;
08ca2aa3
TH
1667 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1668 s = ptr;
1669 } else {
1670 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1671 if (retlen == (STRLEN) -1 || retlen == 0)
1672 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1673 s += retlen;
1674 }
1675 if (!checksum)
6e449a3a 1676 mPUSHu(auv);
73cb7263 1677 else if (checksum > bits_in_uv)
08ca2aa3 1678 cdouble += (NV) auv;
73cb7263 1679 else
08ca2aa3 1680 cuv += auv;
a6ec74c1
JH
1681 }
1682 break;
49704364
LW
1683 case 's' | TYPE_IS_SHRIEKING:
1684#if SHORTSIZE != SIZE16
73cb7263 1685 while (len-- > 0) {
08ca2aa3 1686 short ashort;
f337b084
TH
1687 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1688 DO_BO_UNPACK(ashort, s);
08ca2aa3 1689 if (!checksum)
6e449a3a 1690 mPUSHi(ashort);
73cb7263
NC
1691 else if (checksum > bits_in_uv)
1692 cdouble += (NV)ashort;
1693 else
1694 cuv += ashort;
49704364
LW
1695 }
1696 break;
1697#else
1698 /* Fallthrough! */
a6ec74c1 1699#endif
49704364 1700 case 's':
73cb7263 1701 while (len-- > 0) {
08ca2aa3
TH
1702 I16 ai16;
1703
1704#if U16SIZE > SIZE16
1705 ai16 = 0;
1706#endif
f337b084 1707 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1708 DO_BO_UNPACK(ai16, 16);
1109a392 1709#if U16SIZE > SIZE16
73cb7263
NC
1710 if (ai16 > 32767)
1711 ai16 -= 65536;
a6ec74c1 1712#endif
08ca2aa3 1713 if (!checksum)
6e449a3a 1714 mPUSHi(ai16);
73cb7263
NC
1715 else if (checksum > bits_in_uv)
1716 cdouble += (NV)ai16;
1717 else
1718 cuv += ai16;
a6ec74c1
JH
1719 }
1720 break;
49704364
LW
1721 case 'S' | TYPE_IS_SHRIEKING:
1722#if SHORTSIZE != SIZE16
73cb7263 1723 while (len-- > 0) {
08ca2aa3 1724 unsigned short aushort;
f337b084
TH
1725 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1726 DO_BO_UNPACK(aushort, s);
08ca2aa3 1727 if (!checksum)
6e449a3a 1728 mPUSHu(aushort);
73cb7263
NC
1729 else if (checksum > bits_in_uv)
1730 cdouble += (NV)aushort;
1731 else
1732 cuv += aushort;
49704364
LW
1733 }
1734 break;
1735#else
1736 /* Fallhrough! */
1737#endif
a6ec74c1
JH
1738 case 'v':
1739 case 'n':
1740 case 'S':
73cb7263 1741 while (len-- > 0) {
08ca2aa3
TH
1742 U16 au16;
1743#if U16SIZE > SIZE16
1744 au16 = 0;
1745#endif
f337b084 1746 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1747 DO_BO_UNPACK(au16, 16);
a6ec74c1 1748#ifdef HAS_NTOHS
73cb7263
NC
1749 if (datumtype == 'n')
1750 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1751#endif
1752#ifdef HAS_VTOHS
73cb7263
NC
1753 if (datumtype == 'v')
1754 au16 = vtohs(au16);
a6ec74c1 1755#endif
08ca2aa3 1756 if (!checksum)
6e449a3a 1757 mPUSHu(au16);
73cb7263 1758 else if (checksum > bits_in_uv)
f337b084 1759 cdouble += (NV) au16;
73cb7263
NC
1760 else
1761 cuv += au16;
a6ec74c1
JH
1762 }
1763 break;
7212898e 1764#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1765 case 'v' | TYPE_IS_SHRIEKING:
1766 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1767 while (len-- > 0) {
08ca2aa3
TH
1768 I16 ai16;
1769# if U16SIZE > SIZE16
1770 ai16 = 0;
1771# endif
f337b084 1772 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1773# ifdef HAS_NTOHS
73cb7263 1774 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1775 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1776# endif /* HAS_NTOHS */
1777# ifdef HAS_VTOHS
73cb7263 1778 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1779 ai16 = (I16) vtohs((U16) ai16);
1780# endif /* HAS_VTOHS */
1781 if (!checksum)
6e449a3a 1782 mPUSHi(ai16);
73cb7263 1783 else if (checksum > bits_in_uv)
08ca2aa3 1784 cdouble += (NV) ai16;
73cb7263
NC
1785 else
1786 cuv += ai16;
068bd2e7
MHM
1787 }
1788 break;
08ca2aa3 1789#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1790 case 'i':
49704364 1791 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1792 while (len-- > 0) {
08ca2aa3 1793 int aint;
f337b084
TH
1794 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1795 DO_BO_UNPACK(aint, i);
08ca2aa3 1796 if (!checksum)
6e449a3a 1797 mPUSHi(aint);
73cb7263
NC
1798 else if (checksum > bits_in_uv)
1799 cdouble += (NV)aint;
1800 else
1801 cuv += aint;
a6ec74c1
JH
1802 }
1803 break;
1804 case 'I':
49704364 1805 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1806 while (len-- > 0) {
08ca2aa3 1807 unsigned int auint;
f337b084
TH
1808 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1809 DO_BO_UNPACK(auint, i);
08ca2aa3 1810 if (!checksum)
6e449a3a 1811 mPUSHu(auint);
73cb7263
NC
1812 else if (checksum > bits_in_uv)
1813 cdouble += (NV)auint;
1814 else
1815 cuv += auint;
a6ec74c1
JH
1816 }
1817 break;
92d41999 1818 case 'j':
73cb7263 1819 while (len-- > 0) {
08ca2aa3 1820 IV aiv;
f337b084 1821 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1822#if IVSIZE == INTSIZE
f337b084 1823 DO_BO_UNPACK(aiv, i);
1109a392 1824#elif IVSIZE == LONGSIZE
f337b084 1825 DO_BO_UNPACK(aiv, l);
1109a392 1826#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1827 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1828#else
1829 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1830#endif
08ca2aa3 1831 if (!checksum)
6e449a3a 1832 mPUSHi(aiv);
73cb7263
NC
1833 else if (checksum > bits_in_uv)
1834 cdouble += (NV)aiv;
1835 else
1836 cuv += aiv;
92d41999
JH
1837 }
1838 break;
1839 case 'J':
73cb7263 1840 while (len-- > 0) {
08ca2aa3 1841 UV auv;
f337b084 1842 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1843#if IVSIZE == INTSIZE
f337b084 1844 DO_BO_UNPACK(auv, i);
08ca2aa3 1845#elif IVSIZE == LONGSIZE
f337b084 1846 DO_BO_UNPACK(auv, l);
08ca2aa3 1847#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1848 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1849#else
1850 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1851#endif
08ca2aa3 1852 if (!checksum)
6e449a3a 1853 mPUSHu(auv);
73cb7263
NC
1854 else if (checksum > bits_in_uv)
1855 cdouble += (NV)auv;
1856 else
1857 cuv += auv;
92d41999
JH
1858 }
1859 break;
49704364
LW
1860 case 'l' | TYPE_IS_SHRIEKING:
1861#if LONGSIZE != SIZE32
73cb7263 1862 while (len-- > 0) {
08ca2aa3 1863 long along;
f337b084
TH
1864 SHIFT_VAR(utf8, s, strend, along, datumtype);
1865 DO_BO_UNPACK(along, l);
08ca2aa3 1866 if (!checksum)
6e449a3a 1867 mPUSHi(along);
73cb7263
NC
1868 else if (checksum > bits_in_uv)
1869 cdouble += (NV)along;
1870 else
1871 cuv += along;
49704364
LW
1872 }
1873 break;
1874#else
1875 /* Fallthrough! */
a6ec74c1 1876#endif
49704364 1877 case 'l':
73cb7263 1878 while (len-- > 0) {
08ca2aa3
TH
1879 I32 ai32;
1880#if U32SIZE > SIZE32
1881 ai32 = 0;
1882#endif
f337b084 1883 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1884 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1885#if U32SIZE > SIZE32
08ca2aa3 1886 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1887#endif
08ca2aa3 1888 if (!checksum)
6e449a3a 1889 mPUSHi(ai32);
73cb7263
NC
1890 else if (checksum > bits_in_uv)
1891 cdouble += (NV)ai32;
1892 else
1893 cuv += ai32;
a6ec74c1
JH
1894 }
1895 break;
49704364
LW
1896 case 'L' | TYPE_IS_SHRIEKING:
1897#if LONGSIZE != SIZE32
73cb7263 1898 while (len-- > 0) {
08ca2aa3 1899 unsigned long aulong;
f337b084
TH
1900 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1901 DO_BO_UNPACK(aulong, l);
08ca2aa3 1902 if (!checksum)
6e449a3a 1903 mPUSHu(aulong);
73cb7263
NC
1904 else if (checksum > bits_in_uv)
1905 cdouble += (NV)aulong;
1906 else
1907 cuv += aulong;
49704364
LW
1908 }
1909 break;
1910#else
1911 /* Fall through! */
1912#endif
a6ec74c1
JH
1913 case 'V':
1914 case 'N':
1915 case 'L':
73cb7263 1916 while (len-- > 0) {
08ca2aa3
TH
1917 U32 au32;
1918#if U32SIZE > SIZE32
1919 au32 = 0;
1920#endif
f337b084 1921 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1922 DO_BO_UNPACK(au32, 32);
a6ec74c1 1923#ifdef HAS_NTOHL
73cb7263
NC
1924 if (datumtype == 'N')
1925 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1926#endif
1927#ifdef HAS_VTOHL
73cb7263
NC
1928 if (datumtype == 'V')
1929 au32 = vtohl(au32);
a6ec74c1 1930#endif
08ca2aa3 1931 if (!checksum)
6e449a3a 1932 mPUSHu(au32);
fc241834
RGS
1933 else if (checksum > bits_in_uv)
1934 cdouble += (NV)au32;
1935 else
1936 cuv += au32;
a6ec74c1
JH
1937 }
1938 break;
7212898e 1939#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1940 case 'V' | TYPE_IS_SHRIEKING:
1941 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1942 while (len-- > 0) {
08ca2aa3
TH
1943 I32 ai32;
1944# if U32SIZE > SIZE32
1945 ai32 = 0;
1946# endif
f337b084 1947 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1948# ifdef HAS_NTOHL
73cb7263
NC
1949 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1950 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1951# endif
1952# ifdef HAS_VTOHL
73cb7263
NC
1953 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1954 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1955# endif
1956 if (!checksum)
6e449a3a 1957 mPUSHi(ai32);
73cb7263
NC
1958 else if (checksum > bits_in_uv)
1959 cdouble += (NV)ai32;
1960 else
1961 cuv += ai32;
068bd2e7
MHM
1962 }
1963 break;
08ca2aa3 1964#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1965 case 'p':
a6ec74c1 1966 while (len-- > 0) {
f7fe979e 1967 const char *aptr;
f337b084 1968 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1969 DO_BO_UNPACK_PC(aptr);
c4c5f44a 1970 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1971 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
1972 }
1973 break;
1974 case 'w':
a6ec74c1
JH
1975 {
1976 UV auv = 0;
1977 U32 bytes = 0;
fc241834 1978
08ca2aa3
TH
1979 while (len > 0 && s < strend) {
1980 U8 ch;
f337b084 1981 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1982 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1983 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1984 if (ch < 0x80) {
a6ec74c1 1985 bytes = 0;
6e449a3a 1986 mPUSHu(auv);
a6ec74c1
JH
1987 len--;
1988 auv = 0;
08ca2aa3 1989 continue;
a6ec74c1 1990 }
08ca2aa3 1991 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1992 const char *t;
a6ec74c1 1993
f5992bc4 1994 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 1995 while (s < strend) {
f337b084 1996 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1997 sv = mul128(sv, (U8)(ch & 0x7f));
1998 if (!(ch & 0x80)) {
a6ec74c1
JH
1999 bytes = 0;
2000 break;
2001 }
2002 }
10516c54 2003 t = SvPV_nolen_const(sv);
a6ec74c1
JH
2004 while (*t == '0')
2005 t++;
2006 sv_chop(sv, t);
6e449a3a 2007 mPUSHs(sv);
a6ec74c1
JH
2008 len--;
2009 auv = 0;
2010 }
2011 }
2012 if ((s >= strend) && bytes)
49704364 2013 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
2014 }
2015 break;
2016 case 'P':
49704364
LW
2017 if (symptr->howlen == e_star)
2018 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 2019 EXTEND(SP, 1);
2d3e0934 2020 if (s + sizeof(char*) <= strend) {
08ca2aa3 2021 char *aptr;
f337b084 2022 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 2023 DO_BO_UNPACK_PC(aptr);
fc241834 2024 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 2025 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 2026 }
a6ec74c1
JH
2027 break;
2028#ifdef HAS_QUAD
2029 case 'q':
73cb7263 2030 while (len-- > 0) {
08ca2aa3 2031 Quad_t aquad;
f337b084
TH
2032 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2033 DO_BO_UNPACK(aquad, 64);
08ca2aa3 2034 if (!checksum)
6e449a3a
MHM
2035 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
2036 newSViv((IV)aquad) : newSVnv((NV)aquad));
73cb7263
NC
2037 else if (checksum > bits_in_uv)
2038 cdouble += (NV)aquad;
2039 else
2040 cuv += aquad;
2041 }
a6ec74c1
JH
2042 break;
2043 case 'Q':
73cb7263 2044 while (len-- > 0) {
08ca2aa3 2045 Uquad_t auquad;
f337b084
TH
2046 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2047 DO_BO_UNPACK(auquad, 64);
08ca2aa3 2048 if (!checksum)
6e449a3a
MHM
2049 mPUSHs(auquad <= UV_MAX ?
2050 newSVuv((UV)auquad) : newSVnv((NV)auquad));
73cb7263
NC
2051 else if (checksum > bits_in_uv)
2052 cdouble += (NV)auquad;
2053 else
2054 cuv += auquad;
a6ec74c1
JH
2055 }
2056 break;
08ca2aa3 2057#endif /* HAS_QUAD */
a6ec74c1
JH
2058 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2059 case 'f':
73cb7263 2060 while (len-- > 0) {
08ca2aa3 2061 float afloat;
f337b084 2062 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
2063 DO_BO_UNPACK_N(afloat, float);
2064 if (!checksum)
6e449a3a 2065 mPUSHn(afloat);
08ca2aa3 2066 else
73cb7263 2067 cdouble += afloat;
fc241834 2068 }
a6ec74c1
JH
2069 break;
2070 case 'd':
73cb7263 2071 while (len-- > 0) {
08ca2aa3 2072 double adouble;
f337b084 2073 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
2074 DO_BO_UNPACK_N(adouble, double);
2075 if (!checksum)
6e449a3a 2076 mPUSHn(adouble);
08ca2aa3 2077 else
73cb7263 2078 cdouble += adouble;
fc241834 2079 }
a6ec74c1 2080 break;
92d41999 2081 case 'F':
73cb7263 2082 while (len-- > 0) {
08ca2aa3 2083 NV anv;
f337b084 2084 SHIFT_VAR(utf8, s, strend, anv, datumtype);
08ca2aa3
TH
2085 DO_BO_UNPACK_N(anv, NV);
2086 if (!checksum)
6e449a3a 2087 mPUSHn(anv);
08ca2aa3 2088 else
73cb7263 2089 cdouble += anv;
fc241834 2090 }
92d41999
JH
2091 break;
2092#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2093 case 'D':
73cb7263 2094 while (len-- > 0) {
08ca2aa3 2095 long double aldouble;
f337b084 2096 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
08ca2aa3
TH
2097 DO_BO_UNPACK_N(aldouble, long double);
2098 if (!checksum)
6e449a3a 2099 mPUSHn(aldouble);
08ca2aa3
TH
2100 else
2101 cdouble += aldouble;
92d41999
JH
2102 }
2103 break;
2104#endif
a6ec74c1 2105 case 'u':
08ca2aa3 2106 {
f7fe979e 2107 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 2108 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
2109 if (l) SvPOK_on(sv);
2110 }
2111 if (utf8) {
2112 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2113 I32 a, b, c, d;
db187877 2114 char hunk[3];
08ca2aa3 2115
08ca2aa3
TH
2116 while (len > 0) {
2117 next_uni_uu(aTHX_ &s, strend, &a);
2118 next_uni_uu(aTHX_ &s, strend, &b);
2119 next_uni_uu(aTHX_ &s, strend, &c);
2120 next_uni_uu(aTHX_ &s, strend, &d);
2121 hunk[0] = (char)((a << 2) | (b >> 4));
2122 hunk[1] = (char)((b << 4) | (c >> 2));
2123 hunk[2] = (char)((c << 6) | d);
2124 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2125 len -= 3;
2126 }
2127 if (s < strend) {
f7fe979e
AL
2128 if (*s == '\n') {
2129 s++;
2130 }
08ca2aa3
TH
2131 else {
2132 /* possible checksum byte */
f7fe979e
AL
2133 const char *skip = s+UTF8SKIP(s);
2134 if (skip < strend && *skip == '\n')
2135 s = skip+1;
08ca2aa3
TH
2136 }
2137 }
2138 }
2139 } else {
fc241834
RGS
2140 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2141 I32 a, b, c, d;
db187877 2142 char hunk[3];
a6ec74c1 2143
fc241834
RGS
2144 len = PL_uudmap[*(U8*)s++] & 077;
2145 while (len > 0) {
2146 if (s < strend && ISUUCHAR(*s))
2147 a = PL_uudmap[*(U8*)s++] & 077;
2148 else
2149 a = 0;
2150 if (s < strend && ISUUCHAR(*s))
2151 b = PL_uudmap[*(U8*)s++] & 077;
2152 else
2153 b = 0;
2154 if (s < strend && ISUUCHAR(*s))
2155 c = PL_uudmap[*(U8*)s++] & 077;
2156 else
2157 c = 0;
2158 if (s < strend && ISUUCHAR(*s))
2159 d = PL_uudmap[*(U8*)s++] & 077;
2160 else
2161 d = 0;
2162 hunk[0] = (char)((a << 2) | (b >> 4));
2163 hunk[1] = (char)((b << 4) | (c >> 2));
2164 hunk[2] = (char)((c << 6) | d);
2165 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2166 len -= 3;
2167 }
2168 if (*s == '\n')
2169 s++;
2170 else /* possible checksum byte */
2171 if (s + 1 < strend && s[1] == '\n')
2172 s += 2;
a6ec74c1 2173 }
08ca2aa3
TH
2174 }
2175 XPUSHs(sv);
a6ec74c1
JH
2176 break;
2177 }
49704364 2178
a6ec74c1 2179 if (checksum) {
1109a392 2180 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2181 (checksum > bits_in_uv &&
08ca2aa3
TH
2182 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2183 NV trouble, anv;
a6ec74c1 2184
08ca2aa3 2185 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2186 while (checksum >= 16) {
2187 checksum -= 16;
08ca2aa3 2188 anv *= 65536.0;
a6ec74c1 2189 }
a6ec74c1 2190 while (cdouble < 0.0)
08ca2aa3
TH
2191 cdouble += anv;
2192 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 2193 sv = newSVnv(cdouble);
a6ec74c1
JH
2194 }
2195 else {
fa8ec7c1
NC
2196 if (checksum < bits_in_uv) {
2197 UV mask = ((UV)1 << checksum) - 1;
92d41999 2198 cuv &= mask;
a6ec74c1 2199 }
c4c5f44a 2200 sv = newSVuv(cuv);
a6ec74c1 2201 }
6e449a3a 2202 mXPUSHs(sv);
a6ec74c1
JH
2203 checksum = 0;
2204 }
fc241834 2205
49704364
LW
2206 if (symptr->flags & FLAG_SLASH){
2207 if (SP - PL_stack_base - start_sp_offset <= 0)
2208 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2209 if( next_symbol(symptr) ){
2210 if( symptr->howlen == e_number )
2211 Perl_croak(aTHX_ "Count after length/code in unpack" );
2212 if( beyond ){
2213 /* ...end of char buffer then no decent length available */
2214 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2215 } else {
2216 /* take top of stack (hope it's numeric) */
2217 len = POPi;
2218 if( len < 0 )
2219 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2220 }
2221 } else {
2222 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2223 }
2224 datumtype = symptr->code;
21c16052 2225 explicit_length = FALSE;
49704364
LW
2226 goto redo_switch;
2227 }
a6ec74c1 2228 }
49704364 2229
18529408
IZ
2230 if (new_s)
2231 *new_s = s;
2232 PUTBACK;
2233 return SP - PL_stack_base - start_sp_offset;
2234}
2235
2236PP(pp_unpack)
2237{
97aff369 2238 dVAR;
18529408 2239 dSP;
bab9c0ac 2240 dPOPPOPssrl;
18529408
IZ
2241 I32 gimme = GIMME_V;
2242 STRLEN llen;
2243 STRLEN rlen;
5c144d81
NC
2244 const char *pat = SvPV_const(left, llen);
2245 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
2246 const char *strend = s + rlen;
2247 const char *patend = pat + llen;
08ca2aa3 2248 I32 cnt;
18529408
IZ
2249
2250 PUTBACK;
7accc089 2251 cnt = unpackstring(pat, patend, s, strend,
49704364 2252 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2253 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2254
18529408
IZ
2255 SPAGAIN;
2256 if ( !cnt && gimme == G_SCALAR )
2257 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2258 RETURN;
2259}
2260
f337b084 2261STATIC U8 *
f7fe979e 2262doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 2263{
f337b084 2264 *h++ = PL_uuemap[len];
a6ec74c1 2265 while (len > 2) {
f337b084
TH
2266 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2267 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2268 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2269 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2270 s += 3;
2271 len -= 3;
2272 }
2273 if (len > 0) {
f7fe979e 2274 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2275 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2276 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2277 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2278 *h++ = PL_uuemap[0];
a6ec74c1 2279 }
f337b084
TH
2280 *h++ = '\n';
2281 return h;
a6ec74c1
JH
2282}
2283
2284STATIC SV *
f7fe979e 2285S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 2286{
8b6e33c7
AL
2287 SV *result = newSVpvn(s, l);
2288 char *const result_c = SvPV_nolen(result); /* convenience */
2289 char *out = result_c;
2290 bool skip = 1;
2291 bool ignore = 0;
a6ec74c1 2292
7918f24d
NC
2293 PERL_ARGS_ASSERT_IS_AN_INT;
2294
a6ec74c1
JH
2295 while (*s) {
2296 switch (*s) {
2297 case ' ':
2298 break;
2299 case '+':
2300 if (!skip) {
2301 SvREFCNT_dec(result);
2302 return (NULL);
2303 }
2304 break;
2305 case '0':
2306 case '1':
2307 case '2':
2308 case '3':
2309 case '4':
2310 case '5':
2311 case '6':
2312 case '7':
2313 case '8':
2314 case '9':
2315 skip = 0;
2316 if (!ignore) {
2317 *(out++) = *s;
2318 }
2319 break;
2320 case '.':
2321 ignore = 1;
2322 break;
2323 default:
2324 SvREFCNT_dec(result);
2325 return (NULL);
2326 }
2327 s++;
2328 }
2329 *(out++) = '\0';
2330 SvCUR_set(result, out - result_c);
2331 return (result);
2332}
2333
2334/* pnum must be '\0' terminated */
2335STATIC int
2336S_div128(pTHX_ SV *pnum, bool *done)
2337{
8b6e33c7
AL
2338 STRLEN len;
2339 char * const s = SvPV(pnum, len);
2340 char *t = s;
2341 int m = 0;
2342
7918f24d
NC
2343 PERL_ARGS_ASSERT_DIV128;
2344
8b6e33c7
AL
2345 *done = 1;
2346 while (*t) {
2347 const int i = m * 10 + (*t - '0');
2348 const int r = (i >> 7); /* r < 10 */
2349 m = i & 0x7F;
2350 if (r) {
2351 *done = 0;
2352 }
2353 *(t++) = '0' + r;
a6ec74c1 2354 }
8b6e33c7
AL
2355 *(t++) = '\0';
2356 SvCUR_set(pnum, (STRLEN) (t - s));
2357 return (m);
a6ec74c1
JH
2358}
2359
18529408 2360/*
7accc089
JH
2361=for apidoc packlist
2362
2363The engine implementing pack() Perl function.
2364
bfce84ec
AL
2365=cut
2366*/
7accc089
JH
2367
2368void
f7fe979e 2369Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
7accc089 2370{
97aff369 2371 dVAR;
aadb217d
JH
2372 tempsym_t sym;
2373
7918f24d
NC
2374 PERL_ARGS_ASSERT_PACKLIST;
2375
f7fe979e 2376 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 2377
f337b084
TH
2378 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2379 Also make sure any UTF8 flag is loaded */
56eb0262 2380 SvPV_force_nolen(cat);
bfce84ec
AL
2381 if (DO_UTF8(cat))
2382 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 2383
49704364
LW
2384 (void)pack_rec( cat, &sym, beglist, endlist );
2385}
2386
f337b084
TH
2387/* like sv_utf8_upgrade, but also repoint the group start markers */
2388STATIC void
2389marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2390 STRLEN len;
2391 tempsym_t *group;
f7fe979e
AL
2392 const char *from_ptr, *from_start, *from_end, **marks, **m;
2393 char *to_start, *to_ptr;
f337b084
TH
2394
2395 if (SvUTF8(sv)) return;
2396
aa07b2f6 2397 from_start = SvPVX_const(sv);
f337b084
TH
2398 from_end = from_start + SvCUR(sv);
2399 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2400 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2401 if (from_ptr == from_end) {
2402 /* Simple case: no character needs to be changed */
2403 SvUTF8_on(sv);
2404 return;
2405 }
2406
3473cf63 2407 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2408 Newx(to_start, len, char);
f337b084
TH
2409 Copy(from_start, to_start, from_ptr-from_start, char);
2410 to_ptr = to_start + (from_ptr-from_start);
2411
a02a5408 2412 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2413 for (group=sym_ptr; group; group = group->previous)
2414 marks[group->level] = from_start + group->strbeg;
2415 marks[sym_ptr->level+1] = from_end+1;
2416 for (m = marks; *m < from_ptr; m++)
2417 *m = to_start + (*m-from_start);
2418
2419 for (;from_ptr < from_end; from_ptr++) {
2420 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2421 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2422 }
2423 *to_ptr = 0;
2424
2425 while (*m == from_ptr) *m++ = to_ptr;
2426 if (m != marks + sym_ptr->level+1) {
2427 Safefree(marks);
2428 Safefree(to_start);
5f5991a0 2429 Perl_croak(aTHX_ "panic: marks beyond string end");
f337b084
TH
2430 }
2431 for (group=sym_ptr; group; group = group->previous)
2432 group->strbeg = marks[group->level] - to_start;
2433 Safefree(marks);
2434
2435 if (SvOOK(sv)) {
2436 if (SvIVX(sv)) {
b162af07 2437 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2438 from_start -= SvIVX(sv);
2439 SvIV_set(sv, 0);
2440 }
2441 SvFLAGS(sv) &= ~SVf_OOK;
2442 }
2443 if (SvLEN(sv) != 0)
2444 Safefree(from_start);
f880fe2f 2445 SvPV_set(sv, to_start);
b162af07
SP
2446 SvCUR_set(sv, to_ptr - to_start);
2447 SvLEN_set(sv, len);
f337b084
TH
2448 SvUTF8_on(sv);
2449}
2450
2451/* Exponential string grower. Makes string extension effectively O(n)
2452 needed says how many extra bytes we need (not counting the final '\0')
2453 Only grows the string if there is an actual lack of space
2454*/
2455STATIC char *
0bd48802 2456S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2457 const STRLEN cur = SvCUR(sv);
2458 const STRLEN len = SvLEN(sv);
f337b084 2459 STRLEN extend;
7918f24d
NC
2460
2461 PERL_ARGS_ASSERT_SV_EXP_GROW;
2462
f337b084
TH
2463 if (len - cur > needed) return SvPVX(sv);
2464 extend = needed > len ? needed : len;
2465 return SvGROW(sv, len+extend+1);
2466}
49704364
LW
2467
2468STATIC
2469SV **
f337b084 2470S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2471{
97aff369 2472 dVAR;
49704364 2473 tempsym_t lookahead;
f337b084
TH
2474 I32 items = endlist - beglist;
2475 bool found = next_symbol(symptr);
2476 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2477 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084 2478
7918f24d
NC
2479 PERL_ARGS_ASSERT_PACK_REC;
2480
f337b084
TH
2481 if (symptr->level == 0 && found && symptr->code == 'U') {
2482 marked_upgrade(aTHX_ cat, symptr);
2483 symptr->flags |= FLAG_DO_UTF8;
2484 utf8 = 0;
49704364 2485 }
f337b084 2486 symptr->strbeg = SvCUR(cat);
49704364
LW
2487
2488 while (found) {
f337b084
TH
2489 SV *fromstr;
2490 STRLEN fromlen;
2491 I32 len;
a0714e2c 2492 SV *lengthcode = NULL;
49704364 2493 I32 datumtype = symptr->code;
f337b084
TH
2494 howlen_t howlen = symptr->howlen;
2495 char *start = SvPVX(cat);
2496 char *cur = start + SvCUR(cat);
49704364 2497
f337b084
TH
2498#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2499
2500 switch (howlen) {
fc241834 2501 case e_star:
f337b084
TH
2502 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2503 0 : items;
2504 break;
2505 default:
2506 /* e_no_len and e_number */
2507 len = symptr->length;
49704364
LW
2508 break;
2509 }
2510
f337b084 2511 if (len) {
a7a3cfaa 2512 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2513
a7a3cfaa
TH
2514 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2515 /* We can process this letter. */
2516 STRLEN size = props & PACK_SIZE_MASK;
2517 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2518 }
f337b084
TH
2519 }
2520
49704364
LW
2521 /* Look ahead for next symbol. Do we have code/code? */
2522 lookahead = *symptr;
2523 found = next_symbol(&lookahead);
246f24af
TH
2524 if (symptr->flags & FLAG_SLASH) {
2525 IV count;
f337b084 2526 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2527 if (strchr("aAZ", lookahead.code)) {
2528 if (lookahead.howlen == e_number) count = lookahead.length;
2529 else {
ce399ba6
NC
2530 if (items > 0) {
2531 if (SvGAMAGIC(*beglist)) {
2532 /* Avoid reading the active data more than once
2533 by copying it to a temporary. */
2534 STRLEN len;
2535 const char *const pv = SvPV_const(*beglist, len);
740cce10 2536 SV *const temp
59cd0e26
NC
2537 = newSVpvn_flags(pv, len,
2538 SVs_TEMP | SvUTF8(*beglist));
ce399ba6
NC
2539 *beglist = temp;
2540 }
246f24af
TH
2541 count = DO_UTF8(*beglist) ?
2542 sv_len_utf8(*beglist) : sv_len(*beglist);
ce399ba6 2543 }
246f24af
TH
2544 else count = 0;
2545 if (lookahead.code == 'Z') count++;
2546 }
2547 } else {
2548 if (lookahead.howlen == e_number && lookahead.length < items)
2549 count = lookahead.length;
2550 else count = items;
2551 }
2552 lookahead.howlen = e_number;
2553 lookahead.length = count;
2554 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2555 }
49704364 2556
fc241834
RGS
2557 /* Code inside the switch must take care to properly update
2558 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2559 doesn't simply leave using break */
1109a392 2560 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2561 default:
f337b084
TH
2562 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2563 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2564 case '%':
49704364 2565 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2566 {
2567 char *from;
2568#ifdef PERL_PACK_CAN_SHRIEKSIGN
2569 case '.' | TYPE_IS_SHRIEKING:
2570#endif
2571 case '.':
2572 if (howlen == e_star) from = start;
2573 else if (len == 0) from = cur;
2574 else {
2575 tempsym_t *group = symptr;
2576
2577 while (--len && group) group = group->previous;
2578 from = group ? start + group->strbeg : start;
2579 }
2580 fromstr = NEXTFROM;
2581 len = SvIV(fromstr);
2582 goto resize;
2583#ifdef PERL_PACK_CAN_SHRIEKSIGN
2584 case '@' | TYPE_IS_SHRIEKING:
2585#endif
a6ec74c1 2586 case '@':
28be1210
TH
2587 from = start + symptr->strbeg;
2588 resize:
2589#ifdef PERL_PACK_CAN_SHRIEKSIGN
2590 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2591#else /* PERL_PACK_CAN_SHRIEKSIGN */
2592 if (utf8)
2593#endif
2594 if (len >= 0) {
2595 while (len && from < cur) {
2596 from += UTF8SKIP(from);
2597 len--;
2598 }
2599 if (from > cur)
2600 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2601 if (len) {
2602 /* Here we know from == cur */
2603 grow:
2604 GROWING(0, cat, start, cur, len);
2605 Zero(cur, len, char);
2606 cur += len;
2607 } else if (from < cur) {
2608 len = cur - from;
2609 goto shrink;
2610 } else goto no_change;
2611 } else {
2612 cur = from;
2613 len = -len;
2614 goto utf8_shrink;
f337b084 2615 }
28be1210
TH
2616 else {
2617 len -= cur - from;
f337b084 2618 if (len > 0) goto grow;
28be1210 2619 if (len == 0) goto no_change;
fc241834 2620 len = -len;
28be1210 2621 goto shrink;
f337b084 2622 }
a6ec74c1 2623 break;
28be1210 2624 }
fc241834 2625 case '(': {
49704364 2626 tempsym_t savsym = *symptr;
66c611c5
MHM
2627 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2628 symptr->flags |= group_modifiers;
49704364
LW
2629 symptr->patend = savsym.grpend;
2630 symptr->level++;
f337b084 2631 symptr->previous = &lookahead;
18529408 2632 while (len--) {
f337b084
TH
2633 U32 was_utf8;
2634 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2635 else symptr->flags &= ~FLAG_PARSE_UTF8;
2636 was_utf8 = SvUTF8(cat);
49704364 2637 symptr->patptr = savsym.grpbeg;
f337b084
TH
2638 beglist = pack_rec(cat, symptr, beglist, endlist);
2639 if (SvUTF8(cat) != was_utf8)
2640 /* This had better be an upgrade while in utf8==0 mode */
2641 utf8 = 1;
2642
49704364 2643 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2644 break; /* No way to continue */
2645 }
ee790063 2646 items = endlist - beglist;
f337b084
TH
2647 lookahead.flags = symptr->flags & ~group_modifiers;
2648 goto no_change;
18529408 2649 }
62f95557
IZ
2650 case 'X' | TYPE_IS_SHRIEKING:
2651 if (!len) /* Avoid division by 0 */
2652 len = 1;
f337b084
TH
2653 if (utf8) {
2654 char *hop, *last;
2655 I32 l = len;
2656 hop = last = start;
2657 while (hop < cur) {
2658 hop += UTF8SKIP(hop);
2659 if (--l == 0) {
2660 last = hop;
2661 l = len;
2662 }
2663 }
2664 if (last > cur)
2665 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2666 cur = last;
2667 break;
2668 }
2669 len = (cur-start) % len;
62f95557 2670 /* FALL THROUGH */
a6ec74c1 2671 case 'X':
f337b084
TH
2672 if (utf8) {
2673 if (len < 1) goto no_change;
28be1210 2674 utf8_shrink:
f337b084
TH
2675 while (len > 0) {
2676 if (cur <= start)
28be1210
TH
2677 Perl_croak(aTHX_ "'%c' outside of string in pack",
2678 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2679 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2680 if (cur <= start)
28be1210
TH
2681 Perl_croak(aTHX_ "'%c' outside of string in pack",
2682 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2683 }
2684 len--;
2685 }
2686 } else {
fc241834 2687 shrink:
f337b084 2688 if (cur - start < len)
28be1210
TH
2689 Perl_croak(aTHX_ "'%c' outside of string in pack",
2690 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2691 cur -= len;
2692 }
2693 if (cur < start+symptr->strbeg) {
2694 /* Make sure group starts don't point into the void */
2695 tempsym_t *group;
9e27e96a 2696 const STRLEN length = cur-start;
f337b084
TH
2697 for (group = symptr;
2698 group && length < group->strbeg;
2699 group = group->previous) group->strbeg = length;
2700 lookahead.strbeg = length;
2701 }
a6ec74c1 2702 break;
fc241834
RGS
2703 case 'x' | TYPE_IS_SHRIEKING: {
2704 I32 ai32;
62f95557
IZ
2705 if (!len) /* Avoid division by 0 */
2706 len = 1;
230e1fce 2707 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2708 else ai32 = (cur - start) % len;
2709 if (ai32 == 0) goto no_change;
2710 len -= ai32;
2711 }
2712 /* FALL THROUGH */
a6ec74c1 2713 case 'x':
f337b084 2714 goto grow;
a6ec74c1
JH
2715 case 'A':
2716 case 'Z':
f337b084 2717 case 'a': {
f7fe979e 2718 const char *aptr;
f337b084 2719
a6ec74c1 2720 fromstr = NEXTFROM;
e62f0680 2721 aptr = SvPV_const(fromstr, fromlen);
f337b084 2722 if (DO_UTF8(fromstr)) {
f7fe979e 2723 const char *end, *s;
f337b084
TH
2724
2725 if (!utf8 && !SvUTF8(cat)) {
2726 marked_upgrade(aTHX_ cat, symptr);
2727 lookahead.flags |= FLAG_DO_UTF8;
2728 lookahead.strbeg = symptr->strbeg;
2729 utf8 = 1;
2730 start = SvPVX(cat);
2731 cur = start + SvCUR(cat);
2732 }
fc241834 2733 if (howlen == e_star) {
f337b084
TH
2734 if (utf8) goto string_copy;
2735 len = fromlen+1;
2736 }
2737 s = aptr;
2738 end = aptr + fromlen;
2739 fromlen = datumtype == 'Z' ? len-1 : len;
2740 while ((I32) fromlen > 0 && s < end) {
2741 s += UTF8SKIP(s);
2742 fromlen--;
2743 }
2744 if (s > end)
2745 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2746 if (utf8) {
fc241834 2747 len = fromlen;
f337b084
TH
2748 if (datumtype == 'Z') len++;
2749 fromlen = s-aptr;
2750 len += fromlen;
fc241834 2751
f337b084 2752 goto string_copy;
fc241834 2753 }
f337b084
TH
2754 fromlen = len - fromlen;
2755 if (datumtype == 'Z') fromlen--;
2756 if (howlen == e_star) {
2757 len = fromlen;
2758 if (datumtype == 'Z') len++;
fc241834 2759 }
f337b084 2760 GROWING(0, cat, start, cur, len);
fc241834 2761 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2762 datumtype | TYPE_IS_PACK))
5f5991a0 2763 Perl_croak(aTHX_ "panic: predicted utf8 length not available");
f337b084 2764 cur += fromlen;
a6ec74c1 2765 len -= fromlen;
f337b084
TH
2766 } else if (utf8) {
2767 if (howlen == e_star) {
2768 len = fromlen;
2769 if (datumtype == 'Z') len++;
a6ec74c1 2770 }
f337b084
TH
2771 if (len <= (I32) fromlen) {
2772 fromlen = len;
2773 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2774 }
fc241834 2775 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2776 upgrade, so:
2777 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2778 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2779 len -= fromlen;
2780 while (fromlen > 0) {
230e1fce 2781 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2782 aptr++;
2783 fromlen--;
fc241834 2784 }
f337b084
TH
2785 } else {
2786 string_copy:
2787 if (howlen == e_star) {
2788 len = fromlen;
2789 if (datumtype == 'Z') len++;
2790 }
2791 if (len <= (I32) fromlen) {
2792 fromlen = len;
2793 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2794 }
f337b084
TH
2795 GROWING(0, cat, start, cur, len);
2796 Copy(aptr, cur, fromlen, char);
2797 cur += fromlen;
2798 len -= fromlen;
a6ec74c1 2799 }
f337b084
TH
2800 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2801 cur += len;
3c4fb04a 2802 SvTAINT(cat);
a6ec74c1 2803 break;
f337b084 2804 }
a6ec74c1 2805 case 'B':
f337b084 2806 case 'b': {
b83604b4 2807 const char *str, *end;
f337b084
TH
2808 I32 l, field_len;
2809 U8 bits;
2810 bool utf8_source;
2811 U32 utf8_flags;
a6ec74c1 2812
fc241834 2813 fromstr = NEXTFROM;
b83604b4 2814 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2815 end = str + fromlen;
2816 if (DO_UTF8(fromstr)) {
2817 utf8_source = TRUE;
041457d9 2818 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2819 } else {
2820 utf8_source = FALSE;
2821 utf8_flags = 0; /* Unused, but keep compilers happy */
2822 }
2823 if (howlen == e_star) len = fromlen;
2824 field_len = (len+7)/8;
2825 GROWING(utf8, cat, start, cur, field_len);
2826 if (len > (I32)fromlen) len = fromlen;
2827 bits = 0;
2828 l = 0;
2829 if (datumtype == 'B')
2830 while (l++ < len) {
2831 if (utf8_source) {
95b63a38 2832 UV val = 0;
f337b084
TH
2833 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2834 bits |= val & 1;
2835 } else bits |= *str++ & 1;
2836 if (l & 7) bits <<= 1;
fc241834 2837 else {
f337b084
TH
2838 PUSH_BYTE(utf8, cur, bits);
2839 bits = 0;
a6ec74c1
JH
2840 }
2841 }
f337b084
TH
2842 else
2843 /* datumtype == 'b' */
2844 while (l++ < len) {
2845 if (utf8_source) {
95b63a38 2846 UV val = 0;
f337b084
TH
2847 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2848 if (val & 1) bits |= 0x80;
2849 } else if (*str++ & 1)
2850 bits |= 0x80;
2851 if (l & 7) bits >>= 1;
fc241834 2852 else {
f337b084
TH
2853 PUSH_BYTE(utf8, cur, bits);
2854 bits = 0;
a6ec74c1
JH
2855 }
2856 }
f337b084
TH
2857 l--;
2858 if (l & 7) {
fc241834 2859 if (datumtype == 'B')
f337b084 2860 bits <<= 7 - (l & 7);
fc241834 2861 else
f337b084
TH
2862 bits >>= 7 - (l & 7);
2863 PUSH_BYTE(utf8, cur, bits);
2864 l += 7;
a6ec74c1 2865 }
f337b084
TH
2866 /* Determine how many chars are left in the requested field */
2867 l /= 8;
2868 if (howlen == e_star) field_len = 0;
2869 else field_len -= l;
2870 Zero(cur, field_len, char);
2871 cur += field_len;
a6ec74c1 2872 break;
f337b084 2873 }
a6ec74c1 2874 case 'H':
f337b084 2875 case 'h': {
10516c54 2876 const char *str, *end;
f337b084
TH
2877 I32 l, field_len;
2878 U8 bits;
2879 bool utf8_source;
2880 U32 utf8_flags;
a6ec74c1 2881
fc241834 2882 fromstr = NEXTFROM;
10516c54 2883 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2884 end = str + fromlen;
2885 if (DO_UTF8(fromstr)) {
2886 utf8_source = TRUE;
041457d9 2887 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2888 } else {
2889 utf8_source = FALSE;
2890 utf8_flags = 0; /* Unused, but keep compilers happy */
2891 }
2892 if (howlen == e_star) len = fromlen;
2893 field_len = (len+1)/2;
2894 GROWING(utf8, cat, start, cur, field_len);
2895 if (!utf8 && len > (I32)fromlen) len = fromlen;
2896 bits = 0;
2897 l = 0;
2898 if (datumtype == 'H')
2899 while (l++ < len) {
2900 if (utf8_source) {
95b63a38 2901 UV val = 0;
f337b084
TH
2902 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2903 if (val < 256 && isALPHA(val))
2904 bits |= (val + 9) & 0xf;
a6ec74c1 2905 else
f337b084
TH
2906 bits |= val & 0xf;
2907 } else if (isALPHA(*str))
2908 bits |= (*str++ + 9) & 0xf;
2909 else
2910 bits |= *str++ & 0xf;
2911 if (l & 1) bits <<= 4;
fc241834 2912 else {
f337b084
TH
2913 PUSH_BYTE(utf8, cur, bits);
2914 bits = 0;
a6ec74c1
JH
2915 }
2916 }
f337b084
TH
2917 else
2918 while (l++ < len) {
2919 if (utf8_source) {
95b63a38 2920 UV val = 0;
f337b084
TH
2921 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2922 if (val < 256 && isALPHA(val))
2923 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2924 else
f337b084
TH
2925 bits |= (val & 0xf) << 4;
2926 } else if (isALPHA(*str))
2927 bits |= ((*str++ + 9) & 0xf) << 4;
2928 else
2929 bits |= (*str++ & 0xf) << 4;
2930 if (l & 1) bits >>= 4;
fc241834 2931 else {
f337b084
TH
2932 PUSH_BYTE(utf8, cur, bits);
2933 bits = 0;
a6ec74c1 2934 }
fc241834 2935 }
f337b084
TH
2936 l--;
2937 if (l & 1) {
2938 PUSH_BYTE(utf8, cur, bits);
2939 l++;
2940 }
2941 /* Determine how many chars are left in the requested field */
2942 l /= 2;
2943 if (howlen == e_star) field_len = 0;
2944 else field_len -= l;
2945 Zero(cur, field_len, char);
2946 cur += field_len;
2947 break;
fc241834
RGS
2948 }
2949 case 'c':
f337b084
TH
2950 while (len-- > 0) {
2951 IV aiv;
2952 fromstr = NEXTFROM;
2953 aiv = SvIV(fromstr);
2954 if ((-128 > aiv || aiv > 127) &&
2955 ckWARN(WARN_PACK))
2956 Perl_warner(aTHX_ packWARN(WARN_PACK),
2957 "Character in 'c' format wrapped in pack");
585ec06d 2958 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2959 }
2960 break;
2961 case 'C':
f337b084
TH
2962 if (len == 0) {
2963 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2964 break;
2965 }
a6ec74c1 2966 while (len-- > 0) {
f337b084 2967 IV aiv;
a6ec74c1 2968 fromstr = NEXTFROM;
f337b084
TH
2969 aiv = SvIV(fromstr);
2970 if ((0 > aiv || aiv > 0xff) &&
fc241834
RGS
2971 ckWARN(WARN_PACK))
2972 Perl_warner(aTHX_ packWARN(WARN_PACK),
2973 "Character in 'C' format wrapped in pack");
1651fc44 2974 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2975 }
fc241834
RGS
2976 break;
2977 case 'W': {
2978 char *end;
670f1322 2979 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2980
2981 end = start+SvLEN(cat)-1;
2982 if (utf8) end -= UTF8_MAXLEN-1;
2983 while (len-- > 0) {
2984 UV auv;
2985 fromstr = NEXTFROM;
2986 auv = SvUV(fromstr);
2987 if (in_bytes) auv = auv % 0x100;
2988 if (utf8) {
2989 W_utf8:
2990 if (cur > end) {
2991 *cur = '\0';
b162af07 2992 SvCUR_set(cat, cur - start);
fc241834
RGS
2993
2994 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2995 end = start+SvLEN(cat)-UTF8_MAXLEN;
2996 }
230e1fce
NC
2997 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2998 NATIVE_TO_UNI(auv),
041457d9 2999 warn_utf8 ?
230e1fce 3000 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
3001 } else {
3002 if (auv >= 0x100) {
3003 if (!SvUTF8(cat)) {
3004 *cur = '\0';
b162af07 3005 SvCUR_set(cat, cur - start);
fc241834
RGS
3006 marked_upgrade(aTHX_ cat, symptr);
3007 lookahead.flags |= FLAG_DO_UTF8;
3008 lookahead.strbeg = symptr->strbeg;
3009 utf8 = 1;
3010 start = SvPVX(cat);
3011 cur = start + SvCUR(cat);
3012 end = start+SvLEN(cat)-UTF8_MAXLEN;
3013 goto W_utf8;
3014 }
3015 if (ckWARN(WARN_PACK))
3016 Perl_warner(aTHX_ packWARN(WARN_PACK),
3017 "Character in 'W' format wrapped in pack");
3018 auv &= 0xff;
3019 }
3020 if (cur >= end) {
3021 *cur = '\0';
b162af07 3022 SvCUR_set(cat, cur - start);
fc241834
RGS
3023 GROWING(0, cat, start, cur, len+1);
3024 end = start+SvLEN(cat)-1;
3025 }
fe2774ed 3026 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
3027 }
3028 }
3029 break;
fc241834
RGS
3030 }
3031 case 'U': {
3032 char *end;
3033
3034 if (len == 0) {
3035 if (!(symptr->flags & FLAG_DO_UTF8)) {
3036 marked_upgrade(aTHX_ cat, symptr);
3037 lookahead.flags |= FLAG_DO_UTF8;
3038 lookahead.strbeg = symptr->strbeg;
3039 }
3040 utf8 = 0;
3041 goto no_change;
3042 }
3043
3044 end = start+SvLEN(cat);
3045 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 3046 while (len-- > 0) {
fc241834 3047 UV auv;
a6ec74c1 3048 fromstr = NEXTFROM;
fc241834
RGS
3049 auv = SvUV(fromstr);
3050 if (utf8) {
230e1fce 3051 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 3052 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 3053 warn_utf8 ?
fc241834
RGS
3054 0 : UNICODE_ALLOW_ANY);
3055 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3056 *cur = '\0';
b162af07 3057 SvCUR_set(cat, cur - start);
fc241834
RGS
3058 GROWING(0, cat, start, cur,
3059 len+(endb-buffer)*UTF8_EXPAND);
3060 end = start+SvLEN(cat);
3061 }
64844641 3062 cur = bytes_to_uni(buffer, endb-buffer, cur);
fc241834
RGS
3063 } else {
3064 if (cur >= end) {
3065 *cur = '\0';
b162af07 3066 SvCUR_set(cat, cur - start);
fc241834
RGS
3067 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3068 end = start+SvLEN(cat)-UTF8_MAXLEN;
3069 }
230e1fce 3070 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 3071 warn_utf8 ?
230e1fce 3072 0 : UNICODE_ALLOW_ANY);
fc241834 3073 }
a6ec74c1 3074 }
a6ec74c1 3075 break;
fc241834 3076 }
a6ec74c1
JH
3077 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3078 case 'f':
a6ec74c1 3079 while (len-- > 0) {
f337b084
TH
3080 float afloat;
3081 NV anv;
a6ec74c1 3082 fromstr = NEXTFROM;
f337b084 3083 anv = SvNV(fromstr);
5cdb9e01 3084#ifdef __VOS__
f337b084 3085 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3086 during conversion from double to float into infinity, so we
3087 do it by hand. This code should either be generalized for
3088 any OS that needs it, or removed if and when VOS implements
3089 posix-976 (suggestion to support mapping to infinity).
3090 Paul.Green@stratus.com 02-04-02. */
3722f0dc
PG
3091{
3092extern const float _float_constants[];
f337b084 3093 if (anv > FLT_MAX)
fc241834 3094 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3095 else if (anv < -FLT_MAX)
fc241834 3096 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3097 else afloat = (float) anv;
3722f0dc 3098}
f337b084 3099#else /* __VOS__ */
baf3cf9c 3100# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3101 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3102 * on Alpha; fake it if we don't have them.
3103 */
f337b084 3104 if (anv > FLT_MAX)
fc241834 3105 afloat = FLT_MAX;
f337b084 3106 else if (anv < -FLT_MAX)
fc241834 3107 afloat = -FLT_MAX;
f337b084 3108 else afloat = (float)anv;
baf3cf9c 3109# else
f337b084 3110 afloat = (float)anv;
baf3cf9c 3111# endif
f337b084 3112#endif /* __VOS__ */
1109a392 3113 DO_BO_PACK_N(afloat, float);
f337b084 3114 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
3115 }
3116 break;
3117 case 'd':
a6ec74c1 3118 while (len-- > 0) {
f337b084
TH
3119 double adouble;
3120 NV anv;
a6ec74c1 3121 fromstr = NEXTFROM;
f337b084 3122 anv = SvNV(fromstr);
5cdb9e01 3123#ifdef __VOS__
f337b084 3124 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3125 during conversion from long double to double into infinity,
3126 so we do it by hand. This code should either be generalized
3127 for any OS that needs it, or removed if and when VOS
3128 implements posix-976 (suggestion to support mapping to
3129 infinity). Paul.Green@stratus.com 02-04-02. */
3722f0dc
PG
3130{
3131extern const double _double_constants[];
f337b084 3132 if (anv > DBL_MAX)
fc241834 3133 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3134 else if (anv < -DBL_MAX)
fc241834 3135 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3136 else adouble = (double) anv;
3722f0dc 3137}
f337b084 3138#else /* __VOS__ */
baf3cf9c 3139# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3140 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3141 * on Alpha; fake it if we don't have them.
3142 */
f337b084 3143 if (anv > DBL_MAX)
fc241834 3144 adouble = DBL_MAX;
f337b084 3145 else if (anv < -DBL_MAX)
fc241834 3146 adouble = -DBL_MAX;
f337b084 3147 else adouble = (double)anv;
baf3cf9c 3148# else
f337b084 3149 adouble = (double)anv;
baf3cf9c 3150# endif
f337b084 3151#endif /* __VOS__ */
1109a392 3152 DO_BO_PACK_N(adouble, double);
f337b084 3153 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3154 }
3155 break;
fc241834
RGS
3156 case 'F': {
3157 NV anv;
1109a392 3158 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3159 while (len-- > 0) {
3160 fromstr = NEXTFROM;
3161 anv = SvNV(fromstr);
1109a392 3162 DO_BO_PACK_N(anv, NV);
fc241834 3163 PUSH_VAR(utf8, cur, anv);
92d41999
JH
3164 }
3165 break;
fc241834 3166 }
92d41999 3167#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834
RGS
3168 case 'D': {
3169 long double aldouble;
1109a392
MHM
3170 /* long doubles can have unused bits, which may be nonzero */
3171 Zero(&aldouble, 1, long double);
92d41999
JH
3172 while (len-- > 0) {
3173 fromstr = NEXTFROM;
3174 aldouble = (long double)SvNV(fromstr);
1109a392 3175 DO_BO_PACK_N(aldouble, long double);
fc241834 3176 PUSH_VAR(utf8, cur, aldouble);
92d41999
JH
3177 }
3178 break;
fc241834 3179 }
92d41999 3180#endif
7212898e 3181#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3182 case 'n' | TYPE_IS_SHRIEKING:
7212898e 3183#endif
a6ec74c1
JH
3184 case 'n':
3185 while (len-- > 0) {
f337b084 3186 I16 ai16;
a6ec74c1 3187 fromstr = NEXTFROM;
ef108786 3188 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3189#ifdef HAS_HTONS
ef108786 3190 ai16 = PerlSock_htons(ai16);
a6ec74c1 3191#endif
f337b084 3192 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3193 }
3194 break;
7212898e 3195#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3196 case 'v' | TYPE_IS_SHRIEKING:
7212898e 3197#endif
a6ec74c1
JH
3198 case 'v':
3199 while (len-- > 0) {
f337b084 3200 I16 ai16;
a6ec74c1 3201 fromstr = NEXTFROM;
ef108786 3202 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3203#ifdef HAS_HTOVS
ef108786 3204 ai16 = htovs(ai16);
a6ec74c1 3205#endif
f337b084 3206 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3207 }
3208 break;
49704364 3209 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3210#if SHORTSIZE != SIZE16
fc241834 3211 while (len-- > 0) {
f337b084 3212 unsigned short aushort;
fc241834
RGS
3213 fromstr = NEXTFROM;
3214 aushort = SvUV(fromstr);
3215 DO_BO_PACK(aushort, s);
f337b084 3216 PUSH_VAR(utf8, cur, aushort);
fc241834 3217 }
49704364
LW
3218 break;
3219#else
3220 /* Fall through! */
a6ec74c1 3221#endif
49704364 3222 case 'S':
fc241834 3223 while (len-- > 0) {
f337b084 3224 U16 au16;
fc241834
RGS
3225 fromstr = NEXTFROM;
3226 au16 = (U16)SvUV(fromstr);
3227 DO_BO_PACK(au16, 16);
f337b084 3228 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3229 }
3230 break;
49704364 3231 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3232#if SHORTSIZE != SIZE16
fc241834 3233 while (len-- > 0) {
f337b084 3234 short ashort;
fc241834
RGS
3235 fromstr = NEXTFROM;
3236 ashort = SvIV(fromstr);
3237 DO_BO_PACK(ashort, s);
f337b084 3238 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3239 }
49704364
LW
3240 break;
3241#else
3242 /* Fall through! */
a6ec74c1 3243#endif
49704364
LW
3244 case 's':
3245 while (len-- > 0) {
f337b084 3246 I16 ai16;
49704364 3247 fromstr = NEXTFROM;
ef108786
MHM
3248 ai16 = (I16)SvIV(fromstr);
3249 DO_BO_PACK(ai16, 16);
f337b084 3250 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3251 }
3252 break;
3253 case 'I':
49704364 3254 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3255 while (len-- > 0) {
f337b084 3256 unsigned int auint;
a6ec74c1
JH
3257 fromstr = NEXTFROM;
3258 auint = SvUV(fromstr);
1109a392 3259 DO_BO_PACK(auint, i);
f337b084 3260 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3261 }
3262 break;
92d41999
JH
3263 case 'j':
3264 while (len-- > 0) {
f337b084 3265 IV aiv;
92d41999
JH
3266 fromstr = NEXTFROM;
3267 aiv = SvIV(fromstr);
1109a392
MHM
3268#if IVSIZE == INTSIZE
3269 DO_BO_PACK(aiv, i);
3270#elif IVSIZE == LONGSIZE
3271 DO_BO_PACK(aiv, l);
3272#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3273 DO_BO_PACK(aiv, 64);
f337b084
TH
3274#else
3275 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3276#endif
f337b084 3277 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3278 }
3279 break;
3280 case 'J':
3281 while (len-- > 0) {
f337b084 3282 UV auv;
92d41999
JH
3283 fromstr = NEXTFROM;
3284 auv = SvUV(fromstr);
1109a392
MHM
3285#if UVSIZE == INTSIZE
3286 DO_BO_PACK(auv, i);
3287#elif UVSIZE == LONGSIZE
3288 DO_BO_PACK(auv, l);
3289#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3290 DO_BO_PACK(auv, 64);
f337b084
TH
3291#else
3292 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3293#endif
f337b084 3294 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3295 }
3296 break;
a6ec74c1
JH
3297 case 'w':
3298 while (len-- > 0) {
f337b084 3299 NV anv;
a6ec74c1 3300 fromstr = NEXTFROM;
15e9f109 3301 anv = SvNV(fromstr);
a6ec74c1 3302
f337b084
TH
3303 if (anv < 0) {
3304 *cur = '\0';
b162af07 3305 SvCUR_set(cat, cur - start);
49704364 3306 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3307 }
a6ec74c1 3308
196b62db
NC
3309 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3310 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3311 any negative IVs will have already been got by the croak()
3312 above. IOK is untrue for fractions, so we test them
3313 against UV_MAX_P1. */
f337b084
TH
3314 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3315 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3316 char *in = buf + sizeof(buf);
196b62db 3317 UV auv = SvUV(fromstr);
a6ec74c1
JH
3318
3319 do {
eb160463 3320 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3321 auv >>= 7;
3322 } while (auv);
3323 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3324 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3325 in, (buf + sizeof(buf)) - in);
3326 } else if (SvPOKp(fromstr))
3327 goto w_string;
a6ec74c1 3328 else if (SvNOKp(fromstr)) {
0258719b
NC
3329 /* 10**NV_MAX_10_EXP is the largest power of 10
3330 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3331 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3332 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3333 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3334 Some C compilers are strict about integral constant
3335 expressions so we conservatively divide by a slightly
3336 smaller integer instead of multiplying by the exact
3337 floating-point value.
0258719b
NC
3338 */
3339#ifdef NV_MAX_10_EXP
f337b084 3340 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3341 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3342#else
f337b084 3343 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3344 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3345#endif
a6ec74c1
JH
3346 char *in = buf + sizeof(buf);
3347
8b6e33c7 3348 anv = Perl_floor(anv);
a6ec74c1 3349 do {
8b6e33c7 3350 const NV next = Perl_floor(anv / 128);
a6ec74c1 3351 if (in <= buf) /* this cannot happen ;-) */
49704364 3352 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3353 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3354 anv = next;
3355 } while (anv > 0);
a6ec74c1 3356 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3357 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3358 in, (buf + sizeof(buf)) - in);
3359 } else {
8b6e33c7
AL
3360 const char *from;
3361 char *result, *in;
735b914b
JH
3362 SV *norm;
3363 STRLEN len;
3364 bool done;
3365
f337b084 3366 w_string:
735b914b 3367 /* Copy string and check for compliance */
349d4f2f 3368 from = SvPV_const(fromstr, len);
735b914b 3369 if ((norm = is_an_int(from, len)) == NULL)
49704364 3370 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 3371
a02a5408 3372 Newx(result, len, char);
735b914b
JH
3373 in = result + len;
3374 done = FALSE;
f337b084 3375 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3376 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3377 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3378 in, (result + len) - in);
735b914b
JH
3379 Safefree(result);
3380 SvREFCNT_dec(norm); /* free norm */
fc241834 3381 }
a6ec74c1
JH
3382 }
3383 break;
3384 case 'i':
49704364 3385 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3386 while (len-- > 0) {
f337b084 3387 int aint;
a6ec74c1
JH
3388 fromstr = NEXTFROM;
3389 aint = SvIV(fromstr);
1109a392 3390 DO_BO_PACK(aint, i);
f337b084 3391 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3392 }
3393 break;
7212898e 3394#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3395 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3396#endif
a6ec74c1
JH
3397 case 'N':
3398 while (len-- > 0) {
f337b084 3399 U32 au32;
a6ec74c1 3400 fromstr = NEXTFROM;
ef108786 3401 au32 = SvUV(fromstr);
a6ec74c1 3402#ifdef HAS_HTONL
ef108786 3403 au32 = PerlSock_htonl(au32);
a6ec74c1 3404#endif
f337b084 3405 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3406 }
3407 break;
7212898e 3408#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3409 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3410#endif
a6ec74c1
JH
3411 case 'V':
3412 while (len-- > 0) {
f337b084 3413 U32 au32;
a6ec74c1 3414 fromstr = NEXTFROM;
ef108786 3415 au32 = SvUV(fromstr);
a6ec74c1 3416#ifdef HAS_HTOVL
ef108786 3417 au32 = htovl(au32);
a6ec74c1 3418#endif
f337b084 3419 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3420 }
3421 break;
49704364 3422 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3423#if LONGSIZE != SIZE32
fc241834 3424 while (len-- > 0) {
f337b084 3425 unsigned long aulong;
fc241834
RGS
3426 fromstr = NEXTFROM;
3427 aulong = SvUV(fromstr);
3428 DO_BO_PACK(aulong, l);
f337b084 3429 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3430 }
49704364
LW
3431 break;
3432#else
3433 /* Fall though! */
a6ec74c1 3434#endif
49704364 3435 case 'L':
fc241834 3436 while (len-- > 0) {
f337b084 3437 U32 au32;
fc241834
RGS
3438 fromstr = NEXTFROM;
3439 au32 = SvUV(fromstr);
3440 DO_BO_PACK(au32, 32);
f337b084 3441 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3442 }
3443 break;
49704364 3444 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3445#if LONGSIZE != SIZE32
fc241834 3446 while (len-- > 0) {
f337b084 3447 long along;
fc241834
RGS
3448 fromstr = NEXTFROM;
3449 along = SvIV(fromstr);
3450 DO_BO_PACK(along, l);
f337b084 3451 PUSH_VAR(utf8, cur, along);
a6ec74c1 3452 }
49704364
LW
3453 break;
3454#else
3455 /* Fall though! */
a6ec74c1 3456#endif
49704364
LW
3457 case 'l':
3458 while (len-- > 0) {
f337b084 3459 I32 ai32;
49704364 3460 fromstr = NEXTFROM;
ef108786
MHM
3461 ai32 = SvIV(fromstr);
3462 DO_BO_PACK(ai32, 32);
f337b084 3463 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3464 }
3465 break;
3466#ifdef HAS_QUAD
3467 case 'Q':
3468 while (len-- > 0) {
f337b084 3469 Uquad_t auquad;
a6ec74c1 3470 fromstr = NEXTFROM;
f337b084 3471 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3472 DO_BO_PACK(auquad, 64);
f337b084 3473 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3474 }
3475 break;
3476 case 'q':
3477 while (len-- > 0) {
f337b084 3478 Quad_t aquad;
a6ec74c1
JH
3479 fromstr = NEXTFROM;
3480 aquad = (Quad_t)SvIV(fromstr);
1109a392 3481 DO_BO_PACK(aquad, 64);
f337b084 3482 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3483 }
3484 break;
f337b084 3485#endif /* HAS_QUAD */
a6ec74c1
JH
3486 case 'P':
3487 len = 1; /* assume SV is correct length */
f337b084 3488 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3489 /* Fall through! */
a6ec74c1
JH
3490 case 'p':
3491 while (len-- > 0) {
83003860 3492 const char *aptr;
f337b084 3493
a6ec74c1 3494 fromstr = NEXTFROM;
28a4f200 3495 SvGETMAGIC(fromstr);
3496 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3497 else {
a6ec74c1
JH
3498 /* XXX better yet, could spirit away the string to
3499 * a safe spot and hang on to it until the result
3500 * of pack() (and all copies