This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
upgrade bytes_to_uni
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
acde74e1 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 */
18
166f8a29
DM
19/* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
24 *
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
27 */
28
29
a6ec74c1
JH
30#include "EXTERN.h"
31#define PERL_IN_PP_PACK_C
32#include "perl.h"
33
f7fe979e
AL
34/* Types used by pack/unpack */
35typedef enum {
36 e_no_len, /* no length */
37 e_number, /* number, [] */
38 e_star /* asterisk */
39} howlen_t;
40
41typedef struct tempsym {
42 const char* patptr; /* current template char */
43 const char* patend; /* one after last char */
44 const char* grpbeg; /* 1st char of ()-group */
45 const char* grpend; /* end of ()-group */
46 I32 code; /* template code (!<>) */
47 I32 length; /* length/repeat count */
48 howlen_t howlen; /* how length is given */
49 int level; /* () nesting level */
50 U32 flags; /* /=4, comma=2, pack=1 */
51 /* and group modifiers */
52 STRLEN strbeg; /* offset of group start */
53 struct tempsym *previous; /* previous group */
54} tempsym_t;
55
56#define TEMPSYM_INIT(symptr, p, e, f) \
57 STMT_START { \
58 (symptr)->patptr = (p); \
59 (symptr)->patend = (e); \
60 (symptr)->grpbeg = NULL; \
61 (symptr)->grpend = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->code = 0; \
64 (symptr)->length = 0; \
65 (symptr)->howlen = 0; \
66 (symptr)->level = 0; \
67 (symptr)->flags = (f); \
68 (symptr)->strbeg = 0; \
69 (symptr)->previous = NULL; \
70 } STMT_END
71
7212898e 72#if PERL_VERSION >= 9
f337b084
TH
73# define PERL_PACK_CAN_BYTEORDER
74# define PERL_PACK_CAN_SHRIEKSIGN
75#endif
76
77#ifndef CHAR_BIT
78# define CHAR_BIT 8
7212898e 79#endif
3473cf63
RGS
80/* Maximum number of bytes to which a byte can grow due to upgrade */
81#define UTF8_EXPAND 2
7212898e 82
a6ec74c1 83/*
a6ec74c1
JH
84 * Offset for integer pack/unpack.
85 *
86 * On architectures where I16 and I32 aren't really 16 and 32 bits,
87 * which for now are all Crays, pack and unpack have to play games.
88 */
89
90/*
91 * These values are required for portability of pack() output.
92 * If they're not right on your machine, then pack() and unpack()
93 * wouldn't work right anyway; you'll need to apply the Cray hack.
94 * (I'd like to check them with #if, but you can't use sizeof() in
95 * the preprocessor.) --???
96 */
97/*
98 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
99 defines are now in config.h. --Andy Dougherty April 1998
100 */
101#define SIZE16 2
102#define SIZE32 4
103
104/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
105 --jhi Feb 1999 */
106
1109a392
MHM
107#if U16SIZE > SIZE16 || U32SIZE > SIZE32
108# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
109# define OFF16(p) ((char*)(p))
110# define OFF32(p) ((char*)(p))
a6ec74c1 111# else
1109a392 112# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
113# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
114# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
115# else
08ca2aa3 116 ++++ bad cray byte order
a6ec74c1
JH
117# endif
118# endif
a6ec74c1 119#else
08ca2aa3
TH
120# define OFF16(p) ((char *) (p))
121# define OFF32(p) ((char *) (p))
a6ec74c1
JH
122#endif
123
f337b084
TH
124/* Only to be used inside a loop (see the break) */
125#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
126 if (utf8) { \
127 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
128 } else { \
129 Copy(s, OFF16(p), SIZE16, char); \
130 (s) += SIZE16; \
131 } \
132} STMT_END
133
134/* Only to be used inside a loop (see the break) */
135#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
136 if (utf8) { \
137 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
138 } else { \
139 Copy(s, OFF32(p), SIZE32, char); \
140 (s) += SIZE32; \
141 } \
142} STMT_END
143
144#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
145#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
146
147/* Only to be used inside a loop (see the break) */
f337b084 148#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
08ca2aa3
TH
149STMT_START { \
150 if (utf8) { \
f337b084
TH
151 if (!uni_to_bytes(aTHX_ &s, strend, \
152 (char *) &var, sizeof(var), datumtype)) break;\
08ca2aa3
TH
153 } else { \
154 Copy(s, (char *) &var, sizeof(var), char); \
155 s += sizeof(var); \
156 } \
08ca2aa3
TH
157} STMT_END
158
f337b084 159#define PUSH_VAR(utf8, aptr, var) \
230e1fce 160 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 161
49704364
LW
162/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
163#define MAX_SUB_TEMPLATE_LEVEL 100
164
66c611c5 165/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
166#define FLAG_WAS_UTF8 0x40
167#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 168#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 169#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
LW
170#define FLAG_SLASH 0x04
171#define FLAG_COMMA 0x02
172#define FLAG_PACK 0x01
173
a6ec74c1
JH
174STATIC SV *
175S_mul128(pTHX_ SV *sv, U8 m)
176{
177 STRLEN len;
178 char *s = SvPV(sv, len);
179 char *t;
a6ec74c1
JH
180
181 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 182 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
183
184 sv_catsv(tmpNew, sv);
185 SvREFCNT_dec(sv); /* free old sv */
186 sv = tmpNew;
187 s = SvPV(sv, len);
188 }
189 t = s + len - 1;
190 while (!*t) /* trailing '\0'? */
191 t--;
192 while (t > s) {
f7fe979e 193 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
194 *(t--) = '0' + (char)(i % 10);
195 m = (char)(i / 10);
a6ec74c1
JH
196 }
197 return (sv);
198}
199
200/* Explosives and implosives. */
201
202#if 'I' == 73 && 'J' == 74
203/* On an ASCII/ISO kind of system */
204#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
205#else
206/*
207 Some other sort of character set - use memchr() so we don't match
208 the null byte.
209 */
210#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
211#endif
212
66c611c5 213/* type modifiers */
62f95557 214#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
215#define TYPE_IS_BIG_ENDIAN 0x200
216#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 217#define TYPE_IS_PACK 0x800
1109a392 218#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 219#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
220#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
221
7212898e 222#ifdef PERL_PACK_CAN_SHRIEKSIGN
28be1210 223# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
7212898e 224#else
28be1210 225# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
7212898e
NC
226#endif
227
228#ifndef PERL_PACK_CAN_BYTEORDER
229/* Put "can't" first because it is shorter */
230# define TYPE_ENDIANNESS(t) 0
231# define TYPE_NO_ENDIANNESS(t) (t)
232
233# define ENDIANNESS_ALLOWED_TYPES ""
234
235# define DO_BO_UNPACK(var, type)
236# define DO_BO_PACK(var, type)
07409e01
NC
237# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
238# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
7212898e
NC
239# define DO_BO_UNPACK_N(var, type)
240# define DO_BO_PACK_N(var, type)
241# define DO_BO_UNPACK_P(var)
242# define DO_BO_PACK_P(var)
45f723e1
NC
243# define DO_BO_UNPACK_PC(var)
244# define DO_BO_PACK_PC(var)
66c611c5 245
f337b084 246#else /* PERL_PACK_CAN_BYTEORDER */
7212898e
NC
247
248# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
249# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
250
251# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
252
253# define DO_BO_UNPACK(var, type) \
1109a392 254 STMT_START { \
66c611c5 255 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
256 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
257 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
258 default: break; \
259 } \
260 } STMT_END
261
7212898e 262# define DO_BO_PACK(var, type) \
1109a392 263 STMT_START { \
66c611c5 264 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
265 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
266 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
267 default: break; \
268 } \
269 } STMT_END
270
07409e01 271# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
1109a392 272 STMT_START { \
66c611c5 273 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 274 case TYPE_IS_BIG_ENDIAN: \
07409e01 275 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
1109a392
MHM
276 break; \
277 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 278 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
1109a392
MHM
279 break; \
280 default: \
281 break; \
282 } \
283 } STMT_END
284
07409e01 285# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
1109a392 286 STMT_START { \
66c611c5 287 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 288 case TYPE_IS_BIG_ENDIAN: \
07409e01 289 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
1109a392
MHM
290 break; \
291 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 292 var = (post_cast *) my_htole ## type ((pre_cast) var); \
1109a392
MHM
293 break; \
294 default: \
295 break; \
296 } \
297 } STMT_END
298
7212898e 299# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
300 STMT_START { \
301 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
302 case TYPE_IS_BIG_ENDIAN: \
303 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
304 "platform", #action, #type); \
305 break; \
306 case TYPE_IS_LITTLE_ENDIAN: \
307 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
308 "platform", #action, #type); \
309 break; \
310 default: \
311 break; \
312 } \
313 } STMT_END
314
7212898e 315# if PTRSIZE == INTSIZE
07409e01
NC
316# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
317# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
318# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
319# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
7212898e 320# elif PTRSIZE == LONGSIZE
07409e01
NC
321# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
322# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
323# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
324# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
2b00a750
JD
325# elif PTRSIZE == IVSIZE
326# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
327# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
328# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
329# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
7212898e
NC
330# else
331# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
332# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
45f723e1
NC
333# define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
334# define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
7212898e 335# endif
1109a392 336
7212898e 337# if defined(my_htolen) && defined(my_letohn) && \
1109a392 338 defined(my_htoben) && defined(my_betohn)
7212898e 339# define DO_BO_UNPACK_N(var, type) \
1109a392 340 STMT_START { \
66c611c5 341 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
342 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
343 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
344 default: break; \
345 } \
346 } STMT_END
347
7212898e 348# define DO_BO_PACK_N(var, type) \
1109a392 349 STMT_START { \
66c611c5 350 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
351 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
352 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
353 default: break; \
354 } \
355 } STMT_END
7212898e
NC
356# else
357# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
358# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
359# endif
360
f337b084 361#endif /* PERL_PACK_CAN_BYTEORDER */
62f95557 362
78d46eaa 363#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 364#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
365#define PACK_SIZE_MASK 0x3F
366
78d46eaa
NC
367/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
368 in). You're unlikely ever to need to regenerate them. */
a7a3cfaa
TH
369
370#if TYPE_IS_SHRIEKING != 0x100
371 ++++shriek offset should be 256
372#endif
373
374typedef U8 packprops_t;
78d46eaa
NC
375#if 'J'-'I' == 1
376/* ASCII */
f43a9a31 377STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
378 /* normal */
379 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
380 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
381 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
383 0, 0, 0,
384 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 385#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 386 /* D */ LONG_DOUBLESIZE,
80a13697 387#else
a7a3cfaa 388 0,
80a13697 389#endif
a7a3cfaa
TH
390 0,
391 /* F */ NVSIZE,
392 0, 0,
393 /* I */ sizeof(unsigned int),
394 /* J */ UVSIZE,
395 0,
396 /* L */ SIZE32,
397 0,
398 /* N */ SIZE32,
399 0, 0,
80a13697 400#if defined(HAS_QUAD)
a7a3cfaa 401 /* Q */ sizeof(Uquad_t),
80a13697 402#else
a7a3cfaa 403 0,
80a13697 404#endif
a7a3cfaa
TH
405 0,
406 /* S */ SIZE16,
407 0,
408 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
409 /* V */ SIZE32,
410 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
411 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
412 /* c */ sizeof(char),
413 /* d */ sizeof(double),
414 0,
415 /* f */ sizeof(float),
416 0, 0,
417 /* i */ sizeof(int),
418 /* j */ IVSIZE,
419 0,
420 /* l */ SIZE32,
421 0,
422 /* n */ SIZE16,
423 0,
424 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 425#if defined(HAS_QUAD)
a7a3cfaa 426 /* q */ sizeof(Quad_t),
80a13697 427#else
a7a3cfaa 428 0,
80a13697 429#endif
a7a3cfaa
TH
430 0,
431 /* s */ SIZE16,
432 0, 0,
433 /* v */ SIZE16,
434 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
435 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0,
444 /* shrieking */
445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 0, 0, 0, 0, 0, 0, 0, 0, 0,
450 /* I */ sizeof(unsigned int),
451 0, 0,
452 /* L */ sizeof(unsigned long),
453 0,
7212898e 454#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 455 /* N */ SIZE32,
7212898e 456#else
a7a3cfaa 457 0,
7212898e 458#endif
a7a3cfaa
TH
459 0, 0, 0, 0,
460 /* S */ sizeof(unsigned short),
461 0, 0,
7212898e 462#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 463 /* V */ SIZE32,
7212898e 464#else
a7a3cfaa 465 0,
7212898e 466#endif
a7a3cfaa
TH
467 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
468 0, 0,
469 /* i */ sizeof(int),
470 0, 0,
471 /* l */ sizeof(long),
472 0,
7212898e 473#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 474 /* n */ SIZE16,
7212898e 475#else
a7a3cfaa 476 0,
7212898e 477#endif
a7a3cfaa
TH
478 0, 0, 0, 0,
479 /* s */ sizeof(short),
480 0, 0,
7212898e 481#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 482 /* v */ SIZE16,
7212898e 483#else
a7a3cfaa 484 0,
7212898e 485#endif
a7a3cfaa
TH
486 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
487 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
488 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
489 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
490 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
495};
496#else
497/* EBCDIC (or bust) */
f43a9a31 498STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
499 /* normal */
500 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
506 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
507 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
508 0, 0, 0,
509 /* c */ sizeof(char),
510 /* d */ sizeof(double),
511 0,
512 /* f */ sizeof(float),
513 0, 0,
514 /* i */ sizeof(int),
515 0, 0, 0, 0, 0, 0, 0,
516 /* j */ IVSIZE,
517 0,
518 /* l */ SIZE32,
519 0,
520 /* n */ SIZE16,
521 0,
522 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 523#if defined(HAS_QUAD)
a7a3cfaa 524 /* q */ sizeof(Quad_t),
80a13697 525#else
a7a3cfaa 526 0,
80a13697 527#endif
a7a3cfaa
TH
528 0, 0, 0, 0, 0, 0, 0, 0, 0,
529 /* s */ SIZE16,
530 0, 0,
531 /* v */ SIZE16,
532 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
533 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
534 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
535 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 536#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 537 /* D */ LONG_DOUBLESIZE,
80a13697 538#else
a7a3cfaa 539 0,
80a13697 540#endif
a7a3cfaa
TH
541 0,
542 /* F */ NVSIZE,
543 0, 0,
544 /* I */ sizeof(unsigned int),
545 0, 0, 0, 0, 0, 0, 0,
546 /* J */ UVSIZE,
547 0,
548 /* L */ SIZE32,
549 0,
550 /* N */ SIZE32,
551 0, 0,
80a13697 552#if defined(HAS_QUAD)
a7a3cfaa 553 /* Q */ sizeof(Uquad_t),
80a13697 554#else
a7a3cfaa 555 0,
80a13697 556#endif
a7a3cfaa
TH
557 0, 0, 0, 0, 0, 0, 0, 0, 0,
558 /* S */ SIZE16,
559 0,
560 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
561 /* V */ SIZE32,
562 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
564 0, 0, 0, 0, 0, 0, 0, 0, 0,
565 /* shrieking */
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
572 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
573 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
574 0, 0, 0, 0, 0, 0, 0, 0, 0,
575 /* i */ sizeof(int),
576 0, 0, 0, 0, 0, 0, 0, 0, 0,
577 /* l */ sizeof(long),
578 0,
7212898e 579#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 580 /* n */ SIZE16,
7212898e 581#else
a7a3cfaa 582 0,
7212898e 583#endif
a7a3cfaa
TH
584 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
585 /* s */ sizeof(short),
586 0, 0,
7212898e 587#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 588 /* v */ SIZE16,
7212898e 589#else
a7a3cfaa 590 0,
7212898e 591#endif
a7a3cfaa
TH
592 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
593 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
594 0, 0, 0,
595 /* I */ sizeof(unsigned int),
596 0, 0, 0, 0, 0, 0, 0, 0, 0,
597 /* L */ sizeof(unsigned long),
598 0,
7212898e 599#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 600 /* N */ SIZE32,
7212898e 601#else
a7a3cfaa 602 0,
7212898e 603#endif
a7a3cfaa
TH
604 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
605 /* S */ sizeof(unsigned short),
606 0, 0,
7212898e 607#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 608 /* V */ SIZE32,
7212898e 609#else
a7a3cfaa 610 0,
7212898e 611#endif
a7a3cfaa
TH
612 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
613 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
614};
615#endif
616
08ca2aa3 617STATIC U8
f7fe979e 618uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 619{
08ca2aa3 620 STRLEN retlen;
0bcc34c2 621 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 622 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
623 /* We try to process malformed UTF-8 as much as possible (preferrably with
624 warnings), but these two mean we make no progress in the string and
625 might enter an infinite loop */
626 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
627 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
628 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 629 if (val >= 0x100) {
f337b084 630 if (ckWARN(WARN_UNPACK))
08ca2aa3
TH
631 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
632 "Character in '%c' format wrapped in unpack",
f337b084 633 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
634 val &= 0xff;
635 }
636 *s += retlen;
fe2774ed 637 return (U8)val;
08ca2aa3
TH
638}
639
f337b084
TH
640#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
641 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
642 *(U8 *)(s)++)
643
644STATIC bool
f7fe979e 645uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
646{
647 UV val;
648 STRLEN retlen;
f7fe979e 649 const char *from = *s;
08ca2aa3 650 int bad = 0;
f7fe979e 651 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3
TH
652 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
653 for (;buf_len > 0; buf_len--) {
654 if (from >= end) return FALSE;
f337b084 655 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
656 if (retlen == (STRLEN) -1 || retlen == 0) {
657 from += UTF8SKIP(from);
658 bad |= 1;
659 } else from += retlen;
660 if (val >= 0x100) {
661 bad |= 2;
662 val &= 0xff;
663 }
fe2774ed 664 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
665 }
666 /* We have enough characters for the buffer. Did we have problems ? */
667 if (bad) {
668 if (bad & 1) {
669 /* Rewalk the string fragment while warning */
f7fe979e 670 const char *ptr;
9e27e96a 671 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
672 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
673 if (ptr >= end) break;
f337b084 674 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
675 }
676 if (from > end) from = end;
677 }
678 if ((bad & 2) && ckWARN(WARN_UNPACK))
fc241834 679 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084
TH
680 WARN_PACK : WARN_UNPACK),
681 "Character(s) in '%c' format wrapped in %s",
fc241834 682 (int) TYPE_NO_MODIFIERS(datumtype),
f337b084 683 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
684 }
685 *s = from;
686 return TRUE;
687}
688
689STATIC bool
f7fe979e 690next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 691{
97aff369 692 dVAR;
08ca2aa3 693 STRLEN retlen;
0bcc34c2 694 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
695 if (val >= 0x100 || !ISUUCHAR(val) ||
696 retlen == (STRLEN) -1 || retlen == 0) {
697 *out = 0;
698 return FALSE;
699 }
700 *out = PL_uudmap[val] & 077;
f337b084 701 *s += retlen;
08ca2aa3
TH
702 return TRUE;
703}
78d46eaa 704
64844641
AL
705STATIC char *
706S_bytes_to_uni(pTHX_ const U8 *start, STRLEN len, char *dest) {
0bcc34c2 707 const U8 * const end = start + len;
64844641 708
f337b084 709 while (start < end) {
64844641 710 U8 buffer[UTF8_MAXLEN];
f7fe979e 711 const int length =
f337b084
TH
712 uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
713 switch(length) {
714 case 1:
64844641 715 *dest++ = buffer[0];
f337b084
TH
716 break;
717 case 2:
64844641
AL
718 *dest++ = buffer[0];
719 *dest++ = buffer[1];
f337b084
TH
720 break;
721 default:
722 Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
723 *start, length);
724 }
725 start++;
726 }
64844641 727 return dest;
f337b084
TH
728}
729
230e1fce
NC
730#define PUSH_BYTES(utf8, cur, buf, len) \
731STMT_START { \
64844641
AL
732 if (utf8) \
733 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
230e1fce
NC
734 else { \
735 Copy(buf, cur, len, char); \
736 (cur) += (len); \
737 } \
f337b084
TH
738} STMT_END
739
740#define GROWING(utf8, cat, start, cur, in_len) \
741STMT_START { \
742 STRLEN glen = (in_len); \
3473cf63 743 if (utf8) glen *= UTF8_EXPAND; \
f337b084 744 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 745 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
746 (cur) = (start) + SvCUR(cat); \
747 } \
748} STMT_END
749
750#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
751STMT_START { \
f7fe979e 752 const STRLEN glen = (in_len); \
f337b084 753 STRLEN gl = glen; \
3473cf63 754 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
755 if ((cur) + gl >= (start) + SvLEN(cat)) { \
756 *cur = '\0'; \
b162af07 757 SvCUR_set((cat), (cur) - (start)); \
0bd48802 758 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
759 (cur) = (start) + SvCUR(cat); \
760 } \
761 PUSH_BYTES(utf8, cur, buf, glen); \
762} STMT_END
763
764#define PUSH_BYTE(utf8, s, byte) \
765STMT_START { \
766 if (utf8) { \
f7fe979e 767 const U8 au8 = (byte); \
64844641 768 (s) = bytes_to_uni(&au8, 1, (s)); \
f337b084
TH
769 } else *(U8 *)(s)++ = (byte); \
770} STMT_END
771
772/* Only to be used inside a loop (see the break) */
773#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
774STMT_START { \
775 STRLEN retlen; \
776 if (str >= end) break; \
777 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
778 if (retlen == (STRLEN) -1 || retlen == 0) { \
779 *cur = '\0'; \
780 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
781 } \
782 str += retlen; \
783} STMT_END
784
f7fe979e
AL
785static const char *_action( const tempsym_t* symptr )
786{
787 return ( symptr->flags & FLAG_PACK ) ? "pack" : "unpack";
788}
789
206947d2 790/* Returns the sizeof() struct described by pat */
028d1f6d 791STATIC I32
f337b084 792S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 793{
f337b084 794 I32 total = 0;
206947d2 795
49704364 796 while (next_symbol(symptr)) {
f337b084 797 I32 len;
f7fe979e 798 int size;
f337b084
TH
799
800 switch (symptr->howlen) {
fc241834 801 case e_star:
49704364 802 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 803 _action( symptr ) );
49704364 804 break;
f337b084
TH
805 default:
806 /* e_no_len and e_number */
807 len = symptr->length;
808 break;
49704364
LW
809 }
810
a7a3cfaa 811 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 812 if (!size) {
f7fe979e 813 int star;
80a13697
NC
814 /* endianness doesn't influence the size of a type */
815 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
816 default:
817 Perl_croak(aTHX_ "Invalid type '%c' in %s",
818 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 819 _action( symptr ) );
28be1210
TH
820#ifdef PERL_PACK_CAN_SHRIEKSIGN
821 case '.' | TYPE_IS_SHRIEKING:
822 case '@' | TYPE_IS_SHRIEKING:
823#endif
80a13697 824 case '@':
28be1210 825 case '.':
80a13697
NC
826 case '/':
827 case 'U': /* XXXX Is it correct? */
828 case 'w':
829 case 'u':
830 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 831 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 832 _action( symptr ) );
80a13697
NC
833 case '%':
834 size = 0;
835 break;
836 case '(':
fc241834
RGS
837 {
838 tempsym_t savsym = *symptr;
839 symptr->patptr = savsym.grpbeg;
840 symptr->patend = savsym.grpend;
841 /* XXXX Theoretically, we need to measure many times at
842 different positions, since the subexpression may contain
843 alignment commands, but be not of aligned length.
844 Need to detect this and croak(). */
845 size = measure_struct(symptr);
846 *symptr = savsym;
847 break;
848 }
80a13697
NC
849 case 'X' | TYPE_IS_SHRIEKING:
850 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
851 */
852 if (!len) /* Avoid division by 0 */
853 len = 1;
854 len = total % len; /* Assumed: the start is aligned. */
855 /* FALL THROUGH */
856 case 'X':
857 size = -1;
858 if (total < len)
f7fe979e 859 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
860 break;
861 case 'x' | TYPE_IS_SHRIEKING:
862 if (!len) /* Avoid division by 0 */
863 len = 1;
864 star = total % len; /* Assumed: the start is aligned. */
865 if (star) /* Other portable ways? */
866 len = len - star;
867 else
868 len = 0;
869 /* FALL THROUGH */
870 case 'x':
871 case 'A':
872 case 'Z':
873 case 'a':
80a13697
NC
874 size = 1;
875 break;
876 case 'B':
877 case 'b':
878 len = (len + 7)/8;
879 size = 1;
880 break;
881 case 'H':
882 case 'h':
883 len = (len + 1)/2;
884 size = 1;
885 break;
78d46eaa 886
80a13697
NC
887 case 'P':
888 len = 1;
889 size = sizeof(char*);
78d46eaa
NC
890 break;
891 }
206947d2
IZ
892 }
893 total += len * size;
894 }
895 return total;
896}
897
49704364
LW
898
899/* locate matching closing parenthesis or bracket
900 * returns char pointer to char after match, or NULL
901 */
f7fe979e
AL
902STATIC const char *
903S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
18529408 904{
49704364 905 while (patptr < patend) {
f7fe979e 906 const char c = *patptr++;
49704364
LW
907
908 if (isSPACE(c))
909 continue;
910 else if (c == ender)
911 return patptr-1;
912 else if (c == '#') {
913 while (patptr < patend && *patptr != '\n')
914 patptr++;
915 continue;
916 } else if (c == '(')
917 patptr = group_end(patptr, patend, ')') + 1;
918 else if (c == '[')
919 patptr = group_end(patptr, patend, ']') + 1;
18529408 920 }
49704364
LW
921 Perl_croak(aTHX_ "No group ending character '%c' found in template",
922 ender);
923 return 0;
18529408
IZ
924}
925
49704364
LW
926
927/* Convert unsigned decimal number to binary.
928 * Expects a pointer to the first digit and address of length variable
929 * Advances char pointer to 1st non-digit char and returns number
fc241834 930 */
f7fe979e
AL
931STATIC const char *
932S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
49704364
LW
933{
934 I32 len = *patptr++ - '0';
935 while (isDIGIT(*patptr)) {
936 if (len >= 0x7FFFFFFF/10)
937 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
938 len = (len * 10) + (*patptr++ - '0');
939 }
940 *lenptr = len;
941 return patptr;
942}
943
944/* The marvellous template parsing routine: Using state stored in *symptr,
945 * locates next template code and count
946 */
947STATIC bool
f337b084 948S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 949{
f7fe979e 950 const char* patptr = symptr->patptr;
0bcc34c2 951 const char* const patend = symptr->patend;
49704364
LW
952
953 symptr->flags &= ~FLAG_SLASH;
954
955 while (patptr < patend) {
956 if (isSPACE(*patptr))
957 patptr++;
958 else if (*patptr == '#') {
959 patptr++;
960 while (patptr < patend && *patptr != '\n')
961 patptr++;
962 if (patptr < patend)
963 patptr++;
964 } else {
fc241834 965 /* We should have found a template code */
49704364 966 I32 code = *patptr++ & 0xFF;
66c611c5 967 U32 inherited_modifiers = 0;
49704364
LW
968
969 if (code == ','){ /* grandfather in commas but with a warning */
970 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
971 symptr->flags |= FLAG_COMMA;
972 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 973 "Invalid type ',' in %s", _action( symptr ) );
49704364
LW
974 }
975 continue;
976 }
fc241834 977
49704364 978 /* for '(', skip to ')' */
fc241834 979 if (code == '(') {
49704364
LW
980 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
981 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 982 _action( symptr ) );
49704364
LW
983 symptr->grpbeg = patptr;
984 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
985 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
986 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 987 _action( symptr ) );
49704364
LW
988 }
989
66c611c5
MHM
990 /* look for group modifiers to inherit */
991 if (TYPE_ENDIANNESS(symptr->flags)) {
992 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
993 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
994 }
995
1109a392
MHM
996 /* look for modifiers */
997 while (patptr < patend) {
b7787f18
AL
998 const char *allowed;
999 I32 modifier;
1109a392
MHM
1000 switch (*patptr) {
1001 case '!':
1002 modifier = TYPE_IS_SHRIEKING;
7212898e 1003 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 1004 break;
7212898e 1005#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
1006 case '>':
1007 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 1008 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
1009 break;
1010 case '<':
1011 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 1012 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 1013 break;
f337b084 1014#endif /* PERL_PACK_CAN_BYTEORDER */
1109a392 1015 default:
b7787f18
AL
1016 allowed = "";
1017 modifier = 0;
1109a392
MHM
1018 break;
1019 }
66c611c5 1020
1109a392
MHM
1021 if (modifier == 0)
1022 break;
66c611c5 1023
1109a392
MHM
1024 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1025 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 1026 allowed, _action( symptr ) );
66c611c5
MHM
1027
1028 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 1029 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 1030 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
1031 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1032 TYPE_ENDIANNESS_MASK)
1033 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 1034 *patptr, _action( symptr ) );
66c611c5 1035
041457d9 1036 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1109a392
MHM
1037 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1038 "Duplicate modifier '%c' after '%c' in %s",
1039 *patptr, (int) TYPE_NO_MODIFIERS(code),
f7fe979e 1040 _action( symptr ) );
1109a392 1041 }
66c611c5 1042
1109a392
MHM
1043 code |= modifier;
1044 patptr++;
49704364
LW
1045 }
1046
66c611c5
MHM
1047 /* inherit modifiers */
1048 code |= inherited_modifiers;
1049
fc241834 1050 /* look for count and/or / */
49704364
LW
1051 if (patptr < patend) {
1052 if (isDIGIT(*patptr)) {
1053 patptr = get_num( patptr, &symptr->length );
1054 symptr->howlen = e_number;
1055
1056 } else if (*patptr == '*') {
1057 patptr++;
1058 symptr->howlen = e_star;
1059
1060 } else if (*patptr == '[') {
f7fe979e 1061 const char* lenptr = ++patptr;
49704364
LW
1062 symptr->howlen = e_number;
1063 patptr = group_end( patptr, patend, ']' ) + 1;
1064 /* what kind of [] is it? */
1065 if (isDIGIT(*lenptr)) {
1066 lenptr = get_num( lenptr, &symptr->length );
1067 if( *lenptr != ']' )
1068 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 1069 _action( symptr ) );
49704364
LW
1070 } else {
1071 tempsym_t savsym = *symptr;
1072 symptr->patend = patptr-1;
1073 symptr->patptr = lenptr;
1074 savsym.length = measure_struct(symptr);
1075 *symptr = savsym;
1076 }
1077 } else {
1078 symptr->howlen = e_no_len;
1079 symptr->length = 1;
1080 }
1081
1082 /* try to find / */
1083 while (patptr < patend) {
1084 if (isSPACE(*patptr))
1085 patptr++;
1086 else if (*patptr == '#') {
1087 patptr++;
1088 while (patptr < patend && *patptr != '\n')
1089 patptr++;
1090 if (patptr < patend)
1091 patptr++;
1092 } else {
66c611c5 1093 if (*patptr == '/') {
49704364
LW
1094 symptr->flags |= FLAG_SLASH;
1095 patptr++;
66c611c5
MHM
1096 if (patptr < patend &&
1097 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 1098 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 1099 _action( symptr ) );
49704364
LW
1100 }
1101 break;
1102 }
18529408 1103 }
49704364
LW
1104 } else {
1105 /* at end - no count, no / */
1106 symptr->howlen = e_no_len;
1107 symptr->length = 1;
1108 }
1109
1110 symptr->code = code;
fc241834 1111 symptr->patptr = patptr;
49704364 1112 return TRUE;
18529408 1113 }
49704364 1114 }
fc241834 1115 symptr->patptr = patptr;
49704364 1116 return FALSE;
18529408
IZ
1117}
1118
18529408 1119/*
fc241834 1120 There is no way to cleanly handle the case where we should process the
08ca2aa3 1121 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
1122 (e.g. estimates like strend-s as an upper bound for the number of
1123 characters left wouldn't work). So if we foresee the need of this
1124 (pattern starts with U or contains U0), we want to work on the encoded
1125 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
1126 themselves if they need to do a lot of unpacks like this on it
1127*/
fc241834 1128STATIC bool
08ca2aa3
TH
1129need_utf8(const char *pat, const char *patend)
1130{
1131 bool first = TRUE;
1132 while (pat < patend) {
1133 if (pat[0] == '#') {
1134 pat++;
f7fe979e 1135 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1136 if (!pat) return FALSE;
1137 } else if (pat[0] == 'U') {
1138 if (first || pat[1] == '0') return TRUE;
1139 } else first = FALSE;
1140 pat++;
1141 }
1142 return FALSE;
1143}
1144
1145STATIC char
1146first_symbol(const char *pat, const char *patend) {
1147 while (pat < patend) {
1148 if (pat[0] != '#') return pat[0];
1149 pat++;
f7fe979e 1150 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1151 if (!pat) return 0;
1152 pat++;
1153 }
1154 return 0;
1155}
1156
1157/*
7accc089
JH
1158=for apidoc unpackstring
1159
608d3aed
LW
1160The engine implementing unpack() Perl function. C<unpackstring> puts the
1161extracted list items on the stack and returns the number of elements.
1162Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
7accc089
JH
1163
1164=cut */
1165
1166I32
f7fe979e 1167Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 1168{
f7fe979e 1169 tempsym_t sym;
08ca2aa3 1170
f337b084 1171 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1172 else if (need_utf8(pat, patend)) {
1173 /* We probably should try to avoid this in case a scalar context call
1174 wouldn't get to the "U0" */
1175 STRLEN len = strend - s;
230e1fce 1176 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1177 SAVEFREEPV(s);
1178 strend = s + len;
f337b084 1179 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1180 }
1181
f337b084
TH
1182 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1183 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1184
f7fe979e 1185 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
1186
1187 return unpack_rec(&sym, s, s, strend, NULL );
1188}
1189
49704364
LW
1190STATIC
1191I32
f7fe979e 1192S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 1193{
27da23d5 1194 dVAR; dSP;
18529408 1195 SV *sv;
f7fe979e 1196 const I32 start_sp_offset = SP - PL_stack_base;
49704364 1197 howlen_t howlen;
a6ec74c1 1198
a6ec74c1 1199 I32 checksum = 0;
92d41999 1200 UV cuv = 0;
a6ec74c1 1201 NV cdouble = 0.0;
f337b084 1202 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 1203 bool beyond = FALSE;
21c16052 1204 bool explicit_length;
9e27e96a 1205 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 1206 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
28be1210 1207 symptr->strbeg = s - strbeg;
49704364 1208
49704364 1209 while (next_symbol(symptr)) {
a7a3cfaa 1210 packprops_t props;
9e27e96a 1211 I32 len;
f337b084 1212 I32 datumtype = symptr->code;
206947d2 1213 /* do first one only unless in list context
08ca2aa3 1214 / is implemented by unpacking the count, then popping it from the
206947d2 1215 stack, so must check that we're not in the middle of a / */
49704364 1216 if ( unpack_only_one
206947d2 1217 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1218 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1219 break;
49704364 1220
f337b084 1221 switch (howlen = symptr->howlen) {
fc241834
RGS
1222 case e_star:
1223 len = strend - strbeg; /* long enough */
49704364 1224 break;
f337b084
TH
1225 default:
1226 /* e_no_len and e_number */
1227 len = symptr->length;
1228 break;
49704364 1229 }
18529408 1230
21c16052 1231 explicit_length = TRUE;
a6ec74c1 1232 redo_switch:
49704364 1233 beyond = s >= strend;
a7a3cfaa
TH
1234
1235 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1236 if (props) {
1237 /* props nonzero means we can process this letter. */
9e27e96a
AL
1238 const long size = props & PACK_SIZE_MASK;
1239 const long howmany = (strend - s) / size;
a7a3cfaa
TH
1240 if (len > howmany)
1241 len = howmany;
1242
1243 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1244 if (len && unpack_only_one) len = 1;
1245 EXTEND(SP, len);
1246 EXTEND_MORTAL(len);
78d46eaa
NC
1247 }
1248 }
a7a3cfaa 1249
1109a392 1250 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1251 default:
1109a392 1252 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1253
a6ec74c1 1254 case '%':
49704364 1255 if (howlen == e_no_len)
18529408 1256 len = 16; /* len is not specified */
a6ec74c1 1257 checksum = len;
92d41999 1258 cuv = 0;
a6ec74c1 1259 cdouble = 0;
18529408 1260 continue;
a6ec74c1 1261 break;
18529408
IZ
1262 case '(':
1263 {
49704364 1264 tempsym_t savsym = *symptr;
9e27e96a 1265 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 1266 symptr->flags |= group_modifiers;
49704364 1267 symptr->patend = savsym.grpend;
28be1210 1268 symptr->previous = &savsym;
49704364 1269 symptr->level++;
18529408
IZ
1270 PUTBACK;
1271 while (len--) {
49704364 1272 symptr->patptr = savsym.grpbeg;
f337b084
TH
1273 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1274 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1275 unpack_rec(symptr, s, strbeg, strend, &s);
1276 if (s == strend && savsym.howlen == e_star)
49704364 1277 break; /* No way to continue */
18529408
IZ
1278 }
1279 SPAGAIN;
28be1210 1280 savsym.flags = symptr->flags & ~group_modifiers;
49704364 1281 *symptr = savsym;
18529408
IZ
1282 break;
1283 }
28be1210
TH
1284#ifdef PERL_PACK_CAN_SHRIEKSIGN
1285 case '.' | TYPE_IS_SHRIEKING:
1286#endif
1287 case '.': {
9e27e96a 1288 const char *from;
28be1210
TH
1289 SV *sv;
1290#ifdef PERL_PACK_CAN_SHRIEKSIGN
9e27e96a 1291 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210 1292#else /* PERL_PACK_CAN_SHRIEKSIGN */
9e27e96a 1293 const bool u8 = utf8;
28be1210
TH
1294#endif
1295 if (howlen == e_star) from = strbeg;
1296 else if (len <= 0) from = s;
1297 else {
1298 tempsym_t *group = symptr;
1299
1300 while (--len && group) group = group->previous;
1301 from = group ? strbeg + group->strbeg : strbeg;
1302 }
1303 sv = from <= s ?
00646304
CB
1304 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1305 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
28be1210
TH
1306 XPUSHs(sv_2mortal(sv));
1307 break;
1308 }
1309#ifdef PERL_PACK_CAN_SHRIEKSIGN
1310 case '@' | TYPE_IS_SHRIEKING:
1311#endif
a6ec74c1 1312 case '@':
28be1210
TH
1313 s = strbeg + symptr->strbeg;
1314#ifdef PERL_PACK_CAN_SHRIEKSIGN
1315 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1316#else /* PERL_PACK_CAN_SHRIEKSIGN */
1317 if (utf8)
1318#endif
1319 {
08ca2aa3
TH
1320 while (len > 0) {
1321 if (s >= strend)
1322 Perl_croak(aTHX_ "'@' outside of string in unpack");
1323 s += UTF8SKIP(s);
1324 len--;
1325 }
1326 if (s > strend)
1327 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1328 } else {
28be1210 1329 if (strend-s < len)
fc241834 1330 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1331 s += len;
08ca2aa3 1332 }
a6ec74c1 1333 break;
62f95557
IZ
1334 case 'X' | TYPE_IS_SHRIEKING:
1335 if (!len) /* Avoid division by 0 */
1336 len = 1;
08ca2aa3 1337 if (utf8) {
f7fe979e 1338 const char *hop, *last;
f337b084
TH
1339 I32 l = len;
1340 hop = last = strbeg;
1341 while (hop < s) {
1342 hop += UTF8SKIP(hop);
1343 if (--l == 0) {
08ca2aa3 1344 last = hop;
f337b084
TH
1345 l = len;
1346 }
fc241834 1347 }
f337b084
TH
1348 if (last > s)
1349 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1350 s = last;
1351 break;
f337b084
TH
1352 }
1353 len = (s - strbeg) % len;
62f95557 1354 /* FALL THROUGH */
a6ec74c1 1355 case 'X':
08ca2aa3
TH
1356 if (utf8) {
1357 while (len > 0) {
1358 if (s <= strbeg)
1359 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1360 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1361 if (s <= strbeg)
1362 Perl_croak(aTHX_ "'X' outside of string in unpack");
1363 }
1364 len--;
1365 }
1366 } else {
fc241834
RGS
1367 if (len > s - strbeg)
1368 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1369 s -= len;
08ca2aa3 1370 }
a6ec74c1 1371 break;
9e27e96a
AL
1372 case 'x' | TYPE_IS_SHRIEKING: {
1373 I32 ai32;
62f95557
IZ
1374 if (!len) /* Avoid division by 0 */
1375 len = 1;
230e1fce
NC
1376 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1377 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1378 if (ai32 == 0) break;
1379 len -= ai32;
9e27e96a 1380 }
62f95557 1381 /* FALL THROUGH */
a6ec74c1 1382 case 'x':
08ca2aa3
TH
1383 if (utf8) {
1384 while (len>0) {
1385 if (s >= strend)
1386 Perl_croak(aTHX_ "'x' outside of string in unpack");
1387 s += UTF8SKIP(s);
1388 len--;
1389 }
1390 } else {
fc241834
RGS
1391 if (len > strend - s)
1392 Perl_croak(aTHX_ "'x' outside of string in unpack");
1393 s += len;
f337b084 1394 }
a6ec74c1
JH
1395 break;
1396 case '/':
49704364
LW
1397 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1398 break;
a6ec74c1
JH
1399 case 'A':
1400 case 'Z':
1401 case 'a':
08ca2aa3
TH
1402 if (checksum) {
1403 /* Preliminary length estimate is assumed done in 'W' */
1404 if (len > strend - s) len = strend - s;
1405 goto W_checksum;
1406 }
1407 if (utf8) {
1408 I32 l;
f7fe979e 1409 const char *hop;
08ca2aa3
TH
1410 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1411 if (hop >= strend) {
1412 if (hop > strend)
1413 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1414 break;
fc241834 1415 }
a6ec74c1 1416 }
08ca2aa3
TH
1417 if (hop > strend)
1418 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1419 len = hop - s;
1420 } else if (len > strend - s)
1421 len = strend - s;
1422
1423 if (datumtype == 'Z') {
1424 /* 'Z' strips stuff after first null */
f7fe979e 1425 const char *ptr, *end;
f337b084
TH
1426 end = s + len;
1427 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1428 sv = newSVpvn(s, ptr-s);
1429 if (howlen == e_star) /* exact for 'Z*' */
1430 len = ptr-s + (ptr != strend ? 1 : 0);
1431 } else if (datumtype == 'A') {
1432 /* 'A' strips both nulls and spaces */
f7fe979e 1433 const char *ptr;
18bdf90a
TH
1434 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1435 for (ptr = s+len-1; ptr >= s; ptr--)
1436 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
230e1fce 1437 !is_utf8_space((U8 *) ptr)) break;
18bdf90a
TH
1438 if (ptr >= s) ptr += UTF8SKIP(ptr);
1439 else ptr++;
28be1210 1440 if (ptr > s+len)
18bdf90a
TH
1441 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1442 } else {
1443 for (ptr = s+len-1; ptr >= s; ptr--)
1444 if (*ptr != 0 && !isSPACE(*ptr)) break;
1445 ptr++;
1446 }
08ca2aa3
TH
1447 sv = newSVpvn(s, ptr-s);
1448 } else sv = newSVpvn(s, len);
1449
1450 if (utf8) {
1451 SvUTF8_on(sv);
1452 /* Undo any upgrade done due to need_utf8() */
f337b084 1453 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1454 sv_utf8_downgrade(sv, 0);
a6ec74c1
JH
1455 }
1456 XPUSHs(sv_2mortal(sv));
08ca2aa3 1457 s += len;
a6ec74c1
JH
1458 break;
1459 case 'B':
08ca2aa3
TH
1460 case 'b': {
1461 char *str;
49704364 1462 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1463 len = (strend - s) * 8;
1464 if (checksum) {
1465 if (!PL_bitcount) {
08ca2aa3 1466 int bits;
a02a5408 1467 Newxz(PL_bitcount, 256, char);
a6ec74c1
JH
1468 for (bits = 1; bits < 256; bits++) {
1469 if (bits & 1) PL_bitcount[bits]++;
1470 if (bits & 2) PL_bitcount[bits]++;
1471 if (bits & 4) PL_bitcount[bits]++;
1472 if (bits & 8) PL_bitcount[bits]++;
1473 if (bits & 16) PL_bitcount[bits]++;
1474 if (bits & 32) PL_bitcount[bits]++;
1475 if (bits & 64) PL_bitcount[bits]++;
1476 if (bits & 128) PL_bitcount[bits]++;
1477 }
1478 }
f337b084 1479 if (utf8)
08ca2aa3 1480 while (len >= 8 && s < strend) {
f337b084 1481 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1482 len -= 8;
1483 }
f337b084 1484 else
fc241834 1485 while (len >= 8) {
08ca2aa3 1486 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1487 len -= 8;
1488 }
08ca2aa3
TH
1489 if (len && s < strend) {
1490 U8 bits;
f337b084
TH
1491 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1492 if (datumtype == 'b')
a6ec74c1 1493 while (len-- > 0) {
92d41999 1494 if (bits & 1) cuv++;
a6ec74c1
JH
1495 bits >>= 1;
1496 }
f337b084 1497 else
a6ec74c1 1498 while (len-- > 0) {
08ca2aa3 1499 if (bits & 0x80) cuv++;
a6ec74c1
JH
1500 bits <<= 1;
1501 }
fc241834 1502 }
a6ec74c1
JH
1503 break;
1504 }
08ca2aa3 1505
561b68a9 1506 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1507 SvPOK_on(sv);
1508 str = SvPVX(sv);
1509 if (datumtype == 'b') {
f337b084 1510 U8 bits = 0;
f7fe979e 1511 const I32 ai32 = len;
08ca2aa3
TH
1512 for (len = 0; len < ai32; len++) {
1513 if (len & 7) bits >>= 1;
1514 else if (utf8) {
1515 if (s >= strend) break;
f337b084 1516 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1517 } else bits = *(U8 *) s++;
1518 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1519 }
08ca2aa3 1520 } else {
f337b084 1521 U8 bits = 0;
f7fe979e 1522 const I32 ai32 = len;
08ca2aa3
TH
1523 for (len = 0; len < ai32; len++) {
1524 if (len & 7) bits <<= 1;
1525 else if (utf8) {
1526 if (s >= strend) break;
f337b084 1527 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1528 } else bits = *(U8 *) s++;
1529 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1530 }
1531 }
1532 *str = '\0';
aa07b2f6 1533 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1534 XPUSHs(sv);
a6ec74c1 1535 break;
08ca2aa3 1536 }
a6ec74c1 1537 case 'H':
08ca2aa3
TH
1538 case 'h': {
1539 char *str;
fc241834 1540 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1541 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1542 len = (strend - s) * 2;
561b68a9 1543 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1544 SvPOK_on(sv);
1545 str = SvPVX(sv);
1546 if (datumtype == 'h') {
f337b084 1547 U8 bits = 0;
9e27e96a 1548 I32 ai32 = len;
fc241834
RGS
1549 for (len = 0; len < ai32; len++) {
1550 if (len & 1) bits >>= 4;
1551 else if (utf8) {
1552 if (s >= strend) break;
f337b084 1553 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1554 } else bits = * (U8 *) s++;
a6ec74c1
JH
1555 *str++ = PL_hexdigit[bits & 15];
1556 }
08ca2aa3 1557 } else {
f337b084 1558 U8 bits = 0;
f7fe979e 1559 const I32 ai32 = len;
08ca2aa3
TH
1560 for (len = 0; len < ai32; len++) {
1561 if (len & 1) bits <<= 4;
1562 else if (utf8) {
1563 if (s >= strend) break;
f337b084 1564 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1565 } else bits = *(U8 *) s++;
a6ec74c1
JH
1566 *str++ = PL_hexdigit[(bits >> 4) & 15];
1567 }
1568 }
1569 *str = '\0';
aa07b2f6 1570 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1571 XPUSHs(sv);
a6ec74c1 1572 break;
08ca2aa3 1573 }
a6ec74c1 1574 case 'c':
73cb7263 1575 while (len-- > 0) {
f337b084 1576 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
73cb7263
NC
1577 if (aint >= 128) /* fake up signed chars */
1578 aint -= 256;
08ca2aa3 1579 if (!checksum)
ac7f3b1b 1580 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1581 else if (checksum > bits_in_uv)
1582 cdouble += (NV)aint;
1583 else
1584 cuv += aint;
a6ec74c1
JH
1585 }
1586 break;
1587 case 'C':
08ca2aa3
TH
1588 case 'W':
1589 W_checksum:
35bcd338 1590 if (len == 0) {
fc241834 1591 if (explicit_length && datumtype == 'C')
08ca2aa3 1592 /* Switch to "character" mode */
f337b084 1593 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
35bcd338
JH
1594 break;
1595 }
fc241834 1596 if (datumtype == 'C' ?
f337b084
TH
1597 (symptr->flags & FLAG_DO_UTF8) &&
1598 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
08ca2aa3 1599 while (len-- > 0 && s < strend) {
08ca2aa3 1600 STRLEN retlen;
f7fe979e 1601 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1602 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1603 if (retlen == (STRLEN) -1 || retlen == 0)
1604 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1605 s += retlen;
1606 if (!checksum)
1607 PUSHs(sv_2mortal(newSVuv((UV) val)));
1608 else if (checksum > bits_in_uv)
1609 cdouble += (NV) val;
d6d3e8bd 1610 else
08ca2aa3 1611 cuv += val;
fc241834 1612 }
08ca2aa3 1613 } else if (!checksum)
a6ec74c1 1614 while (len-- > 0) {
f7fe979e 1615 const U8 ch = *(U8 *) s++;
08ca2aa3 1616 PUSHs(sv_2mortal(newSVuv((UV) ch)));
a6ec74c1 1617 }
08ca2aa3
TH
1618 else if (checksum > bits_in_uv)
1619 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1620 else
1621 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1622 break;
1623 case 'U':
35bcd338 1624 if (len == 0) {
08ca2aa3
TH
1625 if (explicit_length) {
1626 /* Switch to "bytes in UTF-8" mode */
f337b084 1627 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1628 else
1629 /* Should be impossible due to the need_utf8() test */
1630 Perl_croak(aTHX_ "U0 mode on a byte string");
1631 }
35bcd338
JH
1632 break;
1633 }
08ca2aa3 1634 if (len > strend - s) len = strend - s;
fc241834 1635 if (!checksum) {
08ca2aa3
TH
1636 if (len && unpack_only_one) len = 1;
1637 EXTEND(SP, len);
1638 EXTEND_MORTAL(len);
fc241834 1639 }
08ca2aa3
TH
1640 while (len-- > 0 && s < strend) {
1641 STRLEN retlen;
1642 UV auv;
1643 if (utf8) {
1644 U8 result[UTF8_MAXLEN];
f7fe979e 1645 const char *ptr = s;
08ca2aa3 1646 STRLEN len;
08ca2aa3
TH
1647 /* Bug: warns about bad utf8 even if we are short on bytes
1648 and will break out of the loop */
230e1fce
NC
1649 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1650 'U'))
08ca2aa3
TH
1651 break;
1652 len = UTF8SKIP(result);
fc241834 1653 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1654 (char *) &result[1], len-1, 'U')) break;
08ca2aa3
TH
1655 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1656 s = ptr;
1657 } else {
1658 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1659 if (retlen == (STRLEN) -1 || retlen == 0)
1660 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1661 s += retlen;
1662 }
1663 if (!checksum)
1664 PUSHs(sv_2mortal(newSVuv((UV) auv)));
73cb7263 1665 else if (checksum > bits_in_uv)
08ca2aa3 1666 cdouble += (NV) auv;
73cb7263 1667 else
08ca2aa3 1668 cuv += auv;
a6ec74c1
JH
1669 }
1670 break;
49704364
LW
1671 case 's' | TYPE_IS_SHRIEKING:
1672#if SHORTSIZE != SIZE16
73cb7263 1673 while (len-- > 0) {
08ca2aa3 1674 short ashort;
f337b084
TH
1675 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1676 DO_BO_UNPACK(ashort, s);
08ca2aa3 1677 if (!checksum)
ac7f3b1b 1678 PUSHs(sv_2mortal(newSViv((IV)ashort)));
73cb7263
NC
1679 else if (checksum > bits_in_uv)
1680 cdouble += (NV)ashort;
1681 else
1682 cuv += ashort;
49704364
LW
1683 }
1684 break;
1685#else
1686 /* Fallthrough! */
a6ec74c1 1687#endif
49704364 1688 case 's':
73cb7263 1689 while (len-- > 0) {
08ca2aa3
TH
1690 I16 ai16;
1691
1692#if U16SIZE > SIZE16
1693 ai16 = 0;
1694#endif
f337b084 1695 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1696 DO_BO_UNPACK(ai16, 16);
1109a392 1697#if U16SIZE > SIZE16
73cb7263
NC
1698 if (ai16 > 32767)
1699 ai16 -= 65536;
a6ec74c1 1700#endif
08ca2aa3 1701 if (!checksum)
ac7f3b1b 1702 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263
NC
1703 else if (checksum > bits_in_uv)
1704 cdouble += (NV)ai16;
1705 else
1706 cuv += ai16;
a6ec74c1
JH
1707 }
1708 break;
49704364
LW
1709 case 'S' | TYPE_IS_SHRIEKING:
1710#if SHORTSIZE != SIZE16
73cb7263 1711 while (len-- > 0) {
08ca2aa3 1712 unsigned short aushort;
f337b084
TH
1713 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1714 DO_BO_UNPACK(aushort, s);
08ca2aa3
TH
1715 if (!checksum)
1716 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
73cb7263
NC
1717 else if (checksum > bits_in_uv)
1718 cdouble += (NV)aushort;
1719 else
1720 cuv += aushort;
49704364
LW
1721 }
1722 break;
1723#else
1724 /* Fallhrough! */
1725#endif
a6ec74c1
JH
1726 case 'v':
1727 case 'n':
1728 case 'S':
73cb7263 1729 while (len-- > 0) {
08ca2aa3
TH
1730 U16 au16;
1731#if U16SIZE > SIZE16
1732 au16 = 0;
1733#endif
f337b084 1734 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1735 DO_BO_UNPACK(au16, 16);
a6ec74c1 1736#ifdef HAS_NTOHS
73cb7263
NC
1737 if (datumtype == 'n')
1738 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1739#endif
1740#ifdef HAS_VTOHS
73cb7263
NC
1741 if (datumtype == 'v')
1742 au16 = vtohs(au16);
a6ec74c1 1743#endif
08ca2aa3
TH
1744 if (!checksum)
1745 PUSHs(sv_2mortal(newSVuv((UV)au16)));
73cb7263 1746 else if (checksum > bits_in_uv)
f337b084 1747 cdouble += (NV) au16;
73cb7263
NC
1748 else
1749 cuv += au16;
a6ec74c1
JH
1750 }
1751 break;
7212898e 1752#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1753 case 'v' | TYPE_IS_SHRIEKING:
1754 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1755 while (len-- > 0) {
08ca2aa3
TH
1756 I16 ai16;
1757# if U16SIZE > SIZE16
1758 ai16 = 0;
1759# endif
f337b084 1760 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1761# ifdef HAS_NTOHS
73cb7263 1762 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1763 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1764# endif /* HAS_NTOHS */
1765# ifdef HAS_VTOHS
73cb7263 1766 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1767 ai16 = (I16) vtohs((U16) ai16);
1768# endif /* HAS_VTOHS */
1769 if (!checksum)
ac7f3b1b 1770 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263 1771 else if (checksum > bits_in_uv)
08ca2aa3 1772 cdouble += (NV) ai16;
73cb7263
NC
1773 else
1774 cuv += ai16;
068bd2e7
MHM
1775 }
1776 break;
08ca2aa3 1777#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1778 case 'i':
49704364 1779 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1780 while (len-- > 0) {
08ca2aa3 1781 int aint;
f337b084
TH
1782 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1783 DO_BO_UNPACK(aint, i);
08ca2aa3 1784 if (!checksum)
ac7f3b1b 1785 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1786 else if (checksum > bits_in_uv)
1787 cdouble += (NV)aint;
1788 else
1789 cuv += aint;
a6ec74c1
JH
1790 }
1791 break;
1792 case 'I':
49704364 1793 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1794 while (len-- > 0) {
08ca2aa3 1795 unsigned int auint;
f337b084
TH
1796 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1797 DO_BO_UNPACK(auint, i);
08ca2aa3 1798 if (!checksum)
ac7f3b1b 1799 PUSHs(sv_2mortal(newSVuv((UV)auint)));
73cb7263
NC
1800 else if (checksum > bits_in_uv)
1801 cdouble += (NV)auint;
1802 else
1803 cuv += auint;
a6ec74c1
JH
1804 }
1805 break;
92d41999 1806 case 'j':
73cb7263 1807 while (len-- > 0) {
08ca2aa3 1808 IV aiv;
f337b084 1809 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1810#if IVSIZE == INTSIZE
f337b084 1811 DO_BO_UNPACK(aiv, i);
1109a392 1812#elif IVSIZE == LONGSIZE
f337b084 1813 DO_BO_UNPACK(aiv, l);
1109a392 1814#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1815 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1816#else
1817 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1818#endif
08ca2aa3 1819 if (!checksum)
ac7f3b1b 1820 PUSHs(sv_2mortal(newSViv(aiv)));
73cb7263
NC
1821 else if (checksum > bits_in_uv)
1822 cdouble += (NV)aiv;
1823 else
1824 cuv += aiv;
92d41999
JH
1825 }
1826 break;
1827 case 'J':
73cb7263 1828 while (len-- > 0) {
08ca2aa3 1829 UV auv;
f337b084 1830 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1831#if IVSIZE == INTSIZE
f337b084 1832 DO_BO_UNPACK(auv, i);
08ca2aa3 1833#elif IVSIZE == LONGSIZE
f337b084 1834 DO_BO_UNPACK(auv, l);
08ca2aa3 1835#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1836 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1837#else
1838 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1839#endif
08ca2aa3 1840 if (!checksum)
ac7f3b1b 1841 PUSHs(sv_2mortal(newSVuv(auv)));
73cb7263
NC
1842 else if (checksum > bits_in_uv)
1843 cdouble += (NV)auv;
1844 else
1845 cuv += auv;
92d41999
JH
1846 }
1847 break;
49704364
LW
1848 case 'l' | TYPE_IS_SHRIEKING:
1849#if LONGSIZE != SIZE32
73cb7263 1850 while (len-- > 0) {
08ca2aa3 1851 long along;
f337b084
TH
1852 SHIFT_VAR(utf8, s, strend, along, datumtype);
1853 DO_BO_UNPACK(along, l);
08ca2aa3 1854 if (!checksum)
ac7f3b1b 1855 PUSHs(sv_2mortal(newSViv((IV)along)));
73cb7263
NC
1856 else if (checksum > bits_in_uv)
1857 cdouble += (NV)along;
1858 else
1859 cuv += along;
49704364
LW
1860 }
1861 break;
1862#else
1863 /* Fallthrough! */
a6ec74c1 1864#endif
49704364 1865 case 'l':
73cb7263 1866 while (len-- > 0) {
08ca2aa3
TH
1867 I32 ai32;
1868#if U32SIZE > SIZE32
1869 ai32 = 0;
1870#endif
f337b084 1871 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1872 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1873#if U32SIZE > SIZE32
08ca2aa3 1874 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1875#endif
08ca2aa3 1876 if (!checksum)
ac7f3b1b 1877 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1878 else if (checksum > bits_in_uv)
1879 cdouble += (NV)ai32;
1880 else
1881 cuv += ai32;
a6ec74c1
JH
1882 }
1883 break;
49704364
LW
1884 case 'L' | TYPE_IS_SHRIEKING:
1885#if LONGSIZE != SIZE32
73cb7263 1886 while (len-- > 0) {
08ca2aa3 1887 unsigned long aulong;
f337b084
TH
1888 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1889 DO_BO_UNPACK(aulong, l);
08ca2aa3 1890 if (!checksum)
ac7f3b1b 1891 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
73cb7263
NC
1892 else if (checksum > bits_in_uv)
1893 cdouble += (NV)aulong;
1894 else
1895 cuv += aulong;
49704364
LW
1896 }
1897 break;
1898#else
1899 /* Fall through! */
1900#endif
a6ec74c1
JH
1901 case 'V':
1902 case 'N':
1903 case 'L':
73cb7263 1904 while (len-- > 0) {
08ca2aa3
TH
1905 U32 au32;
1906#if U32SIZE > SIZE32
1907 au32 = 0;
1908#endif
f337b084 1909 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1910 DO_BO_UNPACK(au32, 32);
a6ec74c1 1911#ifdef HAS_NTOHL
73cb7263
NC
1912 if (datumtype == 'N')
1913 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1914#endif
1915#ifdef HAS_VTOHL
73cb7263
NC
1916 if (datumtype == 'V')
1917 au32 = vtohl(au32);
a6ec74c1 1918#endif
08ca2aa3 1919 if (!checksum)
fc241834
RGS
1920 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1921 else if (checksum > bits_in_uv)
1922 cdouble += (NV)au32;
1923 else
1924 cuv += au32;
a6ec74c1
JH
1925 }
1926 break;
7212898e 1927#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1928 case 'V' | TYPE_IS_SHRIEKING:
1929 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1930 while (len-- > 0) {
08ca2aa3
TH
1931 I32 ai32;
1932# if U32SIZE > SIZE32
1933 ai32 = 0;
1934# endif
f337b084 1935 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1936# ifdef HAS_NTOHL
73cb7263
NC
1937 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1938 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1939# endif
1940# ifdef HAS_VTOHL
73cb7263
NC
1941 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1942 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1943# endif
1944 if (!checksum)
ac7f3b1b 1945 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1946 else if (checksum > bits_in_uv)
1947 cdouble += (NV)ai32;
1948 else
1949 cuv += ai32;
068bd2e7
MHM
1950 }
1951 break;
08ca2aa3 1952#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1953 case 'p':
a6ec74c1 1954 while (len-- > 0) {
f7fe979e 1955 const char *aptr;
f337b084 1956 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1957 DO_BO_UNPACK_PC(aptr);
c4c5f44a
NC
1958 /* newSVpv generates undef if aptr is NULL */
1959 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
a6ec74c1
JH
1960 }
1961 break;
1962 case 'w':
a6ec74c1
JH
1963 {
1964 UV auv = 0;
1965 U32 bytes = 0;
fc241834 1966
08ca2aa3
TH
1967 while (len > 0 && s < strend) {
1968 U8 ch;
f337b084 1969 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1970 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1971 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1972 if (ch < 0x80) {
a6ec74c1 1973 bytes = 0;
ac7f3b1b 1974 PUSHs(sv_2mortal(newSVuv(auv)));
a6ec74c1
JH
1975 len--;
1976 auv = 0;
08ca2aa3 1977 continue;
a6ec74c1 1978 }
08ca2aa3 1979 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1980 const char *t;
a6ec74c1
JH
1981
1982 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
1983 while (s < strend) {
f337b084 1984 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1985 sv = mul128(sv, (U8)(ch & 0x7f));
1986 if (!(ch & 0x80)) {
a6ec74c1
JH
1987 bytes = 0;
1988 break;
1989 }
1990 }
10516c54 1991 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1992 while (*t == '0')
1993 t++;
1994 sv_chop(sv, t);
1995 PUSHs(sv_2mortal(sv));
1996 len--;
1997 auv = 0;
1998 }
1999 }
2000 if ((s >= strend) && bytes)
49704364 2001 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
2002 }
2003 break;
2004 case 'P':
49704364
LW
2005 if (symptr->howlen == e_star)
2006 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 2007 EXTEND(SP, 1);
2d3e0934 2008 if (s + sizeof(char*) <= strend) {
08ca2aa3 2009 char *aptr;
f337b084 2010 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 2011 DO_BO_UNPACK_PC(aptr);
fc241834
RGS
2012 /* newSVpvn generates undef if aptr is NULL */
2013 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
08ca2aa3 2014 }
a6ec74c1
JH
2015 break;
2016#ifdef HAS_QUAD
2017 case 'q':
73cb7263 2018 while (len-- > 0) {
08ca2aa3 2019 Quad_t aquad;
f337b084
TH
2020 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2021 DO_BO_UNPACK(aquad, 64);
08ca2aa3
TH
2022 if (!checksum)
2023 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
ac7f3b1b 2024 newSViv((IV)aquad) : newSVnv((NV)aquad)));
73cb7263
NC
2025 else if (checksum > bits_in_uv)
2026 cdouble += (NV)aquad;
2027 else
2028 cuv += aquad;
2029 }
a6ec74c1
JH
2030 break;
2031 case 'Q':
73cb7263 2032 while (len-- > 0) {
08ca2aa3 2033 Uquad_t auquad;
f337b084
TH
2034 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2035 DO_BO_UNPACK(auquad, 64);
08ca2aa3
TH
2036 if (!checksum)
2037 PUSHs(sv_2mortal(auquad <= UV_MAX ?
2038 newSVuv((UV)auquad):newSVnv((NV)auquad)));
73cb7263
NC
2039 else if (checksum > bits_in_uv)
2040 cdouble += (NV)auquad;
2041 else
2042 cuv += auquad;
a6ec74c1
JH
2043 }
2044 break;
08ca2aa3 2045#endif /* HAS_QUAD */
a6ec74c1
JH
2046 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2047 case 'f':
73cb7263 2048 while (len-- > 0) {
08ca2aa3 2049 float afloat;
f337b084 2050 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
2051 DO_BO_UNPACK_N(afloat, float);
2052 if (!checksum)
2053 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2054 else
73cb7263 2055 cdouble += afloat;
fc241834 2056 }
a6ec74c1
JH
2057 break;
2058 case 'd':
73cb7263 2059 while (len-- > 0) {
08ca2aa3 2060 double adouble;
f337b084 2061 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
2062 DO_BO_UNPACK_N(adouble, double);
2063 if (!checksum)
2064 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2065 else
73cb7263 2066 cdouble += adouble;
fc241834 2067 }
a6ec74c1 2068 break;
92d41999 2069 case 'F':
73cb7263 2070 while (len-- > 0) {
08ca2aa3 2071 NV anv;
f337b084 2072 SHIFT_VAR(utf8, s, strend, anv, datumtype);
08ca2aa3
TH
2073 DO_BO_UNPACK_N(anv, NV);
2074 if (!checksum)
2075 PUSHs(sv_2mortal(newSVnv(anv)));
2076 else
73cb7263 2077 cdouble += anv;
fc241834 2078 }
92d41999
JH
2079 break;
2080#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2081 case 'D':
73cb7263 2082 while (len-- > 0) {
08ca2aa3 2083 long double aldouble;
f337b084 2084 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
08ca2aa3
TH
2085 DO_BO_UNPACK_N(aldouble, long double);
2086 if (!checksum)
2087 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2088 else
2089 cdouble += aldouble;
92d41999
JH
2090 }
2091 break;
2092#endif
a6ec74c1
JH
2093 case 'u':
2094 /* MKS:
2095 * Initialise the decode mapping. By using a table driven
2096 * algorithm, the code will be character-set independent
2097 * (and just as fast as doing character arithmetic)
2098 */
2099 if (PL_uudmap['M'] == 0) {
2d3e0934 2100 size_t i;
a6ec74c1 2101
2d3e0934 2102 for (i = 0; i < sizeof(PL_uuemap); ++i)
a6ec74c1
JH
2103 PL_uudmap[(U8)PL_uuemap[i]] = i;
2104 /*
2105 * Because ' ' and '`' map to the same value,
2106 * we need to decode them both the same.
2107 */
2108 PL_uudmap[' '] = 0;
2109 }
08ca2aa3 2110 {
f7fe979e 2111 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 2112 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
2113 if (l) SvPOK_on(sv);
2114 }
2115 if (utf8) {
2116 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2117 I32 a, b, c, d;
2118 char hunk[4];
2119
2120 hunk[3] = '\0';
2121 while (len > 0) {
2122 next_uni_uu(aTHX_ &s, strend, &a);
2123 next_uni_uu(aTHX_ &s, strend, &b);
2124 next_uni_uu(aTHX_ &s, strend, &c);
2125 next_uni_uu(aTHX_ &s, strend, &d);
2126 hunk[0] = (char)((a << 2) | (b >> 4));
2127 hunk[1] = (char)((b << 4) | (c >> 2));
2128 hunk[2] = (char)((c << 6) | d);
2129 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2130 len -= 3;
2131 }
2132 if (s < strend) {
f7fe979e
AL
2133 if (*s == '\n') {
2134 s++;
2135 }
08ca2aa3
TH
2136 else {
2137 /* possible checksum byte */
f7fe979e
AL
2138 const char *skip = s+UTF8SKIP(s);
2139 if (skip < strend && *skip == '\n')
2140 s = skip+1;
08ca2aa3
TH
2141 }
2142 }
2143 }
2144 } else {
fc241834
RGS
2145 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2146 I32 a, b, c, d;
2147 char hunk[4];
a6ec74c1 2148
fc241834
RGS
2149 hunk[3] = '\0';
2150 len = PL_uudmap[*(U8*)s++] & 077;
2151 while (len > 0) {
2152 if (s < strend && ISUUCHAR(*s))
2153 a = PL_uudmap[*(U8*)s++] & 077;
2154 else
2155 a = 0;
2156 if (s < strend && ISUUCHAR(*s))
2157 b = PL_uudmap[*(U8*)s++] & 077;
2158 else
2159 b = 0;
2160 if (s < strend && ISUUCHAR(*s))
2161 c = PL_uudmap[*(U8*)s++] & 077;
2162 else
2163 c = 0;
2164 if (s < strend && ISUUCHAR(*s))
2165 d = PL_uudmap[*(U8*)s++] & 077;
2166 else
2167 d = 0;
2168 hunk[0] = (char)((a << 2) | (b >> 4));
2169 hunk[1] = (char)((b << 4) | (c >> 2));
2170 hunk[2] = (char)((c << 6) | d);
2171 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2172 len -= 3;
2173 }
2174 if (*s == '\n')
2175 s++;
2176 else /* possible checksum byte */
2177 if (s + 1 < strend && s[1] == '\n')
2178 s += 2;
a6ec74c1 2179 }
08ca2aa3
TH
2180 }
2181 XPUSHs(sv);
a6ec74c1
JH
2182 break;
2183 }
49704364 2184
a6ec74c1 2185 if (checksum) {
1109a392 2186 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2187 (checksum > bits_in_uv &&
08ca2aa3
TH
2188 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2189 NV trouble, anv;
a6ec74c1 2190
08ca2aa3 2191 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2192 while (checksum >= 16) {
2193 checksum -= 16;
08ca2aa3 2194 anv *= 65536.0;
a6ec74c1 2195 }
a6ec74c1 2196 while (cdouble < 0.0)
08ca2aa3
TH
2197 cdouble += anv;
2198 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 2199 sv = newSVnv(cdouble);
a6ec74c1
JH
2200 }
2201 else {
fa8ec7c1
NC
2202 if (checksum < bits_in_uv) {
2203 UV mask = ((UV)1 << checksum) - 1;
92d41999 2204 cuv &= mask;
a6ec74c1 2205 }
c4c5f44a 2206 sv = newSVuv(cuv);
a6ec74c1
JH
2207 }
2208 XPUSHs(sv_2mortal(sv));
2209 checksum = 0;
2210 }
fc241834 2211
49704364
LW
2212 if (symptr->flags & FLAG_SLASH){
2213 if (SP - PL_stack_base - start_sp_offset <= 0)
2214 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2215 if( next_symbol(symptr) ){
2216 if( symptr->howlen == e_number )
2217 Perl_croak(aTHX_ "Count after length/code in unpack" );
2218 if( beyond ){
2219 /* ...end of char buffer then no decent length available */
2220 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2221 } else {
2222 /* take top of stack (hope it's numeric) */
2223 len = POPi;
2224 if( len < 0 )
2225 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2226 }
2227 } else {
2228 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2229 }
2230 datumtype = symptr->code;
21c16052 2231 explicit_length = FALSE;
49704364
LW
2232 goto redo_switch;
2233 }
a6ec74c1 2234 }
49704364 2235
18529408
IZ
2236 if (new_s)
2237 *new_s = s;
2238 PUTBACK;
2239 return SP - PL_stack_base - start_sp_offset;
2240}
2241
2242PP(pp_unpack)
2243{
97aff369 2244 dVAR;
18529408 2245 dSP;
bab9c0ac 2246 dPOPPOPssrl;
18529408
IZ
2247 I32 gimme = GIMME_V;
2248 STRLEN llen;
2249 STRLEN rlen;
5c144d81
NC
2250 const char *pat = SvPV_const(left, llen);
2251 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
2252 const char *strend = s + rlen;
2253 const char *patend = pat + llen;
08ca2aa3 2254 I32 cnt;
18529408
IZ
2255
2256 PUTBACK;
7accc089 2257 cnt = unpackstring(pat, patend, s, strend,
49704364 2258 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2259 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2260
18529408
IZ
2261 SPAGAIN;
2262 if ( !cnt && gimme == G_SCALAR )
2263 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2264 RETURN;
2265}
2266
f337b084 2267STATIC U8 *
f7fe979e 2268doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 2269{
f337b084 2270 *h++ = PL_uuemap[len];
a6ec74c1 2271 while (len > 2) {
f337b084
TH
2272 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2273 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2274 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2275 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2276 s += 3;
2277 len -= 3;
2278 }
2279 if (len > 0) {
f7fe979e 2280 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2281 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2282 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2283 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2284 *h++ = PL_uuemap[0];
a6ec74c1 2285 }
f337b084
TH
2286 *h++ = '\n';
2287 return h;
a6ec74c1
JH
2288}
2289
2290STATIC SV *
f7fe979e 2291S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 2292{
8b6e33c7
AL
2293 SV *result = newSVpvn(s, l);
2294 char *const result_c = SvPV_nolen(result); /* convenience */
2295 char *out = result_c;
2296 bool skip = 1;
2297 bool ignore = 0;
a6ec74c1
JH
2298
2299 while (*s) {
2300 switch (*s) {
2301 case ' ':
2302 break;
2303 case '+':
2304 if (!skip) {
2305 SvREFCNT_dec(result);
2306 return (NULL);
2307 }
2308 break;
2309 case '0':
2310 case '1':
2311 case '2':
2312 case '3':
2313 case '4':
2314 case '5':
2315 case '6':
2316 case '7':
2317 case '8':
2318 case '9':
2319 skip = 0;
2320 if (!ignore) {
2321 *(out++) = *s;
2322 }
2323 break;
2324 case '.':
2325 ignore = 1;
2326 break;
2327 default:
2328 SvREFCNT_dec(result);
2329 return (NULL);
2330 }
2331 s++;
2332 }
2333 *(out++) = '\0';
2334 SvCUR_set(result, out - result_c);
2335 return (result);
2336}
2337
2338/* pnum must be '\0' terminated */
2339STATIC int
2340S_div128(pTHX_ SV *pnum, bool *done)
2341{
8b6e33c7
AL
2342 STRLEN len;
2343 char * const s = SvPV(pnum, len);
2344 char *t = s;
2345 int m = 0;
2346
2347 *done = 1;
2348 while (*t) {
2349 const int i = m * 10 + (*t - '0');
2350 const int r = (i >> 7); /* r < 10 */
2351 m = i & 0x7F;
2352 if (r) {
2353 *done = 0;
2354 }
2355 *(t++) = '0' + r;
a6ec74c1 2356 }
8b6e33c7
AL
2357 *(t++) = '\0';
2358 SvCUR_set(pnum, (STRLEN) (t - s));
2359 return (m);
a6ec74c1
JH
2360}
2361
18529408 2362/*
7accc089
JH
2363=for apidoc packlist
2364
2365The engine implementing pack() Perl function.
2366
bfce84ec
AL
2367=cut
2368*/
7accc089
JH
2369
2370void
f7fe979e 2371Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
7accc089 2372{
97aff369 2373 dVAR;
aadb217d
JH
2374 tempsym_t sym;
2375
f7fe979e 2376 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 2377
f337b084
TH
2378 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2379 Also make sure any UTF8 flag is loaded */
56eb0262 2380 SvPV_force_nolen(cat);
bfce84ec
AL
2381 if (DO_UTF8(cat))
2382 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 2383
49704364
LW
2384 (void)pack_rec( cat, &sym, beglist, endlist );
2385}
2386
f337b084
TH
2387/* like sv_utf8_upgrade, but also repoint the group start markers */
2388STATIC void
2389marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2390 STRLEN len;
2391 tempsym_t *group;
f7fe979e
AL
2392 const char *from_ptr, *from_start, *from_end, **marks, **m;
2393 char *to_start, *to_ptr;
f337b084
TH
2394
2395 if (SvUTF8(sv)) return;
2396
aa07b2f6 2397 from_start = SvPVX_const(sv);
f337b084
TH
2398 from_end = from_start + SvCUR(sv);
2399 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2400 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2401 if (from_ptr == from_end) {
2402 /* Simple case: no character needs to be changed */
2403 SvUTF8_on(sv);
2404 return;
2405 }
2406
3473cf63 2407 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2408 Newx(to_start, len, char);
f337b084
TH
2409 Copy(from_start, to_start, from_ptr-from_start, char);
2410 to_ptr = to_start + (from_ptr-from_start);
2411
a02a5408 2412 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2413 for (group=sym_ptr; group; group = group->previous)
2414 marks[group->level] = from_start + group->strbeg;
2415 marks[sym_ptr->level+1] = from_end+1;
2416 for (m = marks; *m < from_ptr; m++)
2417 *m = to_start + (*m-from_start);
2418
2419 for (;from_ptr < from_end; from_ptr++) {
2420 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2421 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2422 }
2423 *to_ptr = 0;
2424
2425 while (*m == from_ptr) *m++ = to_ptr;
2426 if (m != marks + sym_ptr->level+1) {
2427 Safefree(marks);
2428 Safefree(to_start);
2429 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2430 }
2431 for (group=sym_ptr; group; group = group->previous)
2432 group->strbeg = marks[group->level] - to_start;
2433 Safefree(marks);
2434
2435 if (SvOOK(sv)) {
2436 if (SvIVX(sv)) {
b162af07 2437 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2438 from_start -= SvIVX(sv);
2439 SvIV_set(sv, 0);
2440 }
2441 SvFLAGS(sv) &= ~SVf_OOK;
2442 }
2443 if (SvLEN(sv) != 0)
2444 Safefree(from_start);
f880fe2f 2445 SvPV_set(sv, to_start);
b162af07
SP
2446 SvCUR_set(sv, to_ptr - to_start);
2447 SvLEN_set(sv, len);
f337b084
TH
2448 SvUTF8_on(sv);
2449}
2450
2451/* Exponential string grower. Makes string extension effectively O(n)
2452 needed says how many extra bytes we need (not counting the final '\0')
2453 Only grows the string if there is an actual lack of space
2454*/
2455STATIC char *
0bd48802 2456S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2457 const STRLEN cur = SvCUR(sv);
2458 const STRLEN len = SvLEN(sv);
f337b084
TH
2459 STRLEN extend;
2460 if (len - cur > needed) return SvPVX(sv);
2461 extend = needed > len ? needed : len;
2462 return SvGROW(sv, len+extend+1);
2463}
49704364
LW
2464
2465STATIC
2466SV **
f337b084 2467S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2468{
97aff369 2469 dVAR;
49704364 2470 tempsym_t lookahead;
f337b084
TH
2471 I32 items = endlist - beglist;
2472 bool found = next_symbol(symptr);
2473 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2474 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084
TH
2475
2476 if (symptr->level == 0 && found && symptr->code == 'U') {
2477 marked_upgrade(aTHX_ cat, symptr);
2478 symptr->flags |= FLAG_DO_UTF8;
2479 utf8 = 0;
49704364 2480 }
f337b084 2481 symptr->strbeg = SvCUR(cat);
49704364
LW
2482
2483 while (found) {
f337b084
TH
2484 SV *fromstr;
2485 STRLEN fromlen;
2486 I32 len;
a0714e2c 2487 SV *lengthcode = NULL;
49704364 2488 I32 datumtype = symptr->code;
f337b084
TH
2489 howlen_t howlen = symptr->howlen;
2490 char *start = SvPVX(cat);
2491 char *cur = start + SvCUR(cat);
49704364 2492
f337b084
TH
2493#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2494
2495 switch (howlen) {
fc241834 2496 case e_star:
f337b084
TH
2497 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2498 0 : items;
2499 break;
2500 default:
2501 /* e_no_len and e_number */
2502 len = symptr->length;
49704364
LW
2503 break;
2504 }
2505
f337b084 2506 if (len) {
a7a3cfaa 2507 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2508
a7a3cfaa
TH
2509 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2510 /* We can process this letter. */
2511 STRLEN size = props & PACK_SIZE_MASK;
2512 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2513 }
f337b084
TH
2514 }
2515
49704364
LW
2516 /* Look ahead for next symbol. Do we have code/code? */
2517 lookahead = *symptr;
2518 found = next_symbol(&lookahead);
246f24af
TH
2519 if (symptr->flags & FLAG_SLASH) {
2520 IV count;
f337b084 2521 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2522 if (strchr("aAZ", lookahead.code)) {
2523 if (lookahead.howlen == e_number) count = lookahead.length;
2524 else {
ce399ba6
NC
2525 if (items > 0) {
2526 if (SvGAMAGIC(*beglist)) {
2527 /* Avoid reading the active data more than once
2528 by copying it to a temporary. */
2529 STRLEN len;
2530 const char *const pv = SvPV_const(*beglist, len);
2531 SV *const temp = sv_2mortal(newSVpvn(pv, len));
2532 if (SvUTF8(*beglist))
2533 SvUTF8_on(temp);
2534 *beglist = temp;
2535 }
246f24af
TH
2536 count = DO_UTF8(*beglist) ?
2537 sv_len_utf8(*beglist) : sv_len(*beglist);
ce399ba6 2538 }
246f24af
TH
2539 else count = 0;
2540 if (lookahead.code == 'Z') count++;
2541 }
2542 } else {
2543 if (lookahead.howlen == e_number && lookahead.length < items)
2544 count = lookahead.length;
2545 else count = items;
2546 }
2547 lookahead.howlen = e_number;
2548 lookahead.length = count;
2549 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2550 }
49704364 2551
fc241834
RGS
2552 /* Code inside the switch must take care to properly update
2553 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2554 doesn't simply leave using break */
1109a392 2555 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2556 default:
f337b084
TH
2557 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2558 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2559 case '%':
49704364 2560 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2561 {
2562 char *from;
2563#ifdef PERL_PACK_CAN_SHRIEKSIGN
2564 case '.' | TYPE_IS_SHRIEKING:
2565#endif
2566 case '.':
2567 if (howlen == e_star) from = start;
2568 else if (len == 0) from = cur;
2569 else {
2570 tempsym_t *group = symptr;
2571
2572 while (--len && group) group = group->previous;
2573 from = group ? start + group->strbeg : start;
2574 }
2575 fromstr = NEXTFROM;
2576 len = SvIV(fromstr);
2577 goto resize;
2578#ifdef PERL_PACK_CAN_SHRIEKSIGN
2579 case '@' | TYPE_IS_SHRIEKING:
2580#endif
a6ec74c1 2581 case '@':
28be1210
TH
2582 from = start + symptr->strbeg;
2583 resize:
2584#ifdef PERL_PACK_CAN_SHRIEKSIGN
2585 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2586#else /* PERL_PACK_CAN_SHRIEKSIGN */
2587 if (utf8)
2588#endif
2589 if (len >= 0) {
2590 while (len && from < cur) {
2591 from += UTF8SKIP(from);
2592 len--;
2593 }
2594 if (from > cur)
2595 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2596 if (len) {
2597 /* Here we know from == cur */
2598 grow:
2599 GROWING(0, cat, start, cur, len);
2600 Zero(cur, len, char);
2601 cur += len;
2602 } else if (from < cur) {
2603 len = cur - from;
2604 goto shrink;
2605 } else goto no_change;
2606 } else {
2607 cur = from;
2608 len = -len;
2609 goto utf8_shrink;
f337b084 2610 }
28be1210
TH
2611 else {
2612 len -= cur - from;
f337b084 2613 if (len > 0) goto grow;
28be1210 2614 if (len == 0) goto no_change;
fc241834 2615 len = -len;
28be1210 2616 goto shrink;
f337b084 2617 }
a6ec74c1 2618 break;
28be1210 2619 }
fc241834 2620 case '(': {
49704364 2621 tempsym_t savsym = *symptr;
66c611c5
MHM
2622 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2623 symptr->flags |= group_modifiers;
49704364
LW
2624 symptr->patend = savsym.grpend;
2625 symptr->level++;
f337b084 2626 symptr->previous = &lookahead;
18529408 2627 while (len--) {
f337b084
TH
2628 U32 was_utf8;
2629 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2630 else symptr->flags &= ~FLAG_PARSE_UTF8;
2631 was_utf8 = SvUTF8(cat);
49704364 2632 symptr->patptr = savsym.grpbeg;
f337b084
TH
2633 beglist = pack_rec(cat, symptr, beglist, endlist);
2634 if (SvUTF8(cat) != was_utf8)
2635 /* This had better be an upgrade while in utf8==0 mode */
2636 utf8 = 1;
2637
49704364 2638 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2639 break; /* No way to continue */
2640 }
f337b084
TH
2641 lookahead.flags = symptr->flags & ~group_modifiers;
2642 goto no_change;
18529408 2643 }
62f95557
IZ
2644 case 'X' | TYPE_IS_SHRIEKING:
2645 if (!len) /* Avoid division by 0 */
2646 len = 1;
f337b084
TH
2647 if (utf8) {
2648 char *hop, *last;
2649 I32 l = len;
2650 hop = last = start;
2651 while (hop < cur) {
2652 hop += UTF8SKIP(hop);
2653 if (--l == 0) {
2654 last = hop;
2655 l = len;
2656 }
2657 }
2658 if (last > cur)
2659 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2660 cur = last;
2661 break;
2662 }
2663 len = (cur-start) % len;
62f95557 2664 /* FALL THROUGH */
a6ec74c1 2665 case 'X':
f337b084
TH
2666 if (utf8) {
2667 if (len < 1) goto no_change;
28be1210 2668 utf8_shrink:
f337b084
TH
2669 while (len > 0) {
2670 if (cur <= start)
28be1210
TH
2671 Perl_croak(aTHX_ "'%c' outside of string in pack",
2672 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2673 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2674 if (cur <= start)
28be1210
TH
2675 Perl_croak(aTHX_ "'%c' outside of string in pack",
2676 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2677 }
2678 len--;
2679 }
2680 } else {
fc241834 2681 shrink:
f337b084 2682 if (cur - start < len)
28be1210
TH
2683 Perl_croak(aTHX_ "'%c' outside of string in pack",
2684 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2685 cur -= len;
2686 }
2687 if (cur < start+symptr->strbeg) {
2688 /* Make sure group starts don't point into the void */
2689 tempsym_t *group;
9e27e96a 2690 const STRLEN length = cur-start;
f337b084
TH
2691 for (group = symptr;
2692 group && length < group->strbeg;
2693 group = group->previous) group->strbeg = length;
2694 lookahead.strbeg = length;
2695 }
a6ec74c1 2696 break;
fc241834
RGS
2697 case 'x' | TYPE_IS_SHRIEKING: {
2698 I32 ai32;
62f95557
IZ
2699 if (!len) /* Avoid division by 0 */
2700 len = 1;
230e1fce 2701 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2702 else ai32 = (cur - start) % len;
2703 if (ai32 == 0) goto no_change;
2704 len -= ai32;
2705 }
2706 /* FALL THROUGH */
a6ec74c1 2707 case 'x':
f337b084 2708 goto grow;
a6ec74c1
JH
2709 case 'A':
2710 case 'Z':
f337b084 2711 case 'a': {
f7fe979e 2712 const char *aptr;
f337b084 2713
a6ec74c1 2714 fromstr = NEXTFROM;
e62f0680 2715 aptr = SvPV_const(fromstr, fromlen);
f337b084 2716 if (DO_UTF8(fromstr)) {
f7fe979e 2717 const char *end, *s;
f337b084
TH
2718
2719 if (!utf8 && !SvUTF8(cat)) {
2720 marked_upgrade(aTHX_ cat, symptr);
2721 lookahead.flags |= FLAG_DO_UTF8;
2722 lookahead.strbeg = symptr->strbeg;
2723 utf8 = 1;
2724 start = SvPVX(cat);
2725 cur = start + SvCUR(cat);
2726 }
fc241834 2727 if (howlen == e_star) {
f337b084
TH
2728 if (utf8) goto string_copy;
2729 len = fromlen+1;
2730 }
2731 s = aptr;
2732 end = aptr + fromlen;
2733 fromlen = datumtype == 'Z' ? len-1 : len;
2734 while ((I32) fromlen > 0 && s < end) {
2735 s += UTF8SKIP(s);
2736 fromlen--;
2737 }
2738 if (s > end)
2739 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2740 if (utf8) {
fc241834 2741 len = fromlen;
f337b084
TH
2742 if (datumtype == 'Z') len++;
2743 fromlen = s-aptr;
2744 len += fromlen;
fc241834 2745
f337b084 2746 goto string_copy;
fc241834 2747 }
f337b084
TH
2748 fromlen = len - fromlen;
2749 if (datumtype == 'Z') fromlen--;
2750 if (howlen == e_star) {
2751 len = fromlen;
2752 if (datumtype == 'Z') len++;
fc241834 2753 }
f337b084 2754 GROWING(0, cat, start, cur, len);
fc241834 2755 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084
TH
2756 datumtype | TYPE_IS_PACK))
2757 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2758 cur += fromlen;
a6ec74c1 2759 len -= fromlen;
f337b084
TH
2760 } else if (utf8) {
2761 if (howlen == e_star) {
2762 len = fromlen;
2763 if (datumtype == 'Z') len++;
a6ec74c1 2764 }
f337b084
TH
2765 if (len <= (I32) fromlen) {
2766 fromlen = len;
2767 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2768 }
fc241834 2769 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2770 upgrade, so:
2771 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2772 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2773 len -= fromlen;
2774 while (fromlen > 0) {
230e1fce 2775 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2776 aptr++;
2777 fromlen--;
fc241834 2778 }
f337b084
TH
2779 } else {
2780 string_copy:
2781 if (howlen == e_star) {
2782 len = fromlen;
2783 if (datumtype == 'Z') len++;
2784 }
2785 if (len <= (I32) fromlen) {
2786 fromlen = len;
2787 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2788 }
f337b084
TH
2789 GROWING(0, cat, start, cur, len);
2790 Copy(aptr, cur, fromlen, char);
2791 cur += fromlen;
2792 len -= fromlen;
a6ec74c1 2793 }
f337b084
TH
2794 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2795 cur += len;
a6ec74c1 2796 break;
f337b084 2797 }
a6ec74c1 2798 case 'B':
f337b084 2799 case 'b': {
b83604b4 2800 const char *str, *end;
f337b084
TH
2801 I32 l, field_len;
2802 U8 bits;
2803 bool utf8_source;
2804 U32 utf8_flags;
a6ec74c1 2805
fc241834 2806 fromstr = NEXTFROM;
b83604b4 2807 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2808 end = str + fromlen;
2809 if (DO_UTF8(fromstr)) {
2810 utf8_source = TRUE;
041457d9 2811 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2812 } else {
2813 utf8_source = FALSE;
2814 utf8_flags = 0; /* Unused, but keep compilers happy */
2815 }
2816 if (howlen == e_star) len = fromlen;
2817 field_len = (len+7)/8;
2818 GROWING(utf8, cat, start, cur, field_len);
2819 if (len > (I32)fromlen) len = fromlen;
2820 bits = 0;
2821 l = 0;
2822 if (datumtype == 'B')
2823 while (l++ < len) {
2824 if (utf8_source) {
95b63a38 2825 UV val = 0;
f337b084
TH
2826 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2827 bits |= val & 1;
2828 } else bits |= *str++ & 1;
2829 if (l & 7) bits <<= 1;
fc241834 2830 else {
f337b084
TH
2831 PUSH_BYTE(utf8, cur, bits);
2832 bits = 0;
a6ec74c1
JH
2833 }
2834 }
f337b084
TH
2835 else
2836 /* datumtype == 'b' */
2837 while (l++ < len) {
2838 if (utf8_source) {
95b63a38 2839 UV val = 0;
f337b084
TH
2840 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2841 if (val & 1) bits |= 0x80;
2842 } else if (*str++ & 1)
2843 bits |= 0x80;
2844 if (l & 7) bits >>= 1;
fc241834 2845 else {
f337b084
TH
2846 PUSH_BYTE(utf8, cur, bits);
2847 bits = 0;
a6ec74c1
JH
2848 }
2849 }
f337b084
TH
2850 l--;
2851 if (l & 7) {
fc241834 2852 if (datumtype == 'B')
f337b084 2853 bits <<= 7 - (l & 7);
fc241834 2854 else
f337b084
TH
2855 bits >>= 7 - (l & 7);
2856 PUSH_BYTE(utf8, cur, bits);
2857 l += 7;
a6ec74c1 2858 }
f337b084
TH
2859 /* Determine how many chars are left in the requested field */
2860 l /= 8;
2861 if (howlen == e_star) field_len = 0;
2862 else field_len -= l;
2863 Zero(cur, field_len, char);
2864 cur += field_len;
a6ec74c1 2865 break;
f337b084 2866 }
a6ec74c1 2867 case 'H':
f337b084 2868 case 'h': {
10516c54 2869 const char *str, *end;
f337b084
TH
2870 I32 l, field_len;
2871 U8 bits;
2872 bool utf8_source;
2873 U32 utf8_flags;
a6ec74c1 2874
fc241834 2875 fromstr = NEXTFROM;
10516c54 2876 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2877 end = str + fromlen;
2878 if (DO_UTF8(fromstr)) {
2879 utf8_source = TRUE;
041457d9 2880 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2881 } else {
2882 utf8_source = FALSE;
2883 utf8_flags = 0; /* Unused, but keep compilers happy */
2884 }
2885 if (howlen == e_star) len = fromlen;
2886 field_len = (len+1)/2;
2887 GROWING(utf8, cat, start, cur, field_len);
2888 if (!utf8 && len > (I32)fromlen) len = fromlen;
2889 bits = 0;
2890 l = 0;
2891 if (datumtype == 'H')
2892 while (l++ < len) {
2893 if (utf8_source) {
95b63a38 2894 UV val = 0;
f337b084
TH
2895 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2896 if (val < 256 && isALPHA(val))
2897 bits |= (val + 9) & 0xf;
a6ec74c1 2898 else
f337b084
TH
2899 bits |= val & 0xf;
2900 } else if (isALPHA(*str))
2901 bits |= (*str++ + 9) & 0xf;
2902 else
2903 bits |= *str++ & 0xf;
2904 if (l & 1) bits <<= 4;
fc241834 2905 else {
f337b084
TH
2906 PUSH_BYTE(utf8, cur, bits);
2907 bits = 0;
a6ec74c1
JH
2908 }
2909 }
f337b084
TH
2910 else
2911 while (l++ < len) {
2912 if (utf8_source) {
95b63a38 2913 UV val = 0;
f337b084
TH
2914 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2915 if (val < 256 && isALPHA(val))
2916 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2917 else
f337b084
TH
2918 bits |= (val & 0xf) << 4;
2919 } else if (isALPHA(*str))
2920 bits |= ((*str++ + 9) & 0xf) << 4;
2921 else
2922 bits |= (*str++ & 0xf) << 4;
2923 if (l & 1) bits >>= 4;
fc241834 2924 else {
f337b084
TH
2925 PUSH_BYTE(utf8, cur, bits);
2926 bits = 0;
a6ec74c1 2927 }
fc241834 2928 }
f337b084
TH
2929 l--;
2930 if (l & 1) {
2931 PUSH_BYTE(utf8, cur, bits);
2932 l++;
2933 }
2934 /* Determine how many chars are left in the requested field */
2935 l /= 2;
2936 if (howlen == e_star) field_len = 0;
2937 else field_len -= l;
2938 Zero(cur, field_len, char);
2939 cur += field_len;
2940 break;
fc241834
RGS
2941 }
2942 case 'c':
f337b084
TH
2943 while (len-- > 0) {
2944 IV aiv;
2945 fromstr = NEXTFROM;
2946 aiv = SvIV(fromstr);
2947 if ((-128 > aiv || aiv > 127) &&
2948 ckWARN(WARN_PACK))
2949 Perl_warner(aTHX_ packWARN(WARN_PACK),
2950 "Character in 'c' format wrapped in pack");
585ec06d 2951 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2952 }
2953 break;
2954 case 'C':
f337b084
TH
2955 if (len == 0) {
2956 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2957 break;
2958 }
2959 GROWING(0, cat, start, cur, len);
a6ec74c1 2960 while (len-- > 0) {
f337b084 2961 IV aiv;
a6ec74c1 2962 fromstr = NEXTFROM;
f337b084
TH
2963 aiv = SvIV(fromstr);
2964 if ((0 > aiv || aiv > 0xff) &&
fc241834
RGS
2965 ckWARN(WARN_PACK))
2966 Perl_warner(aTHX_ packWARN(WARN_PACK),
2967 "Character in 'C' format wrapped in pack");
585ec06d 2968 *cur++ = (char)(aiv & 0xff);
f337b084 2969 }
fc241834
RGS
2970 break;
2971 case 'W': {
2972 char *end;
2973 U8 in_bytes = IN_BYTES;
2974
2975 end = start+SvLEN(cat)-1;
2976 if (utf8) end -= UTF8_MAXLEN-1;
2977 while (len-- > 0) {
2978 UV auv;
2979 fromstr = NEXTFROM;
2980 auv = SvUV(fromstr);
2981 if (in_bytes) auv = auv % 0x100;
2982 if (utf8) {
2983 W_utf8:
2984 if (cur > end) {
2985 *cur = '\0';
b162af07 2986 SvCUR_set(cat, cur - start);
fc241834
RGS
2987
2988 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2989 end = start+SvLEN(cat)-UTF8_MAXLEN;
2990 }
230e1fce
NC
2991 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2992 NATIVE_TO_UNI(auv),
041457d9 2993 warn_utf8 ?
230e1fce 2994 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
2995 } else {
2996 if (auv >= 0x100) {
2997 if (!SvUTF8(cat)) {
2998 *cur = '\0';
b162af07 2999 SvCUR_set(cat, cur - start);
fc241834
RGS
3000 marked_upgrade(aTHX_ cat, symptr);
3001 lookahead.flags |= FLAG_DO_UTF8;
3002 lookahead.strbeg = symptr->strbeg;
3003 utf8 = 1;
3004 start = SvPVX(cat);
3005 cur = start + SvCUR(cat);
3006 end = start+SvLEN(cat)-UTF8_MAXLEN;
3007 goto W_utf8;
3008 }
3009 if (ckWARN(WARN_PACK))
3010 Perl_warner(aTHX_ packWARN(WARN_PACK),
3011 "Character in 'W' format wrapped in pack");
3012 auv &= 0xff;
3013 }
3014 if (cur >= end) {
3015 *cur = '\0';
b162af07 3016 SvCUR_set(cat, cur - start);
fc241834
RGS
3017 GROWING(0, cat, start, cur, len+1);
3018 end = start+SvLEN(cat)-1;
3019 }
fe2774ed 3020 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
3021 }
3022 }
3023 break;
fc241834
RGS
3024 }
3025 case 'U': {
3026 char *end;
3027
3028 if (len == 0) {
3029 if (!(symptr->flags & FLAG_DO_UTF8)) {
3030 marked_upgrade(aTHX_ cat, symptr);
3031 lookahead.flags |= FLAG_DO_UTF8;
3032 lookahead.strbeg = symptr->strbeg;
3033 }
3034 utf8 = 0;
3035 goto no_change;
3036 }
3037
3038 end = start+SvLEN(cat);
3039 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 3040 while (len-- > 0) {
fc241834 3041 UV auv;
a6ec74c1 3042 fromstr = NEXTFROM;
fc241834
RGS
3043 auv = SvUV(fromstr);
3044 if (utf8) {
230e1fce 3045 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 3046 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 3047 warn_utf8 ?
fc241834
RGS
3048 0 : UNICODE_ALLOW_ANY);
3049 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3050 *cur = '\0';
b162af07 3051 SvCUR_set(cat, cur - start);
fc241834
RGS
3052 GROWING(0, cat, start, cur,
3053 len+(endb-buffer)*UTF8_EXPAND);
3054 end = start+SvLEN(cat);
3055 }
64844641 3056 cur = bytes_to_uni(buffer, endb-buffer, cur);
fc241834
RGS
3057 } else {
3058 if (cur >= end) {
3059 *cur = '\0';
b162af07 3060 SvCUR_set(cat, cur - start);
fc241834
RGS
3061 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3062 end = start+SvLEN(cat)-UTF8_MAXLEN;
3063 }
230e1fce 3064 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 3065 warn_utf8 ?
230e1fce 3066 0 : UNICODE_ALLOW_ANY);
fc241834 3067 }
a6ec74c1 3068 }
a6ec74c1 3069 break;
fc241834 3070 }
a6ec74c1
JH
3071 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3072 case 'f':
a6ec74c1 3073 while (len-- > 0) {
f337b084
TH
3074 float afloat;
3075 NV anv;
a6ec74c1 3076 fromstr = NEXTFROM;
f337b084 3077 anv = SvNV(fromstr);
5cdb9e01 3078#ifdef __VOS__
f337b084 3079 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3080 during conversion from double to float into infinity, so we
3081 do it by hand. This code should either be generalized for
3082 any OS that needs it, or removed if and when VOS implements
3083 posix-976 (suggestion to support mapping to infinity).
3084 Paul.Green@stratus.com 02-04-02. */
f337b084 3085 if (anv > FLT_MAX)
fc241834 3086 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3087 else if (anv < -FLT_MAX)
fc241834 3088 afloat = _float_constants[0]; /* single prec. inf. */
f337b084
TH
3089 else afloat = (float) anv;
3090#else /* __VOS__ */
baf3cf9c 3091# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3092 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3093 * on Alpha; fake it if we don't have them.
3094 */
f337b084 3095 if (anv > FLT_MAX)
fc241834 3096 afloat = FLT_MAX;
f337b084 3097 else if (anv < -FLT_MAX)
fc241834 3098 afloat = -FLT_MAX;
f337b084 3099 else afloat = (float)anv;
baf3cf9c 3100# else
f337b084 3101 afloat = (float)anv;
baf3cf9c 3102# endif
f337b084 3103#endif /* __VOS__ */
1109a392 3104 DO_BO_PACK_N(afloat, float);
f337b084 3105 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
3106 }
3107 break;
3108 case 'd':
a6ec74c1 3109 while (len-- > 0) {
f337b084
TH
3110 double adouble;
3111 NV anv;
a6ec74c1 3112 fromstr = NEXTFROM;
f337b084 3113 anv = SvNV(fromstr);
5cdb9e01 3114#ifdef __VOS__
f337b084 3115 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3116 during conversion from long double to double into infinity,
3117 so we do it by hand. This code should either be generalized
3118 for any OS that needs it, or removed if and when VOS
3119 implements posix-976 (suggestion to support mapping to
3120 infinity). Paul.Green@stratus.com 02-04-02. */
f337b084 3121 if (anv > DBL_MAX)
fc241834 3122 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3123 else if (anv < -DBL_MAX)
fc241834 3124 adouble = _double_constants[0]; /* double prec. inf. */
f337b084
TH
3125 else adouble = (double) anv;
3126#else /* __VOS__ */
baf3cf9c 3127# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3128 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3129 * on Alpha; fake it if we don't have them.
3130 */
f337b084 3131 if (anv > DBL_MAX)
fc241834 3132 adouble = DBL_MAX;
f337b084 3133 else if (anv < -DBL_MAX)
fc241834 3134 adouble = -DBL_MAX;
f337b084 3135 else adouble = (double)anv;
baf3cf9c 3136# else
f337b084 3137 adouble = (double)anv;
baf3cf9c 3138# endif
f337b084 3139#endif /* __VOS__ */
1109a392 3140 DO_BO_PACK_N(adouble, double);
f337b084 3141 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3142 }
3143 break;
fc241834
RGS
3144 case 'F': {
3145 NV anv;
1109a392 3146 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3147 while (len-- > 0) {
3148 fromstr = NEXTFROM;
3149 anv = SvNV(fromstr);
1109a392 3150 DO_BO_PACK_N(anv, NV);
fc241834 3151 PUSH_VAR(utf8, cur, anv);
92d41999
JH
3152 }
3153 break;
fc241834 3154 }
92d41999 3155#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834
RGS
3156 case 'D': {
3157 long double aldouble;
1109a392
MHM
3158 /* long doubles can have unused bits, which may be nonzero */
3159 Zero(&aldouble, 1, long double);
92d41999
JH
3160 while (len-- > 0) {
3161 fromstr = NEXTFROM;
3162 aldouble = (long double)SvNV(fromstr);
1109a392 3163 DO_BO_PACK_N(aldouble, long double);
fc241834 3164 PUSH_VAR(utf8, cur, aldouble);
92d41999
JH
3165 }
3166 break;
fc241834 3167 }
92d41999 3168#endif
7212898e 3169#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3170 case 'n' | TYPE_IS_SHRIEKING:
7212898e 3171#endif
a6ec74c1
JH
3172 case 'n':
3173 while (len-- > 0) {
f337b084 3174 I16 ai16;
a6ec74c1 3175 fromstr = NEXTFROM;
ef108786 3176 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3177#ifdef HAS_HTONS
ef108786 3178 ai16 = PerlSock_htons(ai16);
a6ec74c1 3179#endif
f337b084 3180 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3181 }
3182 break;
7212898e 3183#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3184 case 'v' | TYPE_IS_SHRIEKING:
7212898e 3185#endif
a6ec74c1
JH
3186 case 'v':
3187 while (len-- > 0) {
f337b084 3188 I16 ai16;
a6ec74c1 3189 fromstr = NEXTFROM;
ef108786 3190 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3191#ifdef HAS_HTOVS
ef108786 3192 ai16 = htovs(ai16);
a6ec74c1 3193#endif
f337b084 3194 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3195 }
3196 break;
49704364 3197 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3198#if SHORTSIZE != SIZE16
fc241834 3199 while (len-- > 0) {
f337b084 3200 unsigned short aushort;
fc241834
RGS
3201 fromstr = NEXTFROM;
3202 aushort = SvUV(fromstr);
3203 DO_BO_PACK(aushort, s);
f337b084 3204 PUSH_VAR(utf8, cur, aushort);
fc241834 3205 }
49704364
LW
3206 break;
3207#else
3208 /* Fall through! */
a6ec74c1 3209#endif
49704364 3210 case 'S':
fc241834 3211 while (len-- > 0) {
f337b084 3212 U16 au16;
fc241834
RGS
3213 fromstr = NEXTFROM;
3214 au16 = (U16)SvUV(fromstr);
3215 DO_BO_PACK(au16, 16);
f337b084 3216 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3217 }
3218 break;
49704364 3219 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3220#if SHORTSIZE != SIZE16
fc241834 3221 while (len-- > 0) {
f337b084 3222 short ashort;
fc241834
RGS
3223 fromstr = NEXTFROM;
3224 ashort = SvIV(fromstr);
3225 DO_BO_PACK(ashort, s);
f337b084 3226 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3227 }
49704364
LW
3228 break;
3229#else
3230 /* Fall through! */
a6ec74c1 3231#endif
49704364
LW
3232 case 's':
3233 while (len-- > 0) {
f337b084 3234 I16 ai16;
49704364 3235 fromstr = NEXTFROM;
ef108786
MHM
3236 ai16 = (I16)SvIV(fromstr);
3237 DO_BO_PACK(ai16, 16);
f337b084 3238 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3239 }
3240 break;
3241 case 'I':
49704364 3242 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3243 while (len-- > 0) {
f337b084 3244 unsigned int auint;
a6ec74c1
JH
3245 fromstr = NEXTFROM;
3246 auint = SvUV(fromstr);
1109a392 3247 DO_BO_PACK(auint, i);
f337b084 3248 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3249 }
3250 break;
92d41999
JH
3251 case 'j':
3252 while (len-- > 0) {
f337b084 3253 IV aiv;
92d41999
JH
3254 fromstr = NEXTFROM;
3255 aiv = SvIV(fromstr);
1109a392
MHM
3256#if IVSIZE == INTSIZE
3257 DO_BO_PACK(aiv, i);
3258#elif IVSIZE == LONGSIZE
3259 DO_BO_PACK(aiv, l);
3260#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3261 DO_BO_PACK(aiv, 64);
f337b084
TH
3262#else
3263 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3264#endif
f337b084 3265 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3266 }
3267 break;
3268 case 'J':
3269 while (len-- > 0) {
f337b084 3270 UV auv;
92d41999
JH
3271 fromstr = NEXTFROM;
3272 auv = SvUV(fromstr);
1109a392
MHM
3273#if UVSIZE == INTSIZE
3274 DO_BO_PACK(auv, i);
3275#elif UVSIZE == LONGSIZE
3276 DO_BO_PACK(auv, l);
3277#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3278 DO_BO_PACK(auv, 64);
f337b084
TH
3279#else
3280 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3281#endif
f337b084 3282 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3283 }
3284 break;
a6ec74c1
JH
3285 case 'w':
3286 while (len-- > 0) {
f337b084 3287 NV anv;
a6ec74c1 3288 fromstr = NEXTFROM;
15e9f109 3289 anv = SvNV(fromstr);
a6ec74c1 3290
f337b084
TH
3291 if (anv < 0) {
3292 *cur = '\0';
b162af07 3293 SvCUR_set(cat, cur - start);
49704364 3294 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3295 }
a6ec74c1 3296
196b62db
NC
3297 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3298 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3299 any negative IVs will have already been got by the croak()
3300 above. IOK is untrue for fractions, so we test them
3301 against UV_MAX_P1. */
f337b084
TH
3302 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3303 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3304 char *in = buf + sizeof(buf);
196b62db 3305 UV auv = SvUV(fromstr);
a6ec74c1
JH
3306
3307 do {
eb160463 3308 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3309 auv >>= 7;
3310 } while (auv);
3311 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3312 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3313 in, (buf + sizeof(buf)) - in);
3314 } else if (SvPOKp(fromstr))
3315 goto w_string;
a6ec74c1 3316 else if (SvNOKp(fromstr)) {
0258719b
NC
3317 /* 10**NV_MAX_10_EXP is the largest power of 10
3318 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3319 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3320 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3321 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3322 Some C compilers are strict about integral constant
3323 expressions so we conservatively divide by a slightly
3324 smaller integer instead of multiplying by the exact
3325 floating-point value.
0258719b
NC
3326 */
3327#ifdef NV_MAX_10_EXP
f337b084 3328 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3329 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3330#else
f337b084 3331 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3332 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3333#endif
a6ec74c1
JH
3334 char *in = buf + sizeof(buf);
3335
8b6e33c7 3336 anv = Perl_floor(anv);
a6ec74c1 3337 do {
8b6e33c7 3338 const NV next = Perl_floor(anv / 128);
a6ec74c1 3339 if (in <= buf) /* this cannot happen ;-) */
49704364 3340 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3341 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3342 anv = next;
3343 } while (anv > 0);
a6ec74c1 3344 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3345 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3346 in, (buf + sizeof(buf)) - in);
3347 } else {
8b6e33c7
AL
3348 const char *from;
3349 char *result, *in;
735b914b
JH
3350 SV *norm;
3351 STRLEN len;
3352 bool done;
3353
f337b084 3354 w_string:
735b914b 3355 /* Copy string and check for compliance */
349d4f2f 3356 from = SvPV_const(fromstr, len);
735b914b 3357 if ((norm = is_an_int(from, len)) == NULL)
49704364 3358 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 3359
a02a5408 3360 Newx(result, len, char);
735b914b
JH
3361 in = result + len;
3362 done = FALSE;
f337b084 3363 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3364 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3365 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3366 in, (result + len) - in);
735b914b
JH
3367 Safefree(result);
3368 SvREFCNT_dec(norm); /* free norm */
fc241834 3369 }
a6ec74c1
JH
3370 }
3371 break;
3372 case 'i':
49704364 3373 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3374 while (len-- > 0) {
f337b084 3375 int aint;
a6ec74c1
JH
3376 fromstr = NEXTFROM;
3377 aint = SvIV(fromstr);
1109a392 3378 DO_BO_PACK(aint, i);
f337b084 3379 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3380 }
3381 break;
7212898e 3382#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3383 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3384#endif
a6ec74c1
JH
3385 case 'N':
3386 while (len-- > 0) {
f337b084 3387 U32 au32;
a6ec74c1 3388 fromstr = NEXTFROM;
ef108786 3389 au32 = SvUV(fromstr);
a6ec74c1 3390#ifdef HAS_HTONL
ef108786 3391 au32 = PerlSock_htonl(au32);
a6ec74c1 3392#endif
f337b084 3393 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3394 }
3395 break;
7212898e 3396#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3397 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3398#endif
a6ec74c1
JH
3399 case 'V':
3400 while (len-- > 0) {
f337b084 3401 U32 au32;
a6ec74c1 3402 fromstr = NEXTFROM;
ef108786 3403 au32 = SvUV(fromstr);
a6ec74c1 3404#ifdef HAS_HTOVL
ef108786 3405 au32 = htovl(au32);
a6ec74c1 3406#endif
f337b084 3407 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3408 }
3409 break;
49704364 3410 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3411#if LONGSIZE != SIZE32
fc241834 3412 while (len-- > 0) {
f337b084 3413 unsigned long aulong;
fc241834
RGS
3414 fromstr = NEXTFROM;
3415 aulong = SvUV(fromstr);
3416 DO_BO_PACK(aulong, l);
f337b084 3417 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3418 }
49704364
LW
3419 break;
3420#else
3421 /* Fall though! */
a6ec74c1 3422#endif
49704364 3423 case 'L':
fc241834 3424 while (len-- > 0) {
f337b084 3425 U32 au32;
fc241834
RGS
3426 fromstr = NEXTFROM;
3427 au32 = SvUV(fromstr);
3428 DO_BO_PACK(au32, 32);
f337b084 3429 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3430 }
3431 break;
49704364 3432 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3433#if LONGSIZE != SIZE32
fc241834 3434 while (len-- > 0) {
f337b084 3435 long along;
fc241834
RGS
3436 fromstr = NEXTFROM;
3437 along = SvIV(fromstr);
3438 DO_BO_PACK(along, l);
f337b084 3439 PUSH_VAR(utf8, cur, along);
a6ec74c1 3440 }
49704364
LW
3441 break;
3442#else
3443 /* Fall though! */
a6ec74c1 3444#endif
49704364
LW
3445 case 'l':
3446 while (len-- > 0) {
f337b084 3447 I32 ai32;
49704364 3448 fromstr = NEXTFROM;
ef108786
MHM
3449 ai32 = SvIV(fromstr);
3450 DO_BO_PACK(ai32, 32);
f337b084 3451 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3452 }
3453 break;
3454#ifdef HAS_QUAD
3455 case 'Q':
3456 while (len-- > 0) {
f337b084 3457 Uquad_t auquad;
a6ec74c1 3458 fromstr = NEXTFROM;
f337b084 3459 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3460 DO_BO_PACK(auquad, 64);
f337b084 3461 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3462 }
3463 break;
3464 case 'q':
3465 while (len-- > 0) {
f337b084 3466 Quad_t aquad;
a6ec74c1
JH
3467 fromstr = NEXTFROM;
3468 aquad = (Quad_t)SvIV(fromstr);
1109a392 3469 DO_BO_PACK(aquad, 64);
f337b084 3470 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3471 }
3472 break;
f337b084 3473#endif /* HAS_QUAD */
a6ec74c1
JH
3474 case 'P':
3475 len = 1; /* assume SV is correct length */
f337b084 3476 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3477 /* Fall through! */
a6ec74c1
JH
3478 case 'p':
3479 while (len-- > 0) {
83003860 3480 const char *aptr;
f337b084 3481
a6ec74c1 3482 fromstr = NEXTFROM;
28a4f200 3483 SvGETMAGIC(fromstr);
3484 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3485 else {
a6ec74c1
JH
3486 /* XXX better yet, could spirit away the string to
3487 * a safe spot and hang on to it until the result
3488 * of pack() (and all copies of the result) are
3489 * gone.
3490 */
041457d9
DM
3491 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3492 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
9014280d 3493 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834 3494 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3495 }
3496 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3497 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3498 else
2596d9fe 3499 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3500 }
07409e01 3501 DO_BO_PACK_PC(aptr);
f337b084 3502 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3503 }
3504 break;
fc241834 3505 case 'u': {
f7fe979e 3506 const char *aptr, *aend;
fc241834 3507 bool from_utf8;
f337b084 3508
a6ec74c1 3509 fromstr = NEXTFROM;
fc241834
RGS
3510 if (len <= 2) len = 45;
3511 else len = len / 3 * 3;
3512 if (len >= 64) {
0fdc08b6
RGS
3513 if (ckWARN(WARN_PACK))
3514 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834
RGS
3515 "Field too wide in 'u' format in pack");
3516 len = 63;
3517 }
83003860 3518 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3519 from_utf8 = DO_UTF8(fromstr);
3520 if (from_utf8) {
3521 aend = aptr + fromlen;
3522 fromlen = sv_len_utf8(fromstr);
3523 } else aend = NULL; /* Unused, but keep compilers happy */
3524 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/l