This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
One part of pp_pack couldn't correctly handle surprises from UTF-8
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
acde74e1 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 */
18
166f8a29
DM
19/* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
24 *
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
27 */
28
29
a6ec74c1
JH
30#include "EXTERN.h"
31#define PERL_IN_PP_PACK_C
32#include "perl.h"
33
f7fe979e
AL
34/* Types used by pack/unpack */
35typedef enum {
36 e_no_len, /* no length */
37 e_number, /* number, [] */
38 e_star /* asterisk */
39} howlen_t;
40
41typedef struct tempsym {
42 const char* patptr; /* current template char */
43 const char* patend; /* one after last char */
44 const char* grpbeg; /* 1st char of ()-group */
45 const char* grpend; /* end of ()-group */
46 I32 code; /* template code (!<>) */
47 I32 length; /* length/repeat count */
48 howlen_t howlen; /* how length is given */
49 int level; /* () nesting level */
50 U32 flags; /* /=4, comma=2, pack=1 */
51 /* and group modifiers */
52 STRLEN strbeg; /* offset of group start */
53 struct tempsym *previous; /* previous group */
54} tempsym_t;
55
56#define TEMPSYM_INIT(symptr, p, e, f) \
57 STMT_START { \
58 (symptr)->patptr = (p); \
59 (symptr)->patend = (e); \
60 (symptr)->grpbeg = NULL; \
61 (symptr)->grpend = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->code = 0; \
64 (symptr)->length = 0; \
65 (symptr)->howlen = 0; \
66 (symptr)->level = 0; \
67 (symptr)->flags = (f); \
68 (symptr)->strbeg = 0; \
69 (symptr)->previous = NULL; \
70 } STMT_END
71
7212898e 72#if PERL_VERSION >= 9
f337b084
TH
73# define PERL_PACK_CAN_BYTEORDER
74# define PERL_PACK_CAN_SHRIEKSIGN
75#endif
76
77#ifndef CHAR_BIT
78# define CHAR_BIT 8
7212898e 79#endif
3473cf63
RGS
80/* Maximum number of bytes to which a byte can grow due to upgrade */
81#define UTF8_EXPAND 2
7212898e 82
a6ec74c1 83/*
a6ec74c1
JH
84 * Offset for integer pack/unpack.
85 *
86 * On architectures where I16 and I32 aren't really 16 and 32 bits,
87 * which for now are all Crays, pack and unpack have to play games.
88 */
89
90/*
91 * These values are required for portability of pack() output.
92 * If they're not right on your machine, then pack() and unpack()
93 * wouldn't work right anyway; you'll need to apply the Cray hack.
94 * (I'd like to check them with #if, but you can't use sizeof() in
95 * the preprocessor.) --???
96 */
97/*
98 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
99 defines are now in config.h. --Andy Dougherty April 1998
100 */
101#define SIZE16 2
102#define SIZE32 4
103
104/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
105 --jhi Feb 1999 */
106
1109a392
MHM
107#if U16SIZE > SIZE16 || U32SIZE > SIZE32
108# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
109# define OFF16(p) ((char*)(p))
110# define OFF32(p) ((char*)(p))
a6ec74c1 111# else
1109a392 112# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
113# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
114# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
115# else
08ca2aa3 116 ++++ bad cray byte order
a6ec74c1
JH
117# endif
118# endif
a6ec74c1 119#else
08ca2aa3
TH
120# define OFF16(p) ((char *) (p))
121# define OFF32(p) ((char *) (p))
a6ec74c1
JH
122#endif
123
f337b084
TH
124/* Only to be used inside a loop (see the break) */
125#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
126 if (utf8) { \
127 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
128 } else { \
129 Copy(s, OFF16(p), SIZE16, char); \
130 (s) += SIZE16; \
131 } \
132} STMT_END
133
134/* Only to be used inside a loop (see the break) */
135#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
136 if (utf8) { \
137 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
138 } else { \
139 Copy(s, OFF32(p), SIZE32, char); \
140 (s) += SIZE32; \
141 } \
142} STMT_END
143
144#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
145#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
146
147/* Only to be used inside a loop (see the break) */
f337b084 148#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
08ca2aa3
TH
149STMT_START { \
150 if (utf8) { \
f337b084
TH
151 if (!uni_to_bytes(aTHX_ &s, strend, \
152 (char *) &var, sizeof(var), datumtype)) break;\
08ca2aa3
TH
153 } else { \
154 Copy(s, (char *) &var, sizeof(var), char); \
155 s += sizeof(var); \
156 } \
08ca2aa3
TH
157} STMT_END
158
f337b084 159#define PUSH_VAR(utf8, aptr, var) \
230e1fce 160 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 161
49704364
LW
162/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
163#define MAX_SUB_TEMPLATE_LEVEL 100
164
66c611c5 165/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
166#define FLAG_WAS_UTF8 0x40
167#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 168#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 169#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
LW
170#define FLAG_SLASH 0x04
171#define FLAG_COMMA 0x02
172#define FLAG_PACK 0x01
173
a6ec74c1
JH
174STATIC SV *
175S_mul128(pTHX_ SV *sv, U8 m)
176{
177 STRLEN len;
178 char *s = SvPV(sv, len);
179 char *t;
a6ec74c1
JH
180
181 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 182 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
183
184 sv_catsv(tmpNew, sv);
185 SvREFCNT_dec(sv); /* free old sv */
186 sv = tmpNew;
187 s = SvPV(sv, len);
188 }
189 t = s + len - 1;
190 while (!*t) /* trailing '\0'? */
191 t--;
192 while (t > s) {
f7fe979e 193 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
194 *(t--) = '0' + (char)(i % 10);
195 m = (char)(i / 10);
a6ec74c1
JH
196 }
197 return (sv);
198}
199
200/* Explosives and implosives. */
201
202#if 'I' == 73 && 'J' == 74
203/* On an ASCII/ISO kind of system */
204#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
205#else
206/*
207 Some other sort of character set - use memchr() so we don't match
208 the null byte.
209 */
210#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
211#endif
212
66c611c5 213/* type modifiers */
62f95557 214#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
215#define TYPE_IS_BIG_ENDIAN 0x200
216#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 217#define TYPE_IS_PACK 0x800
1109a392 218#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 219#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
220#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
221
7212898e 222#ifdef PERL_PACK_CAN_SHRIEKSIGN
28be1210 223# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
7212898e 224#else
28be1210 225# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
7212898e
NC
226#endif
227
228#ifndef PERL_PACK_CAN_BYTEORDER
229/* Put "can't" first because it is shorter */
230# define TYPE_ENDIANNESS(t) 0
231# define TYPE_NO_ENDIANNESS(t) (t)
232
233# define ENDIANNESS_ALLOWED_TYPES ""
234
235# define DO_BO_UNPACK(var, type)
236# define DO_BO_PACK(var, type)
07409e01
NC
237# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
238# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
7212898e
NC
239# define DO_BO_UNPACK_N(var, type)
240# define DO_BO_PACK_N(var, type)
241# define DO_BO_UNPACK_P(var)
242# define DO_BO_PACK_P(var)
45f723e1
NC
243# define DO_BO_UNPACK_PC(var)
244# define DO_BO_PACK_PC(var)
66c611c5 245
f337b084 246#else /* PERL_PACK_CAN_BYTEORDER */
7212898e
NC
247
248# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
249# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
250
251# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
252
253# define DO_BO_UNPACK(var, type) \
1109a392 254 STMT_START { \
66c611c5 255 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
256 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
257 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
258 default: break; \
259 } \
260 } STMT_END
261
7212898e 262# define DO_BO_PACK(var, type) \
1109a392 263 STMT_START { \
66c611c5 264 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
265 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
266 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
267 default: break; \
268 } \
269 } STMT_END
270
07409e01 271# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
1109a392 272 STMT_START { \
66c611c5 273 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 274 case TYPE_IS_BIG_ENDIAN: \
07409e01 275 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
1109a392
MHM
276 break; \
277 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 278 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
1109a392
MHM
279 break; \
280 default: \
281 break; \
282 } \
283 } STMT_END
284
07409e01 285# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
1109a392 286 STMT_START { \
66c611c5 287 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 288 case TYPE_IS_BIG_ENDIAN: \
07409e01 289 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
1109a392
MHM
290 break; \
291 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 292 var = (post_cast *) my_htole ## type ((pre_cast) var); \
1109a392
MHM
293 break; \
294 default: \
295 break; \
296 } \
297 } STMT_END
298
7212898e 299# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
300 STMT_START { \
301 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
302 case TYPE_IS_BIG_ENDIAN: \
303 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
304 "platform", #action, #type); \
305 break; \
306 case TYPE_IS_LITTLE_ENDIAN: \
307 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
308 "platform", #action, #type); \
309 break; \
310 default: \
311 break; \
312 } \
313 } STMT_END
314
7212898e 315# if PTRSIZE == INTSIZE
07409e01
NC
316# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
317# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
318# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
319# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
7212898e 320# elif PTRSIZE == LONGSIZE
07409e01
NC
321# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
322# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
323# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
324# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
2b00a750
JD
325# elif PTRSIZE == IVSIZE
326# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
327# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
328# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
329# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
7212898e
NC
330# else
331# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
332# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
45f723e1
NC
333# define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
334# define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
7212898e 335# endif
1109a392 336
7212898e 337# if defined(my_htolen) && defined(my_letohn) && \
1109a392 338 defined(my_htoben) && defined(my_betohn)
7212898e 339# define DO_BO_UNPACK_N(var, type) \
1109a392 340 STMT_START { \
66c611c5 341 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
342 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
343 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
344 default: break; \
345 } \
346 } STMT_END
347
7212898e 348# define DO_BO_PACK_N(var, type) \
1109a392 349 STMT_START { \
66c611c5 350 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
351 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
352 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
353 default: break; \
354 } \
355 } STMT_END
7212898e
NC
356# else
357# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
358# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
359# endif
360
f337b084 361#endif /* PERL_PACK_CAN_BYTEORDER */
62f95557 362
78d46eaa 363#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 364#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
365#define PACK_SIZE_MASK 0x3F
366
78d46eaa
NC
367/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
368 in). You're unlikely ever to need to regenerate them. */
a7a3cfaa
TH
369
370#if TYPE_IS_SHRIEKING != 0x100
371 ++++shriek offset should be 256
372#endif
373
374typedef U8 packprops_t;
78d46eaa
NC
375#if 'J'-'I' == 1
376/* ASCII */
f43a9a31 377STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
378 /* normal */
379 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
380 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
381 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
383 0, 0, 0,
384 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 385#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 386 /* D */ LONG_DOUBLESIZE,
80a13697 387#else
a7a3cfaa 388 0,
80a13697 389#endif
a7a3cfaa
TH
390 0,
391 /* F */ NVSIZE,
392 0, 0,
393 /* I */ sizeof(unsigned int),
394 /* J */ UVSIZE,
395 0,
396 /* L */ SIZE32,
397 0,
398 /* N */ SIZE32,
399 0, 0,
80a13697 400#if defined(HAS_QUAD)
a7a3cfaa 401 /* Q */ sizeof(Uquad_t),
80a13697 402#else
a7a3cfaa 403 0,
80a13697 404#endif
a7a3cfaa
TH
405 0,
406 /* S */ SIZE16,
407 0,
408 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
409 /* V */ SIZE32,
410 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
411 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
412 /* c */ sizeof(char),
413 /* d */ sizeof(double),
414 0,
415 /* f */ sizeof(float),
416 0, 0,
417 /* i */ sizeof(int),
418 /* j */ IVSIZE,
419 0,
420 /* l */ SIZE32,
421 0,
422 /* n */ SIZE16,
423 0,
424 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 425#if defined(HAS_QUAD)
a7a3cfaa 426 /* q */ sizeof(Quad_t),
80a13697 427#else
a7a3cfaa 428 0,
80a13697 429#endif
a7a3cfaa
TH
430 0,
431 /* s */ SIZE16,
432 0, 0,
433 /* v */ SIZE16,
434 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
435 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0,
444 /* shrieking */
445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 0, 0, 0, 0, 0, 0, 0, 0, 0,
450 /* I */ sizeof(unsigned int),
451 0, 0,
452 /* L */ sizeof(unsigned long),
453 0,
7212898e 454#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 455 /* N */ SIZE32,
7212898e 456#else
a7a3cfaa 457 0,
7212898e 458#endif
a7a3cfaa
TH
459 0, 0, 0, 0,
460 /* S */ sizeof(unsigned short),
461 0, 0,
7212898e 462#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 463 /* V */ SIZE32,
7212898e 464#else
a7a3cfaa 465 0,
7212898e 466#endif
a7a3cfaa
TH
467 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
468 0, 0,
469 /* i */ sizeof(int),
470 0, 0,
471 /* l */ sizeof(long),
472 0,
7212898e 473#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 474 /* n */ SIZE16,
7212898e 475#else
a7a3cfaa 476 0,
7212898e 477#endif
a7a3cfaa
TH
478 0, 0, 0, 0,
479 /* s */ sizeof(short),
480 0, 0,
7212898e 481#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 482 /* v */ SIZE16,
7212898e 483#else
a7a3cfaa 484 0,
7212898e 485#endif
a7a3cfaa
TH
486 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
487 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
488 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
489 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
490 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
495};
496#else
497/* EBCDIC (or bust) */
f43a9a31 498STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
499 /* normal */
500 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
506 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
507 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
508 0, 0, 0,
509 /* c */ sizeof(char),
510 /* d */ sizeof(double),
511 0,
512 /* f */ sizeof(float),
513 0, 0,
514 /* i */ sizeof(int),
515 0, 0, 0, 0, 0, 0, 0,
516 /* j */ IVSIZE,
517 0,
518 /* l */ SIZE32,
519 0,
520 /* n */ SIZE16,
521 0,
522 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 523#if defined(HAS_QUAD)
a7a3cfaa 524 /* q */ sizeof(Quad_t),
80a13697 525#else
a7a3cfaa 526 0,
80a13697 527#endif
a7a3cfaa
TH
528 0, 0, 0, 0, 0, 0, 0, 0, 0,
529 /* s */ SIZE16,
530 0, 0,
531 /* v */ SIZE16,
532 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
533 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
534 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
535 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 536#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 537 /* D */ LONG_DOUBLESIZE,
80a13697 538#else
a7a3cfaa 539 0,
80a13697 540#endif
a7a3cfaa
TH
541 0,
542 /* F */ NVSIZE,
543 0, 0,
544 /* I */ sizeof(unsigned int),
545 0, 0, 0, 0, 0, 0, 0,
546 /* J */ UVSIZE,
547 0,
548 /* L */ SIZE32,
549 0,
550 /* N */ SIZE32,
551 0, 0,
80a13697 552#if defined(HAS_QUAD)
a7a3cfaa 553 /* Q */ sizeof(Uquad_t),
80a13697 554#else
a7a3cfaa 555 0,
80a13697 556#endif
a7a3cfaa
TH
557 0, 0, 0, 0, 0, 0, 0, 0, 0,
558 /* S */ SIZE16,
559 0,
560 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
561 /* V */ SIZE32,
562 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
564 0, 0, 0, 0, 0, 0, 0, 0, 0,
565 /* shrieking */
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
572 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
573 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
574 0, 0, 0, 0, 0, 0, 0, 0, 0,
575 /* i */ sizeof(int),
576 0, 0, 0, 0, 0, 0, 0, 0, 0,
577 /* l */ sizeof(long),
578 0,
7212898e 579#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 580 /* n */ SIZE16,
7212898e 581#else
a7a3cfaa 582 0,
7212898e 583#endif
a7a3cfaa
TH
584 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
585 /* s */ sizeof(short),
586 0, 0,
7212898e 587#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 588 /* v */ SIZE16,
7212898e 589#else
a7a3cfaa 590 0,
7212898e 591#endif
a7a3cfaa
TH
592 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
593 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
594 0, 0, 0,
595 /* I */ sizeof(unsigned int),
596 0, 0, 0, 0, 0, 0, 0, 0, 0,
597 /* L */ sizeof(unsigned long),
598 0,
7212898e 599#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 600 /* N */ SIZE32,
7212898e 601#else
a7a3cfaa 602 0,
7212898e 603#endif
a7a3cfaa
TH
604 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
605 /* S */ sizeof(unsigned short),
606 0, 0,
7212898e 607#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 608 /* V */ SIZE32,
7212898e 609#else
a7a3cfaa 610 0,
7212898e 611#endif
a7a3cfaa
TH
612 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
613 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
614};
615#endif
616
08ca2aa3 617STATIC U8
f7fe979e 618uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 619{
08ca2aa3 620 STRLEN retlen;
0bcc34c2 621 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 622 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
623 /* We try to process malformed UTF-8 as much as possible (preferrably with
624 warnings), but these two mean we make no progress in the string and
625 might enter an infinite loop */
626 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
627 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
628 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 629 if (val >= 0x100) {
f337b084 630 if (ckWARN(WARN_UNPACK))
08ca2aa3
TH
631 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
632 "Character in '%c' format wrapped in unpack",
f337b084 633 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
634 val &= 0xff;
635 }
636 *s += retlen;
fe2774ed 637 return (U8)val;
08ca2aa3
TH
638}
639
f337b084
TH
640#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
641 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
642 *(U8 *)(s)++)
643
644STATIC bool
f7fe979e 645uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
646{
647 UV val;
648 STRLEN retlen;
f7fe979e 649 const char *from = *s;
08ca2aa3 650 int bad = 0;
f7fe979e 651 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3
TH
652 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
653 for (;buf_len > 0; buf_len--) {
654 if (from >= end) return FALSE;
f337b084 655 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
656 if (retlen == (STRLEN) -1 || retlen == 0) {
657 from += UTF8SKIP(from);
658 bad |= 1;
659 } else from += retlen;
660 if (val >= 0x100) {
661 bad |= 2;
662 val &= 0xff;
663 }
fe2774ed 664 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
665 }
666 /* We have enough characters for the buffer. Did we have problems ? */
667 if (bad) {
668 if (bad & 1) {
669 /* Rewalk the string fragment while warning */
f7fe979e 670 const char *ptr;
9e27e96a 671 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
672 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
673 if (ptr >= end) break;
f337b084 674 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
675 }
676 if (from > end) from = end;
677 }
678 if ((bad & 2) && ckWARN(WARN_UNPACK))
fc241834 679 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084
TH
680 WARN_PACK : WARN_UNPACK),
681 "Character(s) in '%c' format wrapped in %s",
fc241834 682 (int) TYPE_NO_MODIFIERS(datumtype),
f337b084 683 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
684 }
685 *s = from;
686 return TRUE;
687}
688
689STATIC bool
f7fe979e 690next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 691{
97aff369 692 dVAR;
08ca2aa3 693 STRLEN retlen;
0bcc34c2 694 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
695 if (val >= 0x100 || !ISUUCHAR(val) ||
696 retlen == (STRLEN) -1 || retlen == 0) {
697 *out = 0;
698 return FALSE;
699 }
700 *out = PL_uudmap[val] & 077;
f337b084 701 *s += retlen;
08ca2aa3
TH
702 return TRUE;
703}
78d46eaa 704
f337b084 705STATIC void
f7fe979e 706bytes_to_uni(pTHX_ const U8 *start, STRLEN len, char **dest) {
f337b084 707 U8 buffer[UTF8_MAXLEN];
0bcc34c2 708 const U8 * const end = start + len;
f337b084
TH
709 char *d = *dest;
710 while (start < end) {
f7fe979e 711 const int length =
f337b084
TH
712 uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
713 switch(length) {
714 case 1:
715 *d++ = buffer[0];
716 break;
717 case 2:
718 *d++ = buffer[0];
719 *d++ = buffer[1];
720 break;
721 default:
722 Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
723 *start, length);
724 }
725 start++;
726 }
727 *dest = d;
728}
729
230e1fce
NC
730#define PUSH_BYTES(utf8, cur, buf, len) \
731STMT_START { \
732 if (utf8) bytes_to_uni(aTHX_ (U8 *) buf, len, &(cur)); \
733 else { \
734 Copy(buf, cur, len, char); \
735 (cur) += (len); \
736 } \
f337b084
TH
737} STMT_END
738
739#define GROWING(utf8, cat, start, cur, in_len) \
740STMT_START { \
741 STRLEN glen = (in_len); \
3473cf63 742 if (utf8) glen *= UTF8_EXPAND; \
f337b084 743 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 744 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
745 (cur) = (start) + SvCUR(cat); \
746 } \
747} STMT_END
748
749#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
750STMT_START { \
f7fe979e 751 const STRLEN glen = (in_len); \
f337b084 752 STRLEN gl = glen; \
3473cf63 753 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
754 if ((cur) + gl >= (start) + SvLEN(cat)) { \
755 *cur = '\0'; \
b162af07 756 SvCUR_set((cat), (cur) - (start)); \
0bd48802 757 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
758 (cur) = (start) + SvCUR(cat); \
759 } \
760 PUSH_BYTES(utf8, cur, buf, glen); \
761} STMT_END
762
763#define PUSH_BYTE(utf8, s, byte) \
764STMT_START { \
765 if (utf8) { \
f7fe979e 766 const U8 au8 = (byte); \
f337b084
TH
767 bytes_to_uni(aTHX_ &au8, 1, &(s)); \
768 } else *(U8 *)(s)++ = (byte); \
769} STMT_END
770
771/* Only to be used inside a loop (see the break) */
772#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
773STMT_START { \
774 STRLEN retlen; \
775 if (str >= end) break; \
776 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
777 if (retlen == (STRLEN) -1 || retlen == 0) { \
778 *cur = '\0'; \
779 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
780 } \
781 str += retlen; \
782} STMT_END
783
f7fe979e
AL
784static const char *_action( const tempsym_t* symptr )
785{
786 return ( symptr->flags & FLAG_PACK ) ? "pack" : "unpack";
787}
788
206947d2 789/* Returns the sizeof() struct described by pat */
028d1f6d 790STATIC I32
f337b084 791S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 792{
f337b084 793 I32 total = 0;
206947d2 794
49704364 795 while (next_symbol(symptr)) {
f337b084 796 I32 len;
f7fe979e 797 int size;
f337b084
TH
798
799 switch (symptr->howlen) {
fc241834 800 case e_star:
49704364 801 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 802 _action( symptr ) );
49704364 803 break;
f337b084
TH
804 default:
805 /* e_no_len and e_number */
806 len = symptr->length;
807 break;
49704364
LW
808 }
809
a7a3cfaa 810 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 811 if (!size) {
f7fe979e 812 int star;
80a13697
NC
813 /* endianness doesn't influence the size of a type */
814 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
815 default:
816 Perl_croak(aTHX_ "Invalid type '%c' in %s",
817 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 818 _action( symptr ) );
28be1210
TH
819#ifdef PERL_PACK_CAN_SHRIEKSIGN
820 case '.' | TYPE_IS_SHRIEKING:
821 case '@' | TYPE_IS_SHRIEKING:
822#endif
80a13697 823 case '@':
28be1210 824 case '.':
80a13697
NC
825 case '/':
826 case 'U': /* XXXX Is it correct? */
827 case 'w':
828 case 'u':
829 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 830 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 831 _action( symptr ) );
80a13697
NC
832 case '%':
833 size = 0;
834 break;
835 case '(':
fc241834
RGS
836 {
837 tempsym_t savsym = *symptr;
838 symptr->patptr = savsym.grpbeg;
839 symptr->patend = savsym.grpend;
840 /* XXXX Theoretically, we need to measure many times at
841 different positions, since the subexpression may contain
842 alignment commands, but be not of aligned length.
843 Need to detect this and croak(). */
844 size = measure_struct(symptr);
845 *symptr = savsym;
846 break;
847 }
80a13697
NC
848 case 'X' | TYPE_IS_SHRIEKING:
849 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
850 */
851 if (!len) /* Avoid division by 0 */
852 len = 1;
853 len = total % len; /* Assumed: the start is aligned. */
854 /* FALL THROUGH */
855 case 'X':
856 size = -1;
857 if (total < len)
f7fe979e 858 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
859 break;
860 case 'x' | TYPE_IS_SHRIEKING:
861 if (!len) /* Avoid division by 0 */
862 len = 1;
863 star = total % len; /* Assumed: the start is aligned. */
864 if (star) /* Other portable ways? */
865 len = len - star;
866 else
867 len = 0;
868 /* FALL THROUGH */
869 case 'x':
870 case 'A':
871 case 'Z':
872 case 'a':
80a13697
NC
873 size = 1;
874 break;
875 case 'B':
876 case 'b':
877 len = (len + 7)/8;
878 size = 1;
879 break;
880 case 'H':
881 case 'h':
882 len = (len + 1)/2;
883 size = 1;
884 break;
78d46eaa 885
80a13697
NC
886 case 'P':
887 len = 1;
888 size = sizeof(char*);
78d46eaa
NC
889 break;
890 }
206947d2
IZ
891 }
892 total += len * size;
893 }
894 return total;
895}
896
49704364
LW
897
898/* locate matching closing parenthesis or bracket
899 * returns char pointer to char after match, or NULL
900 */
f7fe979e
AL
901STATIC const char *
902S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
18529408 903{
49704364 904 while (patptr < patend) {
f7fe979e 905 const char c = *patptr++;
49704364
LW
906
907 if (isSPACE(c))
908 continue;
909 else if (c == ender)
910 return patptr-1;
911 else if (c == '#') {
912 while (patptr < patend && *patptr != '\n')
913 patptr++;
914 continue;
915 } else if (c == '(')
916 patptr = group_end(patptr, patend, ')') + 1;
917 else if (c == '[')
918 patptr = group_end(patptr, patend, ']') + 1;
18529408 919 }
49704364
LW
920 Perl_croak(aTHX_ "No group ending character '%c' found in template",
921 ender);
922 return 0;
18529408
IZ
923}
924
49704364
LW
925
926/* Convert unsigned decimal number to binary.
927 * Expects a pointer to the first digit and address of length variable
928 * Advances char pointer to 1st non-digit char and returns number
fc241834 929 */
f7fe979e
AL
930STATIC const char *
931S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
49704364
LW
932{
933 I32 len = *patptr++ - '0';
934 while (isDIGIT(*patptr)) {
935 if (len >= 0x7FFFFFFF/10)
936 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
937 len = (len * 10) + (*patptr++ - '0');
938 }
939 *lenptr = len;
940 return patptr;
941}
942
943/* The marvellous template parsing routine: Using state stored in *symptr,
944 * locates next template code and count
945 */
946STATIC bool
f337b084 947S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 948{
f7fe979e 949 const char* patptr = symptr->patptr;
0bcc34c2 950 const char* const patend = symptr->patend;
49704364
LW
951
952 symptr->flags &= ~FLAG_SLASH;
953
954 while (patptr < patend) {
955 if (isSPACE(*patptr))
956 patptr++;
957 else if (*patptr == '#') {
958 patptr++;
959 while (patptr < patend && *patptr != '\n')
960 patptr++;
961 if (patptr < patend)
962 patptr++;
963 } else {
fc241834 964 /* We should have found a template code */
49704364 965 I32 code = *patptr++ & 0xFF;
66c611c5 966 U32 inherited_modifiers = 0;
49704364
LW
967
968 if (code == ','){ /* grandfather in commas but with a warning */
969 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
970 symptr->flags |= FLAG_COMMA;
971 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 972 "Invalid type ',' in %s", _action( symptr ) );
49704364
LW
973 }
974 continue;
975 }
fc241834 976
49704364 977 /* for '(', skip to ')' */
fc241834 978 if (code == '(') {
49704364
LW
979 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
980 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 981 _action( symptr ) );
49704364
LW
982 symptr->grpbeg = patptr;
983 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
984 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
985 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 986 _action( symptr ) );
49704364
LW
987 }
988
66c611c5
MHM
989 /* look for group modifiers to inherit */
990 if (TYPE_ENDIANNESS(symptr->flags)) {
991 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
992 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
993 }
994
1109a392
MHM
995 /* look for modifiers */
996 while (patptr < patend) {
b7787f18
AL
997 const char *allowed;
998 I32 modifier;
1109a392
MHM
999 switch (*patptr) {
1000 case '!':
1001 modifier = TYPE_IS_SHRIEKING;
7212898e 1002 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 1003 break;
7212898e 1004#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
1005 case '>':
1006 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 1007 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
1008 break;
1009 case '<':
1010 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 1011 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 1012 break;
f337b084 1013#endif /* PERL_PACK_CAN_BYTEORDER */
1109a392 1014 default:
b7787f18
AL
1015 allowed = "";
1016 modifier = 0;
1109a392
MHM
1017 break;
1018 }
66c611c5 1019
1109a392
MHM
1020 if (modifier == 0)
1021 break;
66c611c5 1022
1109a392
MHM
1023 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1024 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 1025 allowed, _action( symptr ) );
66c611c5
MHM
1026
1027 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 1028 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 1029 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
1030 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1031 TYPE_ENDIANNESS_MASK)
1032 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 1033 *patptr, _action( symptr ) );
66c611c5 1034
041457d9 1035 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1109a392
MHM
1036 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1037 "Duplicate modifier '%c' after '%c' in %s",
1038 *patptr, (int) TYPE_NO_MODIFIERS(code),
f7fe979e 1039 _action( symptr ) );
1109a392 1040 }
66c611c5 1041
1109a392
MHM
1042 code |= modifier;
1043 patptr++;
49704364
LW
1044 }
1045
66c611c5
MHM
1046 /* inherit modifiers */
1047 code |= inherited_modifiers;
1048
fc241834 1049 /* look for count and/or / */
49704364
LW
1050 if (patptr < patend) {
1051 if (isDIGIT(*patptr)) {
1052 patptr = get_num( patptr, &symptr->length );
1053 symptr->howlen = e_number;
1054
1055 } else if (*patptr == '*') {
1056 patptr++;
1057 symptr->howlen = e_star;
1058
1059 } else if (*patptr == '[') {
f7fe979e 1060 const char* lenptr = ++patptr;
49704364
LW
1061 symptr->howlen = e_number;
1062 patptr = group_end( patptr, patend, ']' ) + 1;
1063 /* what kind of [] is it? */
1064 if (isDIGIT(*lenptr)) {
1065 lenptr = get_num( lenptr, &symptr->length );
1066 if( *lenptr != ']' )
1067 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 1068 _action( symptr ) );
49704364
LW
1069 } else {
1070 tempsym_t savsym = *symptr;
1071 symptr->patend = patptr-1;
1072 symptr->patptr = lenptr;
1073 savsym.length = measure_struct(symptr);
1074 *symptr = savsym;
1075 }
1076 } else {
1077 symptr->howlen = e_no_len;
1078 symptr->length = 1;
1079 }
1080
1081 /* try to find / */
1082 while (patptr < patend) {
1083 if (isSPACE(*patptr))
1084 patptr++;
1085 else if (*patptr == '#') {
1086 patptr++;
1087 while (patptr < patend && *patptr != '\n')
1088 patptr++;
1089 if (patptr < patend)
1090 patptr++;
1091 } else {
66c611c5 1092 if (*patptr == '/') {
49704364
LW
1093 symptr->flags |= FLAG_SLASH;
1094 patptr++;
66c611c5
MHM
1095 if (patptr < patend &&
1096 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 1097 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 1098 _action( symptr ) );
49704364
LW
1099 }
1100 break;
1101 }
18529408 1102 }
49704364
LW
1103 } else {
1104 /* at end - no count, no / */
1105 symptr->howlen = e_no_len;
1106 symptr->length = 1;
1107 }
1108
1109 symptr->code = code;
fc241834 1110 symptr->patptr = patptr;
49704364 1111 return TRUE;
18529408 1112 }
49704364 1113 }
fc241834 1114 symptr->patptr = patptr;
49704364 1115 return FALSE;
18529408
IZ
1116}
1117
18529408 1118/*
fc241834 1119 There is no way to cleanly handle the case where we should process the
08ca2aa3 1120 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
1121 (e.g. estimates like strend-s as an upper bound for the number of
1122 characters left wouldn't work). So if we foresee the need of this
1123 (pattern starts with U or contains U0), we want to work on the encoded
1124 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
1125 themselves if they need to do a lot of unpacks like this on it
1126*/
fc241834 1127STATIC bool
08ca2aa3
TH
1128need_utf8(const char *pat, const char *patend)
1129{
1130 bool first = TRUE;
1131 while (pat < patend) {
1132 if (pat[0] == '#') {
1133 pat++;
f7fe979e 1134 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1135 if (!pat) return FALSE;
1136 } else if (pat[0] == 'U') {
1137 if (first || pat[1] == '0') return TRUE;
1138 } else first = FALSE;
1139 pat++;
1140 }
1141 return FALSE;
1142}
1143
1144STATIC char
1145first_symbol(const char *pat, const char *patend) {
1146 while (pat < patend) {
1147 if (pat[0] != '#') return pat[0];
1148 pat++;
f7fe979e 1149 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1150 if (!pat) return 0;
1151 pat++;
1152 }
1153 return 0;
1154}
1155
1156/*
7accc089
JH
1157=for apidoc unpackstring
1158
608d3aed
LW
1159The engine implementing unpack() Perl function. C<unpackstring> puts the
1160extracted list items on the stack and returns the number of elements.
1161Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
7accc089
JH
1162
1163=cut */
1164
1165I32
f7fe979e 1166Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 1167{
f7fe979e 1168 tempsym_t sym;
08ca2aa3 1169
f337b084 1170 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1171 else if (need_utf8(pat, patend)) {
1172 /* We probably should try to avoid this in case a scalar context call
1173 wouldn't get to the "U0" */
1174 STRLEN len = strend - s;
230e1fce 1175 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1176 SAVEFREEPV(s);
1177 strend = s + len;
f337b084 1178 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1179 }
1180
f337b084
TH
1181 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1182 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1183
f7fe979e 1184 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
1185
1186 return unpack_rec(&sym, s, s, strend, NULL );
1187}
1188
49704364
LW
1189STATIC
1190I32
f7fe979e 1191S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 1192{
27da23d5 1193 dVAR; dSP;
18529408 1194 SV *sv;
f7fe979e 1195 const I32 start_sp_offset = SP - PL_stack_base;
49704364 1196 howlen_t howlen;
a6ec74c1 1197
a6ec74c1 1198 I32 checksum = 0;
92d41999 1199 UV cuv = 0;
a6ec74c1 1200 NV cdouble = 0.0;
f337b084 1201 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 1202 bool beyond = FALSE;
21c16052 1203 bool explicit_length;
9e27e96a 1204 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 1205 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
28be1210 1206 symptr->strbeg = s - strbeg;
49704364 1207
49704364 1208 while (next_symbol(symptr)) {
a7a3cfaa 1209 packprops_t props;
9e27e96a 1210 I32 len;
f337b084 1211 I32 datumtype = symptr->code;
206947d2 1212 /* do first one only unless in list context
08ca2aa3 1213 / is implemented by unpacking the count, then popping it from the
206947d2 1214 stack, so must check that we're not in the middle of a / */
49704364 1215 if ( unpack_only_one
206947d2 1216 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1217 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1218 break;
49704364 1219
f337b084 1220 switch (howlen = symptr->howlen) {
fc241834
RGS
1221 case e_star:
1222 len = strend - strbeg; /* long enough */
49704364 1223 break;
f337b084
TH
1224 default:
1225 /* e_no_len and e_number */
1226 len = symptr->length;
1227 break;
49704364 1228 }
18529408 1229
21c16052 1230 explicit_length = TRUE;
a6ec74c1 1231 redo_switch:
49704364 1232 beyond = s >= strend;
a7a3cfaa
TH
1233
1234 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1235 if (props) {
1236 /* props nonzero means we can process this letter. */
9e27e96a
AL
1237 const long size = props & PACK_SIZE_MASK;
1238 const long howmany = (strend - s) / size;
a7a3cfaa
TH
1239 if (len > howmany)
1240 len = howmany;
1241
1242 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1243 if (len && unpack_only_one) len = 1;
1244 EXTEND(SP, len);
1245 EXTEND_MORTAL(len);
78d46eaa
NC
1246 }
1247 }
a7a3cfaa 1248
1109a392 1249 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1250 default:
1109a392 1251 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1252
a6ec74c1 1253 case '%':
49704364 1254 if (howlen == e_no_len)
18529408 1255 len = 16; /* len is not specified */
a6ec74c1 1256 checksum = len;
92d41999 1257 cuv = 0;
a6ec74c1 1258 cdouble = 0;
18529408 1259 continue;
a6ec74c1 1260 break;
18529408
IZ
1261 case '(':
1262 {
49704364 1263 tempsym_t savsym = *symptr;
9e27e96a 1264 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 1265 symptr->flags |= group_modifiers;
49704364 1266 symptr->patend = savsym.grpend;
28be1210 1267 symptr->previous = &savsym;
49704364 1268 symptr->level++;
18529408
IZ
1269 PUTBACK;
1270 while (len--) {
49704364 1271 symptr->patptr = savsym.grpbeg;
f337b084
TH
1272 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1273 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1274 unpack_rec(symptr, s, strbeg, strend, &s);
1275 if (s == strend && savsym.howlen == e_star)
49704364 1276 break; /* No way to continue */
18529408
IZ
1277 }
1278 SPAGAIN;
28be1210 1279 savsym.flags = symptr->flags & ~group_modifiers;
49704364 1280 *symptr = savsym;
18529408
IZ
1281 break;
1282 }
28be1210
TH
1283#ifdef PERL_PACK_CAN_SHRIEKSIGN
1284 case '.' | TYPE_IS_SHRIEKING:
1285#endif
1286 case '.': {
9e27e96a 1287 const char *from;
28be1210
TH
1288 SV *sv;
1289#ifdef PERL_PACK_CAN_SHRIEKSIGN
9e27e96a 1290 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210 1291#else /* PERL_PACK_CAN_SHRIEKSIGN */
9e27e96a 1292 const bool u8 = utf8;
28be1210
TH
1293#endif
1294 if (howlen == e_star) from = strbeg;
1295 else if (len <= 0) from = s;
1296 else {
1297 tempsym_t *group = symptr;
1298
1299 while (--len && group) group = group->previous;
1300 from = group ? strbeg + group->strbeg : strbeg;
1301 }
1302 sv = from <= s ?
00646304
CB
1303 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1304 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
28be1210
TH
1305 XPUSHs(sv_2mortal(sv));
1306 break;
1307 }
1308#ifdef PERL_PACK_CAN_SHRIEKSIGN
1309 case '@' | TYPE_IS_SHRIEKING:
1310#endif
a6ec74c1 1311 case '@':
28be1210
TH
1312 s = strbeg + symptr->strbeg;
1313#ifdef PERL_PACK_CAN_SHRIEKSIGN
1314 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1315#else /* PERL_PACK_CAN_SHRIEKSIGN */
1316 if (utf8)
1317#endif
1318 {
08ca2aa3
TH
1319 while (len > 0) {
1320 if (s >= strend)
1321 Perl_croak(aTHX_ "'@' outside of string in unpack");
1322 s += UTF8SKIP(s);
1323 len--;
1324 }
1325 if (s > strend)
1326 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1327 } else {
28be1210 1328 if (strend-s < len)
fc241834 1329 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1330 s += len;
08ca2aa3 1331 }
a6ec74c1 1332 break;
62f95557
IZ
1333 case 'X' | TYPE_IS_SHRIEKING:
1334 if (!len) /* Avoid division by 0 */
1335 len = 1;
08ca2aa3 1336 if (utf8) {
f7fe979e 1337 const char *hop, *last;
f337b084
TH
1338 I32 l = len;
1339 hop = last = strbeg;
1340 while (hop < s) {
1341 hop += UTF8SKIP(hop);
1342 if (--l == 0) {
08ca2aa3 1343 last = hop;
f337b084
TH
1344 l = len;
1345 }
fc241834 1346 }
f337b084
TH
1347 if (last > s)
1348 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1349 s = last;
1350 break;
f337b084
TH
1351 }
1352 len = (s - strbeg) % len;
62f95557 1353 /* FALL THROUGH */
a6ec74c1 1354 case 'X':
08ca2aa3
TH
1355 if (utf8) {
1356 while (len > 0) {
1357 if (s <= strbeg)
1358 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1359 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1360 if (s <= strbeg)
1361 Perl_croak(aTHX_ "'X' outside of string in unpack");
1362 }
1363 len--;
1364 }
1365 } else {
fc241834
RGS
1366 if (len > s - strbeg)
1367 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1368 s -= len;
08ca2aa3 1369 }
a6ec74c1 1370 break;
9e27e96a
AL
1371 case 'x' | TYPE_IS_SHRIEKING: {
1372 I32 ai32;
62f95557
IZ
1373 if (!len) /* Avoid division by 0 */
1374 len = 1;
230e1fce
NC
1375 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1376 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1377 if (ai32 == 0) break;
1378 len -= ai32;
9e27e96a 1379 }
62f95557 1380 /* FALL THROUGH */
a6ec74c1 1381 case 'x':
08ca2aa3
TH
1382 if (utf8) {
1383 while (len>0) {
1384 if (s >= strend)
1385 Perl_croak(aTHX_ "'x' outside of string in unpack");
1386 s += UTF8SKIP(s);
1387 len--;
1388 }
1389 } else {
fc241834
RGS
1390 if (len > strend - s)
1391 Perl_croak(aTHX_ "'x' outside of string in unpack");
1392 s += len;
f337b084 1393 }
a6ec74c1
JH
1394 break;
1395 case '/':
49704364
LW
1396 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1397 break;
a6ec74c1
JH
1398 case 'A':
1399 case 'Z':
1400 case 'a':
08ca2aa3
TH
1401 if (checksum) {
1402 /* Preliminary length estimate is assumed done in 'W' */
1403 if (len > strend - s) len = strend - s;
1404 goto W_checksum;
1405 }
1406 if (utf8) {
1407 I32 l;
f7fe979e 1408 const char *hop;
08ca2aa3
TH
1409 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1410 if (hop >= strend) {
1411 if (hop > strend)
1412 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1413 break;
fc241834 1414 }
a6ec74c1 1415 }
08ca2aa3
TH
1416 if (hop > strend)
1417 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1418 len = hop - s;
1419 } else if (len > strend - s)
1420 len = strend - s;
1421
1422 if (datumtype == 'Z') {
1423 /* 'Z' strips stuff after first null */
f7fe979e 1424 const char *ptr, *end;
f337b084
TH
1425 end = s + len;
1426 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1427 sv = newSVpvn(s, ptr-s);
1428 if (howlen == e_star) /* exact for 'Z*' */
1429 len = ptr-s + (ptr != strend ? 1 : 0);
1430 } else if (datumtype == 'A') {
1431 /* 'A' strips both nulls and spaces */
f7fe979e 1432 const char *ptr;
18bdf90a
TH
1433 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1434 for (ptr = s+len-1; ptr >= s; ptr--)
1435 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
230e1fce 1436 !is_utf8_space((U8 *) ptr)) break;
18bdf90a
TH
1437 if (ptr >= s) ptr += UTF8SKIP(ptr);
1438 else ptr++;
28be1210 1439 if (ptr > s+len)
18bdf90a
TH
1440 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1441 } else {
1442 for (ptr = s+len-1; ptr >= s; ptr--)
1443 if (*ptr != 0 && !isSPACE(*ptr)) break;
1444 ptr++;
1445 }
08ca2aa3
TH
1446 sv = newSVpvn(s, ptr-s);
1447 } else sv = newSVpvn(s, len);
1448
1449 if (utf8) {
1450 SvUTF8_on(sv);
1451 /* Undo any upgrade done due to need_utf8() */
f337b084 1452 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1453 sv_utf8_downgrade(sv, 0);
a6ec74c1
JH
1454 }
1455 XPUSHs(sv_2mortal(sv));
08ca2aa3 1456 s += len;
a6ec74c1
JH
1457 break;
1458 case 'B':
08ca2aa3
TH
1459 case 'b': {
1460 char *str;
49704364 1461 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1462 len = (strend - s) * 8;
1463 if (checksum) {
1464 if (!PL_bitcount) {
08ca2aa3 1465 int bits;
a02a5408 1466 Newxz(PL_bitcount, 256, char);
a6ec74c1
JH
1467 for (bits = 1; bits < 256; bits++) {
1468 if (bits & 1) PL_bitcount[bits]++;
1469 if (bits & 2) PL_bitcount[bits]++;
1470 if (bits & 4) PL_bitcount[bits]++;
1471 if (bits & 8) PL_bitcount[bits]++;
1472 if (bits & 16) PL_bitcount[bits]++;
1473 if (bits & 32) PL_bitcount[bits]++;
1474 if (bits & 64) PL_bitcount[bits]++;
1475 if (bits & 128) PL_bitcount[bits]++;
1476 }
1477 }
f337b084 1478 if (utf8)
08ca2aa3 1479 while (len >= 8 && s < strend) {
f337b084 1480 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1481 len -= 8;
1482 }
f337b084 1483 else
fc241834 1484 while (len >= 8) {
08ca2aa3 1485 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1486 len -= 8;
1487 }
08ca2aa3
TH
1488 if (len && s < strend) {
1489 U8 bits;
f337b084
TH
1490 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1491 if (datumtype == 'b')
a6ec74c1 1492 while (len-- > 0) {
92d41999 1493 if (bits & 1) cuv++;
a6ec74c1
JH
1494 bits >>= 1;
1495 }
f337b084 1496 else
a6ec74c1 1497 while (len-- > 0) {
08ca2aa3 1498 if (bits & 0x80) cuv++;
a6ec74c1
JH
1499 bits <<= 1;
1500 }
fc241834 1501 }
a6ec74c1
JH
1502 break;
1503 }
08ca2aa3 1504
561b68a9 1505 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1506 SvPOK_on(sv);
1507 str = SvPVX(sv);
1508 if (datumtype == 'b') {
f337b084 1509 U8 bits = 0;
f7fe979e 1510 const I32 ai32 = len;
08ca2aa3
TH
1511 for (len = 0; len < ai32; len++) {
1512 if (len & 7) bits >>= 1;
1513 else if (utf8) {
1514 if (s >= strend) break;
f337b084 1515 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1516 } else bits = *(U8 *) s++;
1517 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1518 }
08ca2aa3 1519 } else {
f337b084 1520 U8 bits = 0;
f7fe979e 1521 const I32 ai32 = len;
08ca2aa3
TH
1522 for (len = 0; len < ai32; len++) {
1523 if (len & 7) bits <<= 1;
1524 else if (utf8) {
1525 if (s >= strend) break;
f337b084 1526 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1527 } else bits = *(U8 *) s++;
1528 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1529 }
1530 }
1531 *str = '\0';
aa07b2f6 1532 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1533 XPUSHs(sv);
a6ec74c1 1534 break;
08ca2aa3 1535 }
a6ec74c1 1536 case 'H':
08ca2aa3
TH
1537 case 'h': {
1538 char *str;
fc241834 1539 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1540 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1541 len = (strend - s) * 2;
561b68a9 1542 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1543 SvPOK_on(sv);
1544 str = SvPVX(sv);
1545 if (datumtype == 'h') {
f337b084 1546 U8 bits = 0;
9e27e96a 1547 I32 ai32 = len;
fc241834
RGS
1548 for (len = 0; len < ai32; len++) {
1549 if (len & 1) bits >>= 4;
1550 else if (utf8) {
1551 if (s >= strend) break;
f337b084 1552 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1553 } else bits = * (U8 *) s++;
a6ec74c1
JH
1554 *str++ = PL_hexdigit[bits & 15];
1555 }
08ca2aa3 1556 } else {
f337b084 1557 U8 bits = 0;
f7fe979e 1558 const I32 ai32 = len;
08ca2aa3
TH
1559 for (len = 0; len < ai32; len++) {
1560 if (len & 1) bits <<= 4;
1561 else if (utf8) {
1562 if (s >= strend) break;
f337b084 1563 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1564 } else bits = *(U8 *) s++;
a6ec74c1
JH
1565 *str++ = PL_hexdigit[(bits >> 4) & 15];
1566 }
1567 }
1568 *str = '\0';
aa07b2f6 1569 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1570 XPUSHs(sv);
a6ec74c1 1571 break;
08ca2aa3 1572 }
a6ec74c1 1573 case 'c':
73cb7263 1574 while (len-- > 0) {
f337b084 1575 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
73cb7263
NC
1576 if (aint >= 128) /* fake up signed chars */
1577 aint -= 256;
08ca2aa3 1578 if (!checksum)
ac7f3b1b 1579 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1580 else if (checksum > bits_in_uv)
1581 cdouble += (NV)aint;
1582 else
1583 cuv += aint;
a6ec74c1
JH
1584 }
1585 break;
1586 case 'C':
08ca2aa3
TH
1587 case 'W':
1588 W_checksum:
35bcd338 1589 if (len == 0) {
fc241834 1590 if (explicit_length && datumtype == 'C')
08ca2aa3 1591 /* Switch to "character" mode */
f337b084 1592 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
35bcd338
JH
1593 break;
1594 }
fc241834 1595 if (datumtype == 'C' ?
f337b084
TH
1596 (symptr->flags & FLAG_DO_UTF8) &&
1597 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
08ca2aa3 1598 while (len-- > 0 && s < strend) {
08ca2aa3 1599 STRLEN retlen;
f7fe979e 1600 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1601 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1602 if (retlen == (STRLEN) -1 || retlen == 0)
1603 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1604 s += retlen;
1605 if (!checksum)
1606 PUSHs(sv_2mortal(newSVuv((UV) val)));
1607 else if (checksum > bits_in_uv)
1608 cdouble += (NV) val;
d6d3e8bd 1609 else
08ca2aa3 1610 cuv += val;
fc241834 1611 }
08ca2aa3 1612 } else if (!checksum)
a6ec74c1 1613 while (len-- > 0) {
f7fe979e 1614 const U8 ch = *(U8 *) s++;
08ca2aa3 1615 PUSHs(sv_2mortal(newSVuv((UV) ch)));
a6ec74c1 1616 }
08ca2aa3
TH
1617 else if (checksum > bits_in_uv)
1618 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1619 else
1620 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1621 break;
1622 case 'U':
35bcd338 1623 if (len == 0) {
08ca2aa3
TH
1624 if (explicit_length) {
1625 /* Switch to "bytes in UTF-8" mode */
f337b084 1626 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1627 else
1628 /* Should be impossible due to the need_utf8() test */
1629 Perl_croak(aTHX_ "U0 mode on a byte string");
1630 }
35bcd338
JH
1631 break;
1632 }
08ca2aa3 1633 if (len > strend - s) len = strend - s;
fc241834 1634 if (!checksum) {
08ca2aa3
TH
1635 if (len && unpack_only_one) len = 1;
1636 EXTEND(SP, len);
1637 EXTEND_MORTAL(len);
fc241834 1638 }
08ca2aa3
TH
1639 while (len-- > 0 && s < strend) {
1640 STRLEN retlen;
1641 UV auv;
1642 if (utf8) {
1643 U8 result[UTF8_MAXLEN];
f7fe979e 1644 const char *ptr = s;
08ca2aa3 1645 STRLEN len;
08ca2aa3
TH
1646 /* Bug: warns about bad utf8 even if we are short on bytes
1647 and will break out of the loop */
230e1fce
NC
1648 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1649 'U'))
08ca2aa3
TH
1650 break;
1651 len = UTF8SKIP(result);
fc241834 1652 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1653 (char *) &result[1], len-1, 'U')) break;
08ca2aa3
TH
1654 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1655 s = ptr;
1656 } else {
1657 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1658 if (retlen == (STRLEN) -1 || retlen == 0)
1659 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1660 s += retlen;
1661 }
1662 if (!checksum)
1663 PUSHs(sv_2mortal(newSVuv((UV) auv)));
73cb7263 1664 else if (checksum > bits_in_uv)
08ca2aa3 1665 cdouble += (NV) auv;
73cb7263 1666 else
08ca2aa3 1667 cuv += auv;
a6ec74c1
JH
1668 }
1669 break;
49704364
LW
1670 case 's' | TYPE_IS_SHRIEKING:
1671#if SHORTSIZE != SIZE16
73cb7263 1672 while (len-- > 0) {
08ca2aa3 1673 short ashort;
f337b084
TH
1674 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1675 DO_BO_UNPACK(ashort, s);
08ca2aa3 1676 if (!checksum)
ac7f3b1b 1677 PUSHs(sv_2mortal(newSViv((IV)ashort)));
73cb7263
NC
1678 else if (checksum > bits_in_uv)
1679 cdouble += (NV)ashort;
1680 else
1681 cuv += ashort;
49704364
LW
1682 }
1683 break;
1684#else
1685 /* Fallthrough! */
a6ec74c1 1686#endif
49704364 1687 case 's':
73cb7263 1688 while (len-- > 0) {
08ca2aa3
TH
1689 I16 ai16;
1690
1691#if U16SIZE > SIZE16
1692 ai16 = 0;
1693#endif
f337b084 1694 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1695 DO_BO_UNPACK(ai16, 16);
1109a392 1696#if U16SIZE > SIZE16
73cb7263
NC
1697 if (ai16 > 32767)
1698 ai16 -= 65536;
a6ec74c1 1699#endif
08ca2aa3 1700 if (!checksum)
ac7f3b1b 1701 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263
NC
1702 else if (checksum > bits_in_uv)
1703 cdouble += (NV)ai16;
1704 else
1705 cuv += ai16;
a6ec74c1
JH
1706 }
1707 break;
49704364
LW
1708 case 'S' | TYPE_IS_SHRIEKING:
1709#if SHORTSIZE != SIZE16
73cb7263 1710 while (len-- > 0) {
08ca2aa3 1711 unsigned short aushort;
f337b084
TH
1712 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1713 DO_BO_UNPACK(aushort, s);
08ca2aa3
TH
1714 if (!checksum)
1715 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
73cb7263
NC
1716 else if (checksum > bits_in_uv)
1717 cdouble += (NV)aushort;
1718 else
1719 cuv += aushort;
49704364
LW
1720 }
1721 break;
1722#else
1723 /* Fallhrough! */
1724#endif
a6ec74c1
JH
1725 case 'v':
1726 case 'n':
1727 case 'S':
73cb7263 1728 while (len-- > 0) {
08ca2aa3
TH
1729 U16 au16;
1730#if U16SIZE > SIZE16
1731 au16 = 0;
1732#endif
f337b084 1733 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1734 DO_BO_UNPACK(au16, 16);
a6ec74c1 1735#ifdef HAS_NTOHS
73cb7263
NC
1736 if (datumtype == 'n')
1737 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1738#endif
1739#ifdef HAS_VTOHS
73cb7263
NC
1740 if (datumtype == 'v')
1741 au16 = vtohs(au16);
a6ec74c1 1742#endif
08ca2aa3
TH
1743 if (!checksum)
1744 PUSHs(sv_2mortal(newSVuv((UV)au16)));
73cb7263 1745 else if (checksum > bits_in_uv)
f337b084 1746 cdouble += (NV) au16;
73cb7263
NC
1747 else
1748 cuv += au16;
a6ec74c1
JH
1749 }
1750 break;
7212898e 1751#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1752 case 'v' | TYPE_IS_SHRIEKING:
1753 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1754 while (len-- > 0) {
08ca2aa3
TH
1755 I16 ai16;
1756# if U16SIZE > SIZE16
1757 ai16 = 0;
1758# endif
f337b084 1759 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1760# ifdef HAS_NTOHS
73cb7263 1761 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1762 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1763# endif /* HAS_NTOHS */
1764# ifdef HAS_VTOHS
73cb7263 1765 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1766 ai16 = (I16) vtohs((U16) ai16);
1767# endif /* HAS_VTOHS */
1768 if (!checksum)
ac7f3b1b 1769 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263 1770 else if (checksum > bits_in_uv)
08ca2aa3 1771 cdouble += (NV) ai16;
73cb7263
NC
1772 else
1773 cuv += ai16;
068bd2e7
MHM
1774 }
1775 break;
08ca2aa3 1776#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1777 case 'i':
49704364 1778 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1779 while (len-- > 0) {
08ca2aa3 1780 int aint;
f337b084
TH
1781 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1782 DO_BO_UNPACK(aint, i);
08ca2aa3 1783 if (!checksum)
ac7f3b1b 1784 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1785 else if (checksum > bits_in_uv)
1786 cdouble += (NV)aint;
1787 else
1788 cuv += aint;
a6ec74c1
JH
1789 }
1790 break;
1791 case 'I':
49704364 1792 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1793 while (len-- > 0) {
08ca2aa3 1794 unsigned int auint;
f337b084
TH
1795 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1796 DO_BO_UNPACK(auint, i);
08ca2aa3 1797 if (!checksum)
ac7f3b1b 1798 PUSHs(sv_2mortal(newSVuv((UV)auint)));
73cb7263
NC
1799 else if (checksum > bits_in_uv)
1800 cdouble += (NV)auint;
1801 else
1802 cuv += auint;
a6ec74c1
JH
1803 }
1804 break;
92d41999 1805 case 'j':
73cb7263 1806 while (len-- > 0) {
08ca2aa3 1807 IV aiv;
f337b084 1808 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1809#if IVSIZE == INTSIZE
f337b084 1810 DO_BO_UNPACK(aiv, i);
1109a392 1811#elif IVSIZE == LONGSIZE
f337b084 1812 DO_BO_UNPACK(aiv, l);
1109a392 1813#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1814 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1815#else
1816 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1817#endif
08ca2aa3 1818 if (!checksum)
ac7f3b1b 1819 PUSHs(sv_2mortal(newSViv(aiv)));
73cb7263
NC
1820 else if (checksum > bits_in_uv)
1821 cdouble += (NV)aiv;
1822 else
1823 cuv += aiv;
92d41999
JH
1824 }
1825 break;
1826 case 'J':
73cb7263 1827 while (len-- > 0) {
08ca2aa3 1828 UV auv;
f337b084 1829 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1830#if IVSIZE == INTSIZE
f337b084 1831 DO_BO_UNPACK(auv, i);
08ca2aa3 1832#elif IVSIZE == LONGSIZE
f337b084 1833 DO_BO_UNPACK(auv, l);
08ca2aa3 1834#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1835 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1836#else
1837 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1838#endif
08ca2aa3 1839 if (!checksum)
ac7f3b1b 1840 PUSHs(sv_2mortal(newSVuv(auv)));
73cb7263
NC
1841 else if (checksum > bits_in_uv)
1842 cdouble += (NV)auv;
1843 else
1844 cuv += auv;
92d41999
JH
1845 }
1846 break;
49704364
LW
1847 case 'l' | TYPE_IS_SHRIEKING:
1848#if LONGSIZE != SIZE32
73cb7263 1849 while (len-- > 0) {
08ca2aa3 1850 long along;
f337b084
TH
1851 SHIFT_VAR(utf8, s, strend, along, datumtype);
1852 DO_BO_UNPACK(along, l);
08ca2aa3 1853 if (!checksum)
ac7f3b1b 1854 PUSHs(sv_2mortal(newSViv((IV)along)));
73cb7263
NC
1855 else if (checksum > bits_in_uv)
1856 cdouble += (NV)along;
1857 else
1858 cuv += along;
49704364
LW
1859 }
1860 break;
1861#else
1862 /* Fallthrough! */
a6ec74c1 1863#endif
49704364 1864 case 'l':
73cb7263 1865 while (len-- > 0) {
08ca2aa3
TH
1866 I32 ai32;
1867#if U32SIZE > SIZE32
1868 ai32 = 0;
1869#endif
f337b084 1870 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1871 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1872#if U32SIZE > SIZE32
08ca2aa3 1873 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1874#endif
08ca2aa3 1875 if (!checksum)
ac7f3b1b 1876 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1877 else if (checksum > bits_in_uv)
1878 cdouble += (NV)ai32;
1879 else
1880 cuv += ai32;
a6ec74c1
JH
1881 }
1882 break;
49704364
LW
1883 case 'L' | TYPE_IS_SHRIEKING:
1884#if LONGSIZE != SIZE32
73cb7263 1885 while (len-- > 0) {
08ca2aa3 1886 unsigned long aulong;
f337b084
TH
1887 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1888 DO_BO_UNPACK(aulong, l);
08ca2aa3 1889 if (!checksum)
ac7f3b1b 1890 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
73cb7263
NC
1891 else if (checksum > bits_in_uv)
1892 cdouble += (NV)aulong;
1893 else
1894 cuv += aulong;
49704364
LW
1895 }
1896 break;
1897#else
1898 /* Fall through! */
1899#endif
a6ec74c1
JH
1900 case 'V':
1901 case 'N':
1902 case 'L':
73cb7263 1903 while (len-- > 0) {
08ca2aa3
TH
1904 U32 au32;
1905#if U32SIZE > SIZE32
1906 au32 = 0;
1907#endif
f337b084 1908 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1909 DO_BO_UNPACK(au32, 32);
a6ec74c1 1910#ifdef HAS_NTOHL
73cb7263
NC
1911 if (datumtype == 'N')
1912 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1913#endif
1914#ifdef HAS_VTOHL
73cb7263
NC
1915 if (datumtype == 'V')
1916 au32 = vtohl(au32);
a6ec74c1 1917#endif
08ca2aa3 1918 if (!checksum)
fc241834
RGS
1919 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1920 else if (checksum > bits_in_uv)
1921 cdouble += (NV)au32;
1922 else
1923 cuv += au32;
a6ec74c1
JH
1924 }
1925 break;
7212898e 1926#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1927 case 'V' | TYPE_IS_SHRIEKING:
1928 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1929 while (len-- > 0) {
08ca2aa3
TH
1930 I32 ai32;
1931# if U32SIZE > SIZE32
1932 ai32 = 0;
1933# endif
f337b084 1934 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1935# ifdef HAS_NTOHL
73cb7263
NC
1936 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1937 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1938# endif
1939# ifdef HAS_VTOHL
73cb7263
NC
1940 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1941 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1942# endif
1943 if (!checksum)
ac7f3b1b 1944 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1945 else if (checksum > bits_in_uv)
1946 cdouble += (NV)ai32;
1947 else
1948 cuv += ai32;
068bd2e7
MHM
1949 }
1950 break;
08ca2aa3 1951#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1952 case 'p':
a6ec74c1 1953 while (len-- > 0) {
f7fe979e 1954 const char *aptr;
f337b084 1955 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1956 DO_BO_UNPACK_PC(aptr);
c4c5f44a
NC
1957 /* newSVpv generates undef if aptr is NULL */
1958 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
a6ec74c1
JH
1959 }
1960 break;
1961 case 'w':
a6ec74c1
JH
1962 {
1963 UV auv = 0;
1964 U32 bytes = 0;
fc241834 1965
08ca2aa3
TH
1966 while (len > 0 && s < strend) {
1967 U8 ch;
f337b084 1968 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1969 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1970 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1971 if (ch < 0x80) {
a6ec74c1 1972 bytes = 0;
ac7f3b1b 1973 PUSHs(sv_2mortal(newSVuv(auv)));
a6ec74c1
JH
1974 len--;
1975 auv = 0;
08ca2aa3 1976 continue;
a6ec74c1 1977 }
08ca2aa3 1978 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1979 const char *t;
a6ec74c1
JH
1980
1981 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
1982 while (s < strend) {
f337b084 1983 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1984 sv = mul128(sv, (U8)(ch & 0x7f));
1985 if (!(ch & 0x80)) {
a6ec74c1
JH
1986 bytes = 0;
1987 break;
1988 }
1989 }
10516c54 1990 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1991 while (*t == '0')
1992 t++;
1993 sv_chop(sv, t);
1994 PUSHs(sv_2mortal(sv));
1995 len--;
1996 auv = 0;
1997 }
1998 }
1999 if ((s >= strend) && bytes)
49704364 2000 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
2001 }
2002 break;
2003 case 'P':
49704364
LW
2004 if (symptr->howlen == e_star)
2005 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 2006 EXTEND(SP, 1);
2d3e0934 2007 if (s + sizeof(char*) <= strend) {
08ca2aa3 2008 char *aptr;
f337b084 2009 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 2010 DO_BO_UNPACK_PC(aptr);
fc241834
RGS
2011 /* newSVpvn generates undef if aptr is NULL */
2012 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
08ca2aa3 2013 }
a6ec74c1
JH
2014 break;
2015#ifdef HAS_QUAD
2016 case 'q':
73cb7263 2017 while (len-- > 0) {
08ca2aa3 2018 Quad_t aquad;
f337b084
TH
2019 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2020 DO_BO_UNPACK(aquad, 64);
08ca2aa3
TH
2021 if (!checksum)
2022 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
ac7f3b1b 2023 newSViv((IV)aquad) : newSVnv((NV)aquad)));
73cb7263
NC
2024 else if (checksum > bits_in_uv)
2025 cdouble += (NV)aquad;
2026 else
2027 cuv += aquad;
2028 }
a6ec74c1
JH
2029 break;
2030 case 'Q':
73cb7263 2031 while (len-- > 0) {
08ca2aa3 2032 Uquad_t auquad;
f337b084
TH
2033 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2034 DO_BO_UNPACK(auquad, 64);
08ca2aa3
TH
2035 if (!checksum)
2036 PUSHs(sv_2mortal(auquad <= UV_MAX ?
2037 newSVuv((UV)auquad):newSVnv((NV)auquad)));
73cb7263
NC
2038 else if (checksum > bits_in_uv)
2039 cdouble += (NV)auquad;
2040 else
2041 cuv += auquad;
a6ec74c1
JH
2042 }
2043 break;
08ca2aa3 2044#endif /* HAS_QUAD */
a6ec74c1
JH
2045 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2046 case 'f':
73cb7263 2047 while (len-- > 0) {
08ca2aa3 2048 float afloat;
f337b084 2049 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
2050 DO_BO_UNPACK_N(afloat, float);
2051 if (!checksum)
2052 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2053 else
73cb7263 2054 cdouble += afloat;
fc241834 2055 }
a6ec74c1
JH
2056 break;
2057 case 'd':
73cb7263 2058 while (len-- > 0) {
08ca2aa3 2059 double adouble;
f337b084 2060 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
2061 DO_BO_UNPACK_N(adouble, double);
2062 if (!checksum)
2063 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2064 else
73cb7263 2065 cdouble += adouble;
fc241834 2066 }
a6ec74c1 2067 break;
92d41999 2068 case 'F':
73cb7263 2069 while (len-- > 0) {
08ca2aa3 2070 NV anv;
f337b084 2071 SHIFT_VAR(utf8, s, strend, anv, datumtype);
08ca2aa3
TH
2072 DO_BO_UNPACK_N(anv, NV);
2073 if (!checksum)
2074 PUSHs(sv_2mortal(newSVnv(anv)));
2075 else
73cb7263 2076 cdouble += anv;
fc241834 2077 }
92d41999
JH
2078 break;
2079#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2080 case 'D':
73cb7263 2081 while (len-- > 0) {
08ca2aa3 2082 long double aldouble;
f337b084 2083 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
08ca2aa3
TH
2084 DO_BO_UNPACK_N(aldouble, long double);
2085 if (!checksum)
2086 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2087 else
2088 cdouble += aldouble;
92d41999
JH
2089 }
2090 break;
2091#endif
a6ec74c1
JH
2092 case 'u':
2093 /* MKS:
2094 * Initialise the decode mapping. By using a table driven
2095 * algorithm, the code will be character-set independent
2096 * (and just as fast as doing character arithmetic)
2097 */
2098 if (PL_uudmap['M'] == 0) {
2d3e0934 2099 size_t i;
a6ec74c1 2100
2d3e0934 2101 for (i = 0; i < sizeof(PL_uuemap); ++i)
a6ec74c1
JH
2102 PL_uudmap[(U8)PL_uuemap[i]] = i;
2103 /*
2104 * Because ' ' and '`' map to the same value,
2105 * we need to decode them both the same.
2106 */
2107 PL_uudmap[' '] = 0;
2108 }
08ca2aa3 2109 {
f7fe979e 2110 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 2111 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
2112 if (l) SvPOK_on(sv);
2113 }
2114 if (utf8) {
2115 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2116 I32 a, b, c, d;
2117 char hunk[4];
2118
2119 hunk[3] = '\0';
2120 while (len > 0) {
2121 next_uni_uu(aTHX_ &s, strend, &a);
2122 next_uni_uu(aTHX_ &s, strend, &b);
2123 next_uni_uu(aTHX_ &s, strend, &c);
2124 next_uni_uu(aTHX_ &s, strend, &d);
2125 hunk[0] = (char)((a << 2) | (b >> 4));
2126 hunk[1] = (char)((b << 4) | (c >> 2));
2127 hunk[2] = (char)((c << 6) | d);
2128 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2129 len -= 3;
2130 }
2131 if (s < strend) {
f7fe979e
AL
2132 if (*s == '\n') {
2133 s++;
2134 }
08ca2aa3
TH
2135 else {
2136 /* possible checksum byte */
f7fe979e
AL
2137 const char *skip = s+UTF8SKIP(s);
2138 if (skip < strend && *skip == '\n')
2139 s = skip+1;
08ca2aa3
TH
2140 }
2141 }
2142 }
2143 } else {
fc241834
RGS
2144 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2145 I32 a, b, c, d;
2146 char hunk[4];
a6ec74c1 2147
fc241834
RGS
2148 hunk[3] = '\0';
2149 len = PL_uudmap[*(U8*)s++] & 077;
2150 while (len > 0) {
2151 if (s < strend && ISUUCHAR(*s))
2152 a = PL_uudmap[*(U8*)s++] & 077;
2153 else
2154 a = 0;
2155 if (s < strend && ISUUCHAR(*s))
2156 b = PL_uudmap[*(U8*)s++] & 077;
2157 else
2158 b = 0;
2159 if (s < strend && ISUUCHAR(*s))
2160 c = PL_uudmap[*(U8*)s++] & 077;
2161 else
2162 c = 0;
2163 if (s < strend && ISUUCHAR(*s))
2164 d = PL_uudmap[*(U8*)s++] & 077;
2165 else
2166 d = 0;
2167 hunk[0] = (char)((a << 2) | (b >> 4));
2168 hunk[1] = (char)((b << 4) | (c >> 2));
2169 hunk[2] = (char)((c << 6) | d);
2170 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2171 len -= 3;
2172 }
2173 if (*s == '\n')
2174 s++;
2175 else /* possible checksum byte */
2176 if (s + 1 < strend && s[1] == '\n')
2177 s += 2;
a6ec74c1 2178 }
08ca2aa3
TH
2179 }
2180 XPUSHs(sv);
a6ec74c1
JH
2181 break;
2182 }
49704364 2183
a6ec74c1 2184 if (checksum) {
1109a392 2185 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2186 (checksum > bits_in_uv &&
08ca2aa3
TH
2187 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2188 NV trouble, anv;
a6ec74c1 2189
08ca2aa3 2190 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2191 while (checksum >= 16) {
2192 checksum -= 16;
08ca2aa3 2193 anv *= 65536.0;
a6ec74c1 2194 }
a6ec74c1 2195 while (cdouble < 0.0)
08ca2aa3
TH
2196 cdouble += anv;
2197 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 2198 sv = newSVnv(cdouble);
a6ec74c1
JH
2199 }
2200 else {
fa8ec7c1
NC
2201 if (checksum < bits_in_uv) {
2202 UV mask = ((UV)1 << checksum) - 1;
92d41999 2203 cuv &= mask;
a6ec74c1 2204 }
c4c5f44a 2205 sv = newSVuv(cuv);
a6ec74c1
JH
2206 }
2207 XPUSHs(sv_2mortal(sv));
2208 checksum = 0;
2209 }
fc241834 2210
49704364
LW
2211 if (symptr->flags & FLAG_SLASH){
2212 if (SP - PL_stack_base - start_sp_offset <= 0)
2213 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2214 if( next_symbol(symptr) ){
2215 if( symptr->howlen == e_number )
2216 Perl_croak(aTHX_ "Count after length/code in unpack" );
2217 if( beyond ){
2218 /* ...end of char buffer then no decent length available */
2219 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2220 } else {
2221 /* take top of stack (hope it's numeric) */
2222 len = POPi;
2223 if( len < 0 )
2224 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2225 }
2226 } else {
2227 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2228 }
2229 datumtype = symptr->code;
21c16052 2230 explicit_length = FALSE;
49704364
LW
2231 goto redo_switch;
2232 }
a6ec74c1 2233 }
49704364 2234
18529408
IZ
2235 if (new_s)
2236 *new_s = s;
2237 PUTBACK;
2238 return SP - PL_stack_base - start_sp_offset;
2239}
2240
2241PP(pp_unpack)
2242{
97aff369 2243 dVAR;
18529408 2244 dSP;
bab9c0ac 2245 dPOPPOPssrl;
18529408
IZ
2246 I32 gimme = GIMME_V;
2247 STRLEN llen;
2248 STRLEN rlen;
5c144d81
NC
2249 const char *pat = SvPV_const(left, llen);
2250 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
2251 const char *strend = s + rlen;
2252 const char *patend = pat + llen;
08ca2aa3 2253 I32 cnt;
18529408
IZ
2254
2255 PUTBACK;
7accc089 2256 cnt = unpackstring(pat, patend, s, strend,
49704364 2257 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2258 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2259
18529408
IZ
2260 SPAGAIN;
2261 if ( !cnt && gimme == G_SCALAR )
2262 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2263 RETURN;
2264}
2265
f337b084 2266STATIC U8 *
f7fe979e 2267doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 2268{
f337b084 2269 *h++ = PL_uuemap[len];
a6ec74c1 2270 while (len > 2) {
f337b084
TH
2271 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2272 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2273 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2274 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2275 s += 3;
2276 len -= 3;
2277 }
2278 if (len > 0) {
f7fe979e 2279 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2280 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2281 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2282 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2283 *h++ = PL_uuemap[0];
a6ec74c1 2284 }
f337b084
TH
2285 *h++ = '\n';
2286 return h;
a6ec74c1
JH
2287}
2288
2289STATIC SV *
f7fe979e 2290S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 2291{
8b6e33c7
AL
2292 SV *result = newSVpvn(s, l);
2293 char *const result_c = SvPV_nolen(result); /* convenience */
2294 char *out = result_c;
2295 bool skip = 1;
2296 bool ignore = 0;
a6ec74c1
JH
2297
2298 while (*s) {
2299 switch (*s) {
2300 case ' ':
2301 break;
2302 case '+':
2303 if (!skip) {
2304 SvREFCNT_dec(result);
2305 return (NULL);
2306 }
2307 break;
2308 case '0':
2309 case '1':
2310 case '2':
2311 case '3':
2312 case '4':
2313 case '5':
2314 case '6':
2315 case '7':
2316 case '8':
2317 case '9':
2318 skip = 0;
2319 if (!ignore) {
2320 *(out++) = *s;
2321 }
2322 break;
2323 case '.':
2324 ignore = 1;
2325 break;
2326 default:
2327 SvREFCNT_dec(result);
2328 return (NULL);
2329 }
2330 s++;
2331 }
2332 *(out++) = '\0';
2333 SvCUR_set(result, out - result_c);
2334 return (result);
2335}
2336
2337/* pnum must be '\0' terminated */
2338STATIC int
2339S_div128(pTHX_ SV *pnum, bool *done)
2340{
8b6e33c7
AL
2341 STRLEN len;
2342 char * const s = SvPV(pnum, len);
2343 char *t = s;
2344 int m = 0;
2345
2346 *done = 1;
2347 while (*t) {
2348 const int i = m * 10 + (*t - '0');
2349 const int r = (i >> 7); /* r < 10 */
2350 m = i & 0x7F;
2351 if (r) {
2352 *done = 0;
2353 }
2354 *(t++) = '0' + r;
a6ec74c1 2355 }
8b6e33c7
AL
2356 *(t++) = '\0';
2357 SvCUR_set(pnum, (STRLEN) (t - s));
2358 return (m);
a6ec74c1
JH
2359}
2360
18529408
IZ
2361/*
2362=for apidoc pack_cat
2363
7accc089
JH
2364The engine implementing pack() Perl function. Note: parameters next_in_list and
2365flags are not used. This call should not be used; use packlist instead.
18529408 2366
bfce84ec
AL
2367=cut
2368*/
49704364 2369
18529408 2370void
f7fe979e 2371Perl_pack_cat(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags)
a6ec74c1 2372{
aadb217d 2373 tempsym_t sym;
8772537c
AL
2374 PERL_UNUSED_ARG(next_in_list);
2375 PERL_UNUSED_ARG(flags);
7accc089 2376
f7fe979e 2377 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
aadb217d 2378
7accc089
JH
2379 (void)pack_rec( cat, &sym, beglist, endlist );
2380}
2381
2382
2383/*
2384=for apidoc packlist
2385
2386The engine implementing pack() Perl function.
2387
bfce84ec
AL
2388=cut
2389*/
7accc089
JH
2390
2391void
f7fe979e 2392Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
7accc089 2393{
97aff369 2394 dVAR;
f337b084 2395 STRLEN no_len;
aadb217d
JH
2396 tempsym_t sym;
2397
f7fe979e 2398 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 2399
f337b084
TH
2400 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2401 Also make sure any UTF8 flag is loaded */
2402 SvPV_force(cat, no_len);
bfce84ec
AL
2403 if (DO_UTF8(cat))
2404 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 2405
49704364
LW
2406 (void)pack_rec( cat, &sym, beglist, endlist );
2407}
2408
f337b084
TH
2409/* like sv_utf8_upgrade, but also repoint the group start markers */
2410STATIC void
2411marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2412 STRLEN len;
2413 tempsym_t *group;
f7fe979e
AL
2414 const char *from_ptr, *from_start, *from_end, **marks, **m;
2415 char *to_start, *to_ptr;
f337b084
TH
2416
2417 if (SvUTF8(sv)) return;
2418
aa07b2f6 2419 from_start = SvPVX_const(sv);
f337b084
TH
2420 from_end = from_start + SvCUR(sv);
2421 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2422 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2423 if (from_ptr == from_end) {
2424 /* Simple case: no character needs to be changed */
2425 SvUTF8_on(sv);
2426 return;
2427 }
2428
3473cf63 2429 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2430 Newx(to_start, len, char);
f337b084
TH
2431 Copy(from_start, to_start, from_ptr-from_start, char);
2432 to_ptr = to_start + (from_ptr-from_start);
2433
a02a5408 2434 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2435 for (group=sym_ptr; group; group = group->previous)
2436 marks[group->level] = from_start + group->strbeg;
2437 marks[sym_ptr->level+1] = from_end+1;
2438 for (m = marks; *m < from_ptr; m++)
2439 *m = to_start + (*m-from_start);
2440
2441 for (;from_ptr < from_end; from_ptr++) {
2442 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2443 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2444 }
2445 *to_ptr = 0;
2446
2447 while (*m == from_ptr) *m++ = to_ptr;
2448 if (m != marks + sym_ptr->level+1) {
2449 Safefree(marks);
2450 Safefree(to_start);
2451 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2452 }
2453 for (group=sym_ptr; group; group = group->previous)
2454 group->strbeg = marks[group->level] - to_start;
2455 Safefree(marks);
2456
2457 if (SvOOK(sv)) {
2458 if (SvIVX(sv)) {
b162af07 2459 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2460 from_start -= SvIVX(sv);
2461 SvIV_set(sv, 0);
2462 }
2463 SvFLAGS(sv) &= ~SVf_OOK;
2464 }
2465 if (SvLEN(sv) != 0)
2466 Safefree(from_start);
f880fe2f 2467 SvPV_set(sv, to_start);
b162af07
SP
2468 SvCUR_set(sv, to_ptr - to_start);
2469 SvLEN_set(sv, len);
f337b084
TH
2470 SvUTF8_on(sv);
2471}
2472
2473/* Exponential string grower. Makes string extension effectively O(n)
2474 needed says how many extra bytes we need (not counting the final '\0')
2475 Only grows the string if there is an actual lack of space
2476*/
2477STATIC char *
0bd48802 2478S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2479 const STRLEN cur = SvCUR(sv);
2480 const STRLEN len = SvLEN(sv);
f337b084
TH
2481 STRLEN extend;
2482 if (len - cur > needed) return SvPVX(sv);
2483 extend = needed > len ? needed : len;
2484 return SvGROW(sv, len+extend+1);
2485}
49704364
LW
2486
2487STATIC
2488SV **
f337b084 2489S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2490{
97aff369 2491 dVAR;
49704364 2492 tempsym_t lookahead;
f337b084
TH
2493 I32 items = endlist - beglist;
2494 bool found = next_symbol(symptr);
2495 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2496 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084
TH
2497
2498 if (symptr->level == 0 && found && symptr->code == 'U') {
2499 marked_upgrade(aTHX_ cat, symptr);
2500 symptr->flags |= FLAG_DO_UTF8;
2501 utf8 = 0;
49704364 2502 }
f337b084 2503 symptr->strbeg = SvCUR(cat);
49704364
LW
2504
2505 while (found) {
f337b084
TH
2506 SV *fromstr;
2507 STRLEN fromlen;
2508 I32 len;
a0714e2c 2509 SV *lengthcode = NULL;
49704364 2510 I32 datumtype = symptr->code;
f337b084
TH
2511 howlen_t howlen = symptr->howlen;
2512 char *start = SvPVX(cat);
2513 char *cur = start + SvCUR(cat);
49704364 2514
f337b084
TH
2515#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2516
2517 switch (howlen) {
fc241834 2518 case e_star:
f337b084
TH
2519 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2520 0 : items;
2521 break;
2522 default:
2523 /* e_no_len and e_number */
2524 len = symptr->length;
49704364
LW
2525 break;
2526 }
2527
f337b084 2528 if (len) {
a7a3cfaa 2529 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2530
a7a3cfaa
TH
2531 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2532 /* We can process this letter. */
2533 STRLEN size = props & PACK_SIZE_MASK;
2534 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2535 }
f337b084
TH
2536 }
2537
49704364
LW
2538 /* Look ahead for next symbol. Do we have code/code? */
2539 lookahead = *symptr;
2540 found = next_symbol(&lookahead);
246f24af
TH
2541 if (symptr->flags & FLAG_SLASH) {
2542 IV count;
f337b084 2543 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2544 if (strchr("aAZ", lookahead.code)) {
2545 if (lookahead.howlen == e_number) count = lookahead.length;
2546 else {
ce399ba6
NC
2547 if (items > 0) {
2548 if (SvGAMAGIC(*beglist)) {
2549 /* Avoid reading the active data more than once
2550 by copying it to a temporary. */
2551 STRLEN len;
2552 const char *const pv = SvPV_const(*beglist, len);
2553 SV *const temp = sv_2mortal(newSVpvn(pv, len));
2554 if (SvUTF8(*beglist))
2555 SvUTF8_on(temp);
2556 *beglist = temp;
2557 }
246f24af
TH
2558 count = DO_UTF8(*beglist) ?
2559 sv_len_utf8(*beglist) : sv_len(*beglist);
ce399ba6 2560 }
246f24af
TH
2561 else count = 0;
2562 if (lookahead.code == 'Z') count++;
2563 }
2564 } else {
2565 if (lookahead.howlen == e_number && lookahead.length < items)
2566 count = lookahead.length;
2567 else count = items;
2568 }
2569 lookahead.howlen = e_number;
2570 lookahead.length = count;
2571 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2572 }
49704364 2573
fc241834
RGS
2574 /* Code inside the switch must take care to properly update
2575 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2576 doesn't simply leave using break */
1109a392 2577 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2578 default:
f337b084
TH
2579 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2580 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2581 case '%':
49704364 2582 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2583 {
2584 char *from;
2585#ifdef PERL_PACK_CAN_SHRIEKSIGN
2586 case '.' | TYPE_IS_SHRIEKING:
2587#endif
2588 case '.':
2589 if (howlen == e_star) from = start;
2590 else if (len == 0) from = cur;
2591 else {
2592 tempsym_t *group = symptr;
2593
2594 while (--len && group) group = group->previous;
2595 from = group ? start + group->strbeg : start;
2596 }
2597 fromstr = NEXTFROM;
2598 len = SvIV(fromstr);
2599 goto resize;
2600#ifdef PERL_PACK_CAN_SHRIEKSIGN
2601 case '@' | TYPE_IS_SHRIEKING:
2602#endif
a6ec74c1 2603 case '@':
28be1210
TH
2604 from = start + symptr->strbeg;
2605 resize:
2606#ifdef PERL_PACK_CAN_SHRIEKSIGN
2607 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2608#else /* PERL_PACK_CAN_SHRIEKSIGN */
2609 if (utf8)
2610#endif
2611 if (len >= 0) {
2612 while (len && from < cur) {
2613 from += UTF8SKIP(from);
2614 len--;
2615 }
2616 if (from > cur)
2617 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2618 if (len) {
2619 /* Here we know from == cur */
2620 grow:
2621 GROWING(0, cat, start, cur, len);
2622 Zero(cur, len, char);
2623 cur += len;
2624 } else if (from < cur) {
2625 len = cur - from;
2626 goto shrink;
2627 } else goto no_change;
2628 } else {
2629 cur = from;
2630 len = -len;
2631 goto utf8_shrink;
f337b084 2632 }
28be1210
TH
2633 else {
2634 len -= cur - from;
f337b084 2635 if (len > 0) goto grow;
28be1210 2636 if (len == 0) goto no_change;
fc241834 2637 len = -len;
28be1210 2638 goto shrink;
f337b084 2639 }
a6ec74c1 2640 break;
28be1210 2641 }
fc241834 2642 case '(': {
49704364 2643 tempsym_t savsym = *symptr;
66c611c5
MHM
2644 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2645 symptr->flags |= group_modifiers;
49704364
LW
2646 symptr->patend = savsym.grpend;
2647 symptr->level++;
f337b084 2648 symptr->previous = &lookahead;
18529408 2649 while (len--) {
f337b084
TH
2650 U32 was_utf8;
2651 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2652 else symptr->flags &= ~FLAG_PARSE_UTF8;
2653 was_utf8 = SvUTF8(cat);
49704364 2654 symptr->patptr = savsym.grpbeg;
f337b084
TH
2655 beglist = pack_rec(cat, symptr, beglist, endlist);
2656 if (SvUTF8(cat) != was_utf8)
2657 /* This had better be an upgrade while in utf8==0 mode */
2658 utf8 = 1;
2659
49704364 2660 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2661 break; /* No way to continue */
2662 }
f337b084
TH
2663 lookahead.flags = symptr->flags & ~group_modifiers;
2664 goto no_change;
18529408 2665 }
62f95557
IZ
2666 case 'X' | TYPE_IS_SHRIEKING:
2667 if (!len) /* Avoid division by 0 */
2668 len = 1;
f337b084
TH
2669 if (utf8) {
2670 char *hop, *last;
2671 I32 l = len;
2672 hop = last = start;
2673 while (hop < cur) {
2674 hop += UTF8SKIP(hop);
2675 if (--l == 0) {
2676 last = hop;
2677 l = len;
2678 }
2679 }
2680 if (last > cur)
2681 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2682 cur = last;
2683 break;
2684 }
2685 len = (cur-start) % len;
62f95557 2686 /* FALL THROUGH */
a6ec74c1 2687 case 'X':
f337b084
TH
2688 if (utf8) {
2689 if (len < 1) goto no_change;
28be1210 2690 utf8_shrink:
f337b084
TH
2691 while (len > 0) {
2692 if (cur <= start)
28be1210
TH
2693 Perl_croak(aTHX_ "'%c' outside of string in pack",
2694 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2695 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2696 if (cur <= start)
28be1210
TH
2697 Perl_croak(aTHX_ "'%c' outside of string in pack",
2698 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2699 }
2700 len--;
2701 }
2702 } else {
fc241834 2703 shrink:
f337b084 2704 if (cur - start < len)
28be1210
TH
2705 Perl_croak(aTHX_ "'%c' outside of string in pack",
2706 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2707 cur -= len;
2708 }
2709 if (cur < start+symptr->strbeg) {
2710 /* Make sure group starts don't point into the void */
2711 tempsym_t *group;
9e27e96a 2712 const STRLEN length = cur-start;
f337b084
TH
2713 for (group = symptr;
2714 group && length < group->strbeg;
2715 group = group->previous) group->strbeg = length;
2716 lookahead.strbeg = length;
2717 }
a6ec74c1 2718 break;
fc241834
RGS
2719 case 'x' | TYPE_IS_SHRIEKING: {
2720 I32 ai32;
62f95557
IZ
2721 if (!len) /* Avoid division by 0 */
2722 len = 1;
230e1fce 2723 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2724 else ai32 = (cur - start) % len;
2725 if (ai32 == 0) goto no_change;
2726 len -= ai32;
2727 }
2728 /* FALL THROUGH */
a6ec74c1 2729 case 'x':
f337b084 2730 goto grow;
a6ec74c1
JH
2731 case 'A':
2732 case 'Z':
f337b084 2733 case 'a': {
f7fe979e 2734 const char *aptr;
f337b084 2735
a6ec74c1 2736 fromstr = NEXTFROM;
e62f0680 2737 aptr = SvPV_const(fromstr, fromlen);
f337b084 2738 if (DO_UTF8(fromstr)) {
f7fe979e 2739 const char *end, *s;
f337b084
TH
2740
2741 if (!utf8 && !SvUTF8(cat)) {
2742 marked_upgrade(aTHX_ cat, symptr);
2743 lookahead.flags |= FLAG_DO_UTF8;
2744 lookahead.strbeg = symptr->strbeg;
2745 utf8 = 1;
2746 start = SvPVX(cat);
2747 cur = start + SvCUR(cat);
2748 }
fc241834 2749 if (howlen == e_star) {
f337b084
TH
2750 if (utf8) goto string_copy;
2751 len = fromlen+1;
2752 }
2753 s = aptr;
2754 end = aptr + fromlen;
2755 fromlen = datumtype == 'Z' ? len-1 : len;
2756 while ((I32) fromlen > 0 && s < end) {
2757 s += UTF8SKIP(s);
2758 fromlen--;
2759 }
2760 if (s > end)
2761 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2762 if (utf8) {
fc241834 2763 len = fromlen;
f337b084
TH
2764 if (datumtype == 'Z') len++;
2765 fromlen = s-aptr;
2766 len += fromlen;
fc241834 2767
f337b084 2768 goto string_copy;
fc241834 2769 }
f337b084
TH
2770 fromlen = len - fromlen;
2771 if (datumtype == 'Z') fromlen--;
2772 if (howlen == e_star) {
2773 len = fromlen;
2774 if (datumtype == 'Z') len++;
fc241834 2775 }
f337b084 2776 GROWING(0, cat, start, cur, len);
fc241834 2777 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084
TH
2778 datumtype | TYPE_IS_PACK))
2779 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2780 cur += fromlen;
a6ec74c1 2781 len -= fromlen;
f337b084
TH
2782 } else if (utf8) {
2783 if (howlen == e_star) {
2784 len = fromlen;
2785 if (datumtype == 'Z') len++;
a6ec74c1 2786 }
f337b084
TH
2787 if (len <= (I32) fromlen) {
2788 fromlen = len;
2789 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2790 }
fc241834 2791 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2792 upgrade, so:
2793 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2794 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2795 len -= fromlen;
2796 while (fromlen > 0) {
230e1fce 2797 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2798 aptr++;
2799 fromlen--;
fc241834 2800 }
f337b084
TH
2801 } else {
2802 string_copy:
2803 if (howlen == e_star) {
2804 len = fromlen;
2805 if (datumtype == 'Z') len++;
2806 }
2807 if (len <= (I32) fromlen) {
2808 fromlen = len;
2809 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2810 }
f337b084
TH
2811 GROWING(0, cat, start, cur, len);
2812 Copy(aptr, cur, fromlen, char);
2813 cur += fromlen;
2814 len -= fromlen;
a6ec74c1 2815 }
f337b084
TH
2816 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2817 cur += len;
a6ec74c1 2818 break;
f337b084 2819 }
a6ec74c1 2820 case 'B':
f337b084 2821 case 'b': {
b83604b4 2822 const char *str, *end;
f337b084
TH
2823 I32 l, field_len;
2824 U8 bits;
2825 bool utf8_source;
2826 U32 utf8_flags;
a6ec74c1 2827
fc241834 2828 fromstr = NEXTFROM;
b83604b4 2829 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2830 end = str + fromlen;
2831 if (DO_UTF8(fromstr)) {
2832 utf8_source = TRUE;
041457d9 2833 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2834 } else {
2835 utf8_source = FALSE;
2836 utf8_flags = 0; /* Unused, but keep compilers happy */
2837 }
2838 if (howlen == e_star) len = fromlen;
2839 field_len = (len+7)/8;
2840 GROWING(utf8, cat, start, cur, field_len);
2841 if (len > (I32)fromlen) len = fromlen;
2842 bits = 0;
2843 l = 0;
2844 if (datumtype == 'B')
2845 while (l++ < len) {
2846 if (utf8_source) {
95b63a38 2847 UV val = 0;
f337b084
TH
2848 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2849 bits |= val & 1;
2850 } else bits |= *str++ & 1;
2851 if (l & 7) bits <<= 1;
fc241834 2852 else {
f337b084
TH
2853 PUSH_BYTE(utf8, cur, bits);
2854 bits = 0;
a6ec74c1
JH
2855 }
2856 }
f337b084
TH
2857 else
2858 /* datumtype == 'b' */
2859 while (l++ < len) {
2860 if (utf8_source) {
95b63a38 2861 UV val = 0;
f337b084
TH
2862 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2863 if (val & 1) bits |= 0x80;
2864 } else if (*str++ & 1)
2865 bits |= 0x80;
2866 if (l & 7) bits >>= 1;
fc241834 2867 else {
f337b084
TH
2868 PUSH_BYTE(utf8, cur, bits);
2869 bits = 0;
a6ec74c1
JH
2870 }
2871 }
f337b084
TH
2872 l--;
2873 if (l & 7) {
fc241834 2874 if (datumtype == 'B')
f337b084 2875 bits <<= 7 - (l & 7);
fc241834 2876 else
f337b084
TH
2877 bits >>= 7 - (l & 7);
2878 PUSH_BYTE(utf8, cur, bits);
2879 l += 7;
a6ec74c1 2880 }
f337b084
TH
2881 /* Determine how many chars are left in the requested field */
2882 l /= 8;
2883 if (howlen == e_star) field_len = 0;
2884 else field_len -= l;
2885 Zero(cur, field_len, char);
2886 cur += field_len;
a6ec74c1 2887 break;
f337b084 2888 }
a6ec74c1 2889 case 'H':
f337b084 2890 case 'h': {
10516c54 2891 const char *str, *end;
f337b084
TH
2892 I32 l, field_len;
2893 U8 bits;
2894 bool utf8_source;
2895 U32 utf8_flags;
a6ec74c1 2896
fc241834 2897 fromstr = NEXTFROM;
10516c54 2898 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2899 end = str + fromlen;
2900 if (DO_UTF8(fromstr)) {
2901 utf8_source = TRUE;
041457d9 2902 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2903 } else {
2904 utf8_source = FALSE;
2905 utf8_flags = 0; /* Unused, but keep compilers happy */
2906 }
2907 if (howlen == e_star) len = fromlen;
2908 field_len = (len+1)/2;
2909 GROWING(utf8, cat, start, cur, field_len);
2910 if (!utf8 && len > (I32)fromlen) len = fromlen;
2911 bits = 0;
2912 l = 0;
2913 if (datumtype == 'H')
2914 while (l++ < len) {
2915 if (utf8_source) {
95b63a38 2916 UV val = 0;
f337b084
TH
2917 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2918 if (val < 256 && isALPHA(val))
2919 bits |= (val + 9) & 0xf;
a6ec74c1 2920 else
f337b084
TH
2921 bits |= val & 0xf;
2922 } else if (isALPHA(*str))
2923 bits |= (*str++ + 9) & 0xf;
2924 else
2925 bits |= *str++ & 0xf;
2926 if (l & 1) bits <<= 4;
fc241834 2927 else {
f337b084
TH
2928 PUSH_BYTE(utf8, cur, bits);
2929 bits = 0;
a6ec74c1
JH
2930 }
2931 }
f337b084
TH
2932 else
2933 while (l++ < len) {
2934 if (utf8_source) {
95b63a38 2935 UV val = 0;
f337b084
TH
2936 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2937 if (val < 256 && isALPHA(val))
2938 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2939 else
f337b084
TH
2940 bits |= (val & 0xf) << 4;
2941 } else if (isALPHA(*str))
2942 bits |= ((*str++ + 9) & 0xf) << 4;
2943 else
2944 bits |= (*str++ & 0xf) << 4;
2945 if (l & 1) bits >>= 4;
fc241834 2946 else {
f337b084
TH
2947 PUSH_BYTE(utf8, cur, bits);
2948 bits = 0;
a6ec74c1 2949 }
fc241834 2950 }
f337b084
TH
2951 l--;
2952 if (l & 1) {
2953 PUSH_BYTE(utf8, cur, bits);
2954 l++;
2955 }
2956 /* Determine how many chars are left in the requested field */
2957 l /= 2;
2958 if (howlen == e_star) field_len = 0;
2959 else field_len -= l;
2960 Zero(cur, field_len, char);
2961 cur += field_len;
2962 break;
fc241834
RGS
2963 }
2964 case 'c':
f337b084
TH
2965 while (len-- > 0) {
2966 IV aiv;
2967 fromstr = NEXTFROM;
2968 aiv = SvIV(fromstr);
2969 if ((-128 > aiv || aiv > 127) &&
2970 ckWARN(WARN_PACK))
2971 Perl_warner(aTHX_ packWARN(WARN_PACK),
2972 "Character in 'c' format wrapped in pack");
585ec06d 2973 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2974 }
2975 break;
2976 case 'C':
f337b084
TH
2977 if (len == 0) {
2978 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2979 break;
2980 }
2981 GROWING(0, cat, start, cur, len);
a6ec74c1 2982 while (len-- > 0) {
f337b084 2983 IV aiv;
a6ec74c1 2984 fromstr = NEXTFROM;
f337b084
TH
2985 aiv = SvIV(fromstr);
2986 if ((0 > aiv || aiv > 0xff) &&
fc241834
RGS
2987 ckWARN(WARN_PACK))
2988 Perl_warner(aTHX_ packWARN(WARN_PACK),
2989 "Character in 'C' format wrapped in pack");
585ec06d 2990 *cur++ = (char)(aiv & 0xff);
f337b084 2991 }
fc241834
RGS
2992 break;
2993 case 'W': {
2994 char *end;
2995 U8 in_bytes = IN_BYTES;
2996
2997 end = start+SvLEN(cat)-1;
2998 if (utf8) end -= UTF8_MAXLEN-1;
2999 while (len-- > 0) {
3000 UV auv;
3001 fromstr = NEXTFROM;
3002 auv = SvUV(fromstr);
3003 if (in_bytes) auv = auv % 0x100;
3004 if (utf8) {
3005 W_utf8:
3006 if (cur > end) {
3007 *cur = '\0';
b162af07 3008 SvCUR_set(cat, cur - start);
fc241834
RGS
3009
3010 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3011 end = start+SvLEN(cat)-UTF8_MAXLEN;
3012 }
230e1fce
NC
3013 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3014 NATIVE_TO_UNI(auv),
041457d9 3015 warn_utf8 ?
230e1fce 3016 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
3017 } else {
3018 if (auv >= 0x100) {
3019 if (!SvUTF8(cat)) {
3020 *cur = '\0';
b162af07 3021 SvCUR_set(cat, cur - start);
fc241834
RGS
3022 marked_upgrade(aTHX_ cat, symptr);
3023 lookahead.flags |= FLAG_DO_UTF8;
3024 lookahead.strbeg = symptr->strbeg;
3025 utf8 = 1;
3026 start = SvPVX(cat);
3027 cur = start + SvCUR(cat);
3028 end = start+SvLEN(cat)-UTF8_MAXLEN;
3029 goto W_utf8;
3030 }
3031 if (ckWARN(WARN_PACK))
3032 Perl_warner(aTHX_ packWARN(WARN_PACK),
3033 "Character in 'W' format wrapped in pack");
3034 auv &= 0xff;
3035 }
3036 if (cur >= end) {
3037 *cur = '\0';
b162af07 3038 SvCUR_set(cat, cur - start);
fc241834
RGS
3039 GROWING(0, cat, start, cur, len+1);
3040 end = start+SvLEN(cat)-1;
3041 }
fe2774ed 3042 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
3043 }
3044 }
3045 break;
fc241834
RGS
3046 }
3047 case 'U': {
3048 char *end;
3049
3050 if (len == 0) {
3051 if (!(symptr->flags & FLAG_DO_UTF8)) {
3052 marked_upgrade(aTHX_ cat, symptr);
3053 lookahead.flags |= FLAG_DO_UTF8;
3054 lookahead.strbeg = symptr->strbeg;
3055 }
3056 utf8 = 0;
3057 goto no_change;
3058 }
3059
3060 end = start+SvLEN(cat);
3061 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 3062 while (len-- > 0) {
fc241834 3063 UV auv;
a6ec74c1 3064 fromstr = NEXTFROM;
fc241834
RGS
3065 auv = SvUV(fromstr);
3066 if (utf8) {
230e1fce 3067 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 3068 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 3069 warn_utf8 ?
fc241834
RGS
3070 0 : UNICODE_ALLOW_ANY);
3071 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3072 *cur = '\0';
b162af07 3073 SvCUR_set(cat, cur - start);
fc241834
RGS
3074 GROWING(0, cat, start, cur,
3075 len+(endb-buffer)*UTF8_EXPAND);
3076 end = start+SvLEN(cat);
3077 }
3078 bytes_to_uni(aTHX_ buffer, endb-buffer, &cur);
3079 } else {
3080 if (cur >= end) {
3081 *cur = '\0';
b162af07 3082 SvCUR_set(cat, cur - start);
fc241834
RGS
3083 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3084 end = start+SvLEN(cat)-UTF8_MAXLEN;
3085 }
230e1fce 3086 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 3087 warn_utf8 ?
230e1fce 3088 0 : UNICODE_ALLOW_ANY);
fc241834 3089 }
a6ec74c1 3090 }
a6ec74c1 3091 break;
fc241834 3092 }
a6ec74c1
JH
3093 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3094 case 'f':
a6ec74c1 3095 while (len-- > 0) {
f337b084
TH
3096 float afloat;
3097 NV anv;
a6ec74c1 3098 fromstr = NEXTFROM;
f337b084 3099 anv = SvNV(fromstr);
5cdb9e01 3100#ifdef __VOS__
f337b084 3101 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3102 during conversion from double to float into infinity, so we
3103 do it by hand. This code should either be generalized for
3104 any OS that needs it, or removed if and when VOS implements
3105 posix-976 (suggestion to support mapping to infinity).
3106 Paul.Green@stratus.com 02-04-02. */
f337b084 3107 if (anv > FLT_MAX)
fc241834 3108 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3109 else if (anv < -FLT_MAX)
fc241834 3110 afloat = _float_constants[0]; /* single prec. inf. */
f337b084
TH
3111 else afloat = (float) anv;
3112#else /* __VOS__ */
baf3cf9c 3113# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3114 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3115 * on Alpha; fake it if we don't have them.
3116 */
f337b084 3117 if (anv > FLT_MAX)
fc241834 3118 afloat = FLT_MAX;
f337b084 3119 else if (anv < -FLT_MAX)
fc241834 3120 afloat = -FLT_MAX;
f337b084 3121 else afloat = (float)anv;
baf3cf9c 3122# else
f337b084 3123 afloat = (float)anv;
baf3cf9c 3124# endif
f337b084 3125#endif /* __VOS__ */
1109a392 3126 DO_BO_PACK_N(afloat, float);
f337b084 3127 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
3128 }
3129 break;
3130 case 'd':
a6ec74c1 3131 while (len-- > 0) {
f337b084
TH
3132 double adouble;
3133 NV anv;
a6ec74c1 3134 fromstr = NEXTFROM;
f337b084 3135 anv = SvNV(fromstr);
5cdb9e01 3136#ifdef __VOS__
f337b084 3137 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3138 during conversion from long double to double into infinity,
3139 so we do it by hand. This code should either be generalized
3140 for any OS that needs it, or removed if and when VOS
3141 implements posix-976 (suggestion to support mapping to
3142 infinity). Paul.Green@stratus.com 02-04-02. */
f337b084 3143 if (anv > DBL_MAX)
fc241834 3144 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3145 else if (anv < -DBL_MAX)
fc241834 3146 adouble = _double_constants[0]; /* double prec. inf. */
f337b084
TH
3147 else adouble = (double) anv;
3148#else /* __VOS__ */
baf3cf9c 3149# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3150 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3151 * on Alpha; fake it if we don't have them.
3152 */
f337b084 3153 if (anv > DBL_MAX)
fc241834 3154 adouble = DBL_MAX;
f337b084 3155 else if (anv < -DBL_MAX)
fc241834 3156 adouble = -DBL_MAX;
f337b084 3157 else adouble = (double)anv;
baf3cf9c 3158# else
f337b084 3159 adouble = (double)anv;
baf3cf9c 3160# endif
f337b084 3161#endif /* __VOS__ */
1109a392 3162 DO_BO_PACK_N(adouble, double);
f337b084 3163 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3164 }
3165 break;
fc241834
RGS
3166 case 'F': {
3167 NV anv;
1109a392 3168 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3169 while (len-- > 0) {
3170 fromstr = NEXTFROM;
3171 anv = SvNV(fromstr);
1109a392 3172 DO_BO_PACK_N(anv, NV);
fc241834 3173 PUSH_VAR(utf8, cur, anv);
92d41999
JH
3174 }
3175 break;
fc241834 3176 }
92d41999 3177#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834
RGS
3178 case 'D': {
3179 long double aldouble;
1109a392
MHM
3180 /* long doubles can have unused bits, which may be nonzero */
3181 Zero(&aldouble, 1, long double);
92d41999
JH
3182 while (len-- > 0) {
3183 fromstr = NEXTFROM;
3184 aldouble = (long double)SvNV(fromstr);
1109a392 3185 DO_BO_PACK_N(aldouble, long double);
fc241834 3186 PUSH_VAR(utf8, cur, aldouble);
92d41999
JH
3187 }
3188 break;
fc241834 3189 }
92d41999 3190#endif
7212898e 3191#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3192 case 'n' | TYPE_IS_SHRIEKING:
7212898e 3193#endif
a6ec74c1
JH
3194 case 'n':
3195 while (len-- > 0) {
f337b084 3196 I16 ai16;
a6ec74c1 3197 fromstr = NEXTFROM;
ef108786 3198 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3199#ifdef HAS_HTONS
ef108786 3200 ai16 = PerlSock_htons(ai16);
a6ec74c1 3201#endif
f337b084 3202 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3203 }
3204 break;
7212898e 3205#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3206 case 'v' | TYPE_IS_SHRIEKING:
7212898e 3207#endif
a6ec74c1
JH
3208 case 'v':
3209 while (len-- > 0) {
f337b084 3210 I16 ai16;
a6ec74c1 3211 fromstr = NEXTFROM;
ef108786 3212 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3213#ifdef HAS_HTOVS
ef108786 3214 ai16 = htovs(ai16);
a6ec74c1 3215#endif
f337b084 3216 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3217 }
3218 break;
49704364 3219 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3220#if SHORTSIZE != SIZE16
fc241834 3221 while (len-- > 0) {
f337b084 3222 unsigned short aushort;
fc241834
RGS
3223 fromstr = NEXTFROM;
3224 aushort = SvUV(fromstr);
3225 DO_BO_PACK(aushort, s);
f337b084 3226 PUSH_VAR(utf8, cur, aushort);
fc241834 3227 }
49704364
LW
3228 break;
3229#else
3230 /* Fall through! */
a6ec74c1 3231#endif
49704364 3232 case 'S':
fc241834 3233 while (len-- > 0) {
f337b084 3234 U16 au16;
fc241834
RGS
3235 fromstr = NEXTFROM;
3236 au16 = (U16)SvUV(fromstr);
3237 DO_BO_PACK(au16, 16);
f337b084 3238 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3239 }
3240 break;
49704364 3241 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3242#if SHORTSIZE != SIZE16
fc241834 3243 while (len-- > 0) {
f337b084 3244 short ashort;
fc241834
RGS
3245 fromstr = NEXTFROM;
3246 ashort = SvIV(fromstr);
3247 DO_BO_PACK(ashort, s);
f337b084 3248 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3249 }
49704364
LW
3250 break;
3251#else
3252 /* Fall through! */
a6ec74c1 3253#endif
49704364
LW
3254 case 's':
3255 while (len-- > 0) {
f337b084 3256 I16 ai16;
49704364 3257 fromstr = NEXTFROM;
ef108786
MHM
3258 ai16 = (I16)SvIV(fromstr);
3259 DO_BO_PACK(ai16, 16);
f337b084 3260 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3261 }
3262 break;
3263 case 'I':
49704364 3264 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3265 while (len-- > 0) {
f337b084 3266 unsigned int auint;
a6ec74c1
JH
3267 fromstr = NEXTFROM;
3268 auint = SvUV(fromstr);
1109a392 3269 DO_BO_PACK(auint, i);
f337b084 3270 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3271 }
3272 break;
92d41999
JH
3273 case 'j':
3274 while (len-- > 0) {
f337b084 3275 IV aiv;
92d41999
JH
3276 fromstr = NEXTFROM;
3277 aiv = SvIV(fromstr);
1109a392
MHM
3278#if IVSIZE == INTSIZE
3279 DO_BO_PACK(aiv, i);
3280#elif IVSIZE == LONGSIZE
3281 DO_BO_PACK(aiv, l);
3282#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3283 DO_BO_PACK(aiv, 64);
f337b084
TH
3284#else
3285 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3286#endif
f337b084 3287 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3288 }
3289 break;
3290 case 'J':
3291 while (len-- > 0) {
f337b084 3292 UV auv;
92d41999
JH
3293 fromstr = NEXTFROM;
3294 auv = SvUV(fromstr);
1109a392
MHM
3295#if UVSIZE == INTSIZE
3296 DO_BO_PACK(auv, i);
3297#elif UVSIZE == LONGSIZE
3298 DO_BO_PACK(auv, l);
3299#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3300 DO_BO_PACK(auv, 64);
f337b084
TH
3301#else
3302 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3303#endif
f337b084 3304 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3305 }
3306 break;
a6ec74c1
JH
3307 case 'w':
3308 while (len-- > 0) {
f337b084 3309 NV anv;
a6ec74c1 3310 fromstr = NEXTFROM;
15e9f109 3311 anv = SvNV(fromstr);
a6ec74c1 3312
f337b084
TH
3313 if (anv < 0) {
3314 *cur = '\0';
b162af07 3315 SvCUR_set(cat, cur - start);
49704364 3316 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3317 }
a6ec74c1 3318
196b62db
NC
3319 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3320 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3321 any negative IVs will have already been got by the croak()
3322 above. IOK is untrue for fractions, so we test them
3323 against UV_MAX_P1. */
f337b084
TH
3324 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3325 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3326 char *in = buf + sizeof(buf);
196b62db 3327 UV auv = SvUV(fromstr);
a6ec74c1
JH
3328
3329 do {
eb160463 3330 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3331 auv >>= 7;
3332 } while (auv);
3333 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3334 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3335 in, (buf + sizeof(buf)) - in);
3336 } else if (SvPOKp(fromstr))
3337 goto w_string;
a6ec74c1 3338 else if (SvNOKp(fromstr)) {
0258719b
NC
3339 /* 10**NV_MAX_10_EXP is the largest power of 10
3340 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3341 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3342 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3343 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3344 Some C compilers are strict about integral constant
3345 expressions so we conservatively divide by a slightly
3346 smaller integer instead of multiplying by the exact
3347 floating-point value.
0258719b
NC
3348 */
3349#ifdef NV_MAX_10_EXP
f337b084 3350 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3351 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3352#else
f337b084 3353 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3354 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3355#endif
a6ec74c1
JH
3356 char *in = buf + sizeof(buf);
3357
8b6e33c7 3358 anv = Perl_floor(anv);
a6ec74c1 3359 do {
8b6e33c7 3360 const NV next = Perl_floor(anv / 128);
a6ec74c1 3361 if (in <= buf) /* this cannot happen ;-) */
49704364 3362 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3363 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3364 anv = next;
3365 } while (anv > 0);
a6ec74c1 3366 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3367 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3368 in, (buf + sizeof(buf)) - in);
3369 } else {
8b6e33c7
AL
3370 const char *from;
3371 char *result, *in;
735b914b
JH
3372 SV *norm;
3373 STRLEN len;
3374 bool done;
3375
f337b084 3376 w_string:
735b914b 3377 /* Copy string and check for compliance */
349d4f2f 3378 from = SvPV_const(fromstr, len);
735b914b 3379 if ((norm = is_an_int(from, len)) == NULL)
49704364 3380 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 3381
a02a5408 3382 Newx(result, len, char);
735b914b
JH
3383 in = result + len;
3384 done = FALSE;
f337b084 3385 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3386 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3387 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3388 in, (result + len) - in);
735b914b
JH
3389 Safefree(result);
3390 SvREFCNT_dec(norm); /* free norm */
fc241834 3391 }
a6ec74c1
JH
3392 }
3393 break;
3394 case 'i':
49704364 3395 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3396 while (len-- > 0) {
f337b084 3397 int aint;
a6ec74c1
JH
3398 fromstr = NEXTFROM;
3399 aint = SvIV(fromstr);
1109a392 3400 DO_BO_PACK(aint, i);
f337b084 3401 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3402 }
3403 break;
7212898e 3404#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3405 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3406#endif
a6ec74c1
JH
3407 case 'N':
3408 while (len-- > 0) {
f337b084 3409 U32 au32;
a6ec74c1 3410 fromstr = NEXTFROM;
ef108786 3411 au32 = SvUV(fromstr);
a6ec74c1 3412#ifdef HAS_HTONL
ef108786 3413 au32 = PerlSock_htonl(au32);
a6ec74c1 3414#endif
f337b084 3415 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3416 }
3417 break;
7212898e 3418#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3419 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3420#endif
a6ec74c1
JH
3421 case 'V':
3422 while (len-- > 0) {
f337b084 3423 U32 au32;
a6ec74c1 3424 fromstr = NEXTFROM;
ef108786 3425 au32 = SvUV(fromstr);
a6ec74c1 3426#ifdef HAS_HTOVL
ef108786 3427 au32 = htovl(au32);
a6ec74c1 3428#endif
f337b084 3429 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3430 }
3431 break;
49704364 3432 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3433#if LONGSIZE != SIZE32
fc241834 3434 while (len-- > 0) {
f337b084 3435 unsigned long aulong;
fc241834
RGS
3436 fromstr = NEXTFROM;
3437 aulong = SvUV(fromstr);
3438 DO_BO_PACK(aulong, l);
f337b084 3439 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3440 }
49704364
LW
3441 break;
3442#else
3443 /* Fall though! */
a6ec74c1 3444#endif
49704364 3445 case 'L':
fc241834 3446 while (len-- > 0) {
f337b084 3447 U32 au32;
fc241834
RGS
3448 fromstr = NEXTFROM;
3449 au32 = SvUV(fromstr);
3450 DO_BO_PACK(au32, 32);
f337b084 3451 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3452 }
3453 break;
49704364 3454 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3455#if LONGSIZE != SIZE32
fc241834 3456 while (len-- > 0) {
f337b084 3457 long along;
fc241834
RGS
3458 fromstr = NEXTFROM;
3459 along = SvIV(fromstr);
3460 DO_BO_PACK(along, l);
f337b084 3461 PUSH_VAR(utf8, cur, along);
a6ec74c1 3462 }
49704364
LW
3463 break;
3464#else
3465 /* Fall though! */
a6ec74c1 3466#endif
49704364
LW
3467 case 'l':
3468 while (len-- > 0) {
f337b084 3469 I32 ai32;
49704364 3470 fromstr = NEXTFROM;
ef108786
MHM
3471 ai32 = SvIV(fromstr);
3472 DO_BO_PACK(ai32, 32);
f337b084 3473 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3474 }
3475 break;
3476#ifdef HAS_QUAD
3477 case 'Q':
3478 while (len-- > 0) {
f337b084 3479 Uquad_t auquad;
a6ec74c1 3480 fromstr = NEXTFROM;
f337b084 3481 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3482 DO_BO_PACK(auquad, 64);
f337b084 3483 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3484 }
3485 break;
3486 case 'q':
3487 while (len-- > 0) {
f337b084 3488 Quad_t aquad;
a6ec74c1
JH
3489 fromstr = NEXTFROM;
3490 aquad = (Quad_t)SvIV(fromstr);
1109a392 3491 DO_BO_PACK(aquad, 64);
f337b084 3492 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3493 }
3494 break;
f337b084 3495#endif /* HAS_QUAD */
a6ec74c1
JH
3496 case 'P':
3497 len = 1; /* assume SV is correct length */
f337b084 3498 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3499 /* Fall through! */
a6ec74c1
JH
3500 case 'p':
3501 while (len-- > 0) {
83003860 3502 const char *aptr;
f337b084 3503
a6ec74c1 3504 fromstr = NEXTFROM;
28a4f200 3505 SvGETMAGIC(fromstr);
3506 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3507 else {
a6ec74c1
JH
3508 /* XXX better yet, could spirit away the string to
3509 * a safe spot and hang on to it until the result
3510 * of pack() (and all copies of the result) are
3511 * gone.
3512 */
041457d9
DM
3513 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3514 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
9014280d 3515 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834 3516 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3517 }
3518 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3519 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3520 else
2596d9fe 3521 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3522 }
07409e01 3523 DO_BO_PACK_PC(aptr);
f337b084 3524 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3525 }
3526 break;
fc241834 3527 case 'u': {
f7fe979e 3528 const char *aptr, *aend;
fc241834 3529 bool from_utf8;