This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
New data for Unicode on older versions, thanks to Nicholas
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
acde74e1 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 */
18
166f8a29
DM
19/* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
24 *
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
27 */
28
29
a6ec74c1
JH
30#include "EXTERN.h"
31#define PERL_IN_PP_PACK_C
32#include "perl.h"
33
f7fe979e
AL
34/* Types used by pack/unpack */
35typedef enum {
36 e_no_len, /* no length */
37 e_number, /* number, [] */
38 e_star /* asterisk */
39} howlen_t;
40
41typedef struct tempsym {
42 const char* patptr; /* current template char */
43 const char* patend; /* one after last char */
44 const char* grpbeg; /* 1st char of ()-group */
45 const char* grpend; /* end of ()-group */
46 I32 code; /* template code (!<>) */
47 I32 length; /* length/repeat count */
48 howlen_t howlen; /* how length is given */
49 int level; /* () nesting level */
50 U32 flags; /* /=4, comma=2, pack=1 */
51 /* and group modifiers */
52 STRLEN strbeg; /* offset of group start */
53 struct tempsym *previous; /* previous group */
54} tempsym_t;
55
56#define TEMPSYM_INIT(symptr, p, e, f) \
57 STMT_START { \
58 (symptr)->patptr = (p); \
59 (symptr)->patend = (e); \
60 (symptr)->grpbeg = NULL; \
61 (symptr)->grpend = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->code = 0; \
64 (symptr)->length = 0; \
10edeb5d 65 (symptr)->howlen = e_no_len; \
f7fe979e
AL
66 (symptr)->level = 0; \
67 (symptr)->flags = (f); \
68 (symptr)->strbeg = 0; \
69 (symptr)->previous = NULL; \
70 } STMT_END
71
7212898e 72#if PERL_VERSION >= 9
f337b084
TH
73# define PERL_PACK_CAN_BYTEORDER
74# define PERL_PACK_CAN_SHRIEKSIGN
75#endif
76
77#ifndef CHAR_BIT
78# define CHAR_BIT 8
7212898e 79#endif
3473cf63
RGS
80/* Maximum number of bytes to which a byte can grow due to upgrade */
81#define UTF8_EXPAND 2
7212898e 82
a6ec74c1 83/*
a6ec74c1
JH
84 * Offset for integer pack/unpack.
85 *
86 * On architectures where I16 and I32 aren't really 16 and 32 bits,
87 * which for now are all Crays, pack and unpack have to play games.
88 */
89
90/*
91 * These values are required for portability of pack() output.
92 * If they're not right on your machine, then pack() and unpack()
93 * wouldn't work right anyway; you'll need to apply the Cray hack.
94 * (I'd like to check them with #if, but you can't use sizeof() in
95 * the preprocessor.) --???
96 */
97/*
98 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
99 defines are now in config.h. --Andy Dougherty April 1998
100 */
101#define SIZE16 2
102#define SIZE32 4
103
104/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
105 --jhi Feb 1999 */
106
1109a392
MHM
107#if U16SIZE > SIZE16 || U32SIZE > SIZE32
108# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
109# define OFF16(p) ((char*)(p))
110# define OFF32(p) ((char*)(p))
a6ec74c1 111# else
1109a392 112# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
113# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
114# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
115# else
08ca2aa3 116 ++++ bad cray byte order
a6ec74c1
JH
117# endif
118# endif
a6ec74c1 119#else
08ca2aa3
TH
120# define OFF16(p) ((char *) (p))
121# define OFF32(p) ((char *) (p))
a6ec74c1
JH
122#endif
123
f337b084
TH
124/* Only to be used inside a loop (see the break) */
125#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
126 if (utf8) { \
127 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
128 } else { \
129 Copy(s, OFF16(p), SIZE16, char); \
130 (s) += SIZE16; \
131 } \
132} STMT_END
133
134/* Only to be used inside a loop (see the break) */
135#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
136 if (utf8) { \
137 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
138 } else { \
139 Copy(s, OFF32(p), SIZE32, char); \
140 (s) += SIZE32; \
141 } \
142} STMT_END
143
144#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
145#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
146
147/* Only to be used inside a loop (see the break) */
f337b084 148#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
08ca2aa3
TH
149STMT_START { \
150 if (utf8) { \
f337b084
TH
151 if (!uni_to_bytes(aTHX_ &s, strend, \
152 (char *) &var, sizeof(var), datumtype)) break;\
08ca2aa3
TH
153 } else { \
154 Copy(s, (char *) &var, sizeof(var), char); \
155 s += sizeof(var); \
156 } \
08ca2aa3
TH
157} STMT_END
158
f337b084 159#define PUSH_VAR(utf8, aptr, var) \
230e1fce 160 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 161
49704364
LW
162/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
163#define MAX_SUB_TEMPLATE_LEVEL 100
164
66c611c5 165/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
166#define FLAG_WAS_UTF8 0x40
167#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 168#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 169#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
LW
170#define FLAG_SLASH 0x04
171#define FLAG_COMMA 0x02
172#define FLAG_PACK 0x01
173
a6ec74c1
JH
174STATIC SV *
175S_mul128(pTHX_ SV *sv, U8 m)
176{
177 STRLEN len;
178 char *s = SvPV(sv, len);
179 char *t;
a6ec74c1
JH
180
181 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 182 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
183
184 sv_catsv(tmpNew, sv);
185 SvREFCNT_dec(sv); /* free old sv */
186 sv = tmpNew;
187 s = SvPV(sv, len);
188 }
189 t = s + len - 1;
190 while (!*t) /* trailing '\0'? */
191 t--;
192 while (t > s) {
f7fe979e 193 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
194 *(t--) = '0' + (char)(i % 10);
195 m = (char)(i / 10);
a6ec74c1
JH
196 }
197 return (sv);
198}
199
200/* Explosives and implosives. */
201
202#if 'I' == 73 && 'J' == 74
203/* On an ASCII/ISO kind of system */
204#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
205#else
206/*
207 Some other sort of character set - use memchr() so we don't match
208 the null byte.
209 */
210#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
211#endif
212
66c611c5 213/* type modifiers */
62f95557 214#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
215#define TYPE_IS_BIG_ENDIAN 0x200
216#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 217#define TYPE_IS_PACK 0x800
1109a392 218#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 219#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
220#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
221
7212898e 222#ifdef PERL_PACK_CAN_SHRIEKSIGN
28be1210 223# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
7212898e 224#else
28be1210 225# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
7212898e
NC
226#endif
227
228#ifndef PERL_PACK_CAN_BYTEORDER
229/* Put "can't" first because it is shorter */
230# define TYPE_ENDIANNESS(t) 0
231# define TYPE_NO_ENDIANNESS(t) (t)
232
233# define ENDIANNESS_ALLOWED_TYPES ""
234
235# define DO_BO_UNPACK(var, type)
236# define DO_BO_PACK(var, type)
07409e01
NC
237# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
238# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
7212898e
NC
239# define DO_BO_UNPACK_N(var, type)
240# define DO_BO_PACK_N(var, type)
241# define DO_BO_UNPACK_P(var)
242# define DO_BO_PACK_P(var)
45f723e1
NC
243# define DO_BO_UNPACK_PC(var)
244# define DO_BO_PACK_PC(var)
66c611c5 245
f337b084 246#else /* PERL_PACK_CAN_BYTEORDER */
7212898e
NC
247
248# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
249# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
250
251# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
252
253# define DO_BO_UNPACK(var, type) \
1109a392 254 STMT_START { \
66c611c5 255 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
256 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
257 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
258 default: break; \
259 } \
260 } STMT_END
261
7212898e 262# define DO_BO_PACK(var, type) \
1109a392 263 STMT_START { \
66c611c5 264 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
265 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
266 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
267 default: break; \
268 } \
269 } STMT_END
270
07409e01 271# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
1109a392 272 STMT_START { \
66c611c5 273 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 274 case TYPE_IS_BIG_ENDIAN: \
07409e01 275 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
1109a392
MHM
276 break; \
277 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 278 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
1109a392
MHM
279 break; \
280 default: \
281 break; \
282 } \
283 } STMT_END
284
07409e01 285# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
1109a392 286 STMT_START { \
66c611c5 287 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 288 case TYPE_IS_BIG_ENDIAN: \
07409e01 289 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
1109a392
MHM
290 break; \
291 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 292 var = (post_cast *) my_htole ## type ((pre_cast) var); \
1109a392
MHM
293 break; \
294 default: \
295 break; \
296 } \
297 } STMT_END
298
7212898e 299# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
300 STMT_START { \
301 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
302 case TYPE_IS_BIG_ENDIAN: \
303 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
304 "platform", #action, #type); \
305 break; \
306 case TYPE_IS_LITTLE_ENDIAN: \
307 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
308 "platform", #action, #type); \
309 break; \
310 default: \
311 break; \
312 } \
313 } STMT_END
314
7212898e 315# if PTRSIZE == INTSIZE
07409e01
NC
316# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
317# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
318# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
319# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
7212898e 320# elif PTRSIZE == LONGSIZE
07409e01
NC
321# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
322# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
323# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
324# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
2b00a750
JD
325# elif PTRSIZE == IVSIZE
326# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
327# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
328# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
329# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
7212898e
NC
330# else
331# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
332# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
45f723e1
NC
333# define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
334# define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
7212898e 335# endif
1109a392 336
7212898e 337# if defined(my_htolen) && defined(my_letohn) && \
1109a392 338 defined(my_htoben) && defined(my_betohn)
7212898e 339# define DO_BO_UNPACK_N(var, type) \
1109a392 340 STMT_START { \
66c611c5 341 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
342 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
343 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
344 default: break; \
345 } \
346 } STMT_END
347
7212898e 348# define DO_BO_PACK_N(var, type) \
1109a392 349 STMT_START { \
66c611c5 350 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
351 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
352 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
353 default: break; \
354 } \
355 } STMT_END
7212898e
NC
356# else
357# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
358# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
359# endif
360
f337b084 361#endif /* PERL_PACK_CAN_BYTEORDER */
62f95557 362
78d46eaa 363#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 364#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
365#define PACK_SIZE_MASK 0x3F
366
78d46eaa
NC
367/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
368 in). You're unlikely ever to need to regenerate them. */
a7a3cfaa
TH
369
370#if TYPE_IS_SHRIEKING != 0x100
371 ++++shriek offset should be 256
372#endif
373
374typedef U8 packprops_t;
78d46eaa
NC
375#if 'J'-'I' == 1
376/* ASCII */
f43a9a31 377STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
378 /* normal */
379 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
380 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
381 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
383 0, 0, 0,
384 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 385#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 386 /* D */ LONG_DOUBLESIZE,
80a13697 387#else
a7a3cfaa 388 0,
80a13697 389#endif
a7a3cfaa
TH
390 0,
391 /* F */ NVSIZE,
392 0, 0,
393 /* I */ sizeof(unsigned int),
394 /* J */ UVSIZE,
395 0,
396 /* L */ SIZE32,
397 0,
398 /* N */ SIZE32,
399 0, 0,
80a13697 400#if defined(HAS_QUAD)
a7a3cfaa 401 /* Q */ sizeof(Uquad_t),
80a13697 402#else
a7a3cfaa 403 0,
80a13697 404#endif
a7a3cfaa
TH
405 0,
406 /* S */ SIZE16,
407 0,
408 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
409 /* V */ SIZE32,
410 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
411 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
412 /* c */ sizeof(char),
413 /* d */ sizeof(double),
414 0,
415 /* f */ sizeof(float),
416 0, 0,
417 /* i */ sizeof(int),
418 /* j */ IVSIZE,
419 0,
420 /* l */ SIZE32,
421 0,
422 /* n */ SIZE16,
423 0,
424 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 425#if defined(HAS_QUAD)
a7a3cfaa 426 /* q */ sizeof(Quad_t),
80a13697 427#else
a7a3cfaa 428 0,
80a13697 429#endif
a7a3cfaa
TH
430 0,
431 /* s */ SIZE16,
432 0, 0,
433 /* v */ SIZE16,
434 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
435 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0,
444 /* shrieking */
445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 0, 0, 0, 0, 0, 0, 0, 0, 0,
450 /* I */ sizeof(unsigned int),
451 0, 0,
452 /* L */ sizeof(unsigned long),
453 0,
7212898e 454#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 455 /* N */ SIZE32,
7212898e 456#else
a7a3cfaa 457 0,
7212898e 458#endif
a7a3cfaa
TH
459 0, 0, 0, 0,
460 /* S */ sizeof(unsigned short),
461 0, 0,
7212898e 462#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 463 /* V */ SIZE32,
7212898e 464#else
a7a3cfaa 465 0,
7212898e 466#endif
a7a3cfaa
TH
467 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
468 0, 0,
469 /* i */ sizeof(int),
470 0, 0,
471 /* l */ sizeof(long),
472 0,
7212898e 473#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 474 /* n */ SIZE16,
7212898e 475#else
a7a3cfaa 476 0,
7212898e 477#endif
a7a3cfaa
TH
478 0, 0, 0, 0,
479 /* s */ sizeof(short),
480 0, 0,
7212898e 481#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 482 /* v */ SIZE16,
7212898e 483#else
a7a3cfaa 484 0,
7212898e 485#endif
a7a3cfaa
TH
486 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
487 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
488 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
489 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
490 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
495};
496#else
497/* EBCDIC (or bust) */
f43a9a31 498STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
499 /* normal */
500 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
506 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
507 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
508 0, 0, 0,
509 /* c */ sizeof(char),
510 /* d */ sizeof(double),
511 0,
512 /* f */ sizeof(float),
513 0, 0,
514 /* i */ sizeof(int),
515 0, 0, 0, 0, 0, 0, 0,
516 /* j */ IVSIZE,
517 0,
518 /* l */ SIZE32,
519 0,
520 /* n */ SIZE16,
521 0,
522 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 523#if defined(HAS_QUAD)
a7a3cfaa 524 /* q */ sizeof(Quad_t),
80a13697 525#else
a7a3cfaa 526 0,
80a13697 527#endif
a7a3cfaa
TH
528 0, 0, 0, 0, 0, 0, 0, 0, 0,
529 /* s */ SIZE16,
530 0, 0,
531 /* v */ SIZE16,
532 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
533 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
534 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
535 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 536#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 537 /* D */ LONG_DOUBLESIZE,
80a13697 538#else
a7a3cfaa 539 0,
80a13697 540#endif
a7a3cfaa
TH
541 0,
542 /* F */ NVSIZE,
543 0, 0,
544 /* I */ sizeof(unsigned int),
545 0, 0, 0, 0, 0, 0, 0,
546 /* J */ UVSIZE,
547 0,
548 /* L */ SIZE32,
549 0,
550 /* N */ SIZE32,
551 0, 0,
80a13697 552#if defined(HAS_QUAD)
a7a3cfaa 553 /* Q */ sizeof(Uquad_t),
80a13697 554#else
a7a3cfaa 555 0,
80a13697 556#endif
a7a3cfaa
TH
557 0, 0, 0, 0, 0, 0, 0, 0, 0,
558 /* S */ SIZE16,
559 0,
560 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
561 /* V */ SIZE32,
562 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
564 0, 0, 0, 0, 0, 0, 0, 0, 0,
565 /* shrieking */
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
572 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
573 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
574 0, 0, 0, 0, 0, 0, 0, 0, 0,
575 /* i */ sizeof(int),
576 0, 0, 0, 0, 0, 0, 0, 0, 0,
577 /* l */ sizeof(long),
578 0,
7212898e 579#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 580 /* n */ SIZE16,
7212898e 581#else
a7a3cfaa 582 0,
7212898e 583#endif
a7a3cfaa
TH
584 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
585 /* s */ sizeof(short),
586 0, 0,
7212898e 587#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 588 /* v */ SIZE16,
7212898e 589#else
a7a3cfaa 590 0,
7212898e 591#endif
a7a3cfaa
TH
592 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
593 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
594 0, 0, 0,
595 /* I */ sizeof(unsigned int),
596 0, 0, 0, 0, 0, 0, 0, 0, 0,
597 /* L */ sizeof(unsigned long),
598 0,
7212898e 599#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 600 /* N */ SIZE32,
7212898e 601#else
a7a3cfaa 602 0,
7212898e 603#endif
a7a3cfaa
TH
604 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
605 /* S */ sizeof(unsigned short),
606 0, 0,
7212898e 607#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 608 /* V */ SIZE32,
7212898e 609#else
a7a3cfaa 610 0,
7212898e 611#endif
a7a3cfaa
TH
612 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
613 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
614};
615#endif
616
08ca2aa3 617STATIC U8
f7fe979e 618uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 619{
08ca2aa3 620 STRLEN retlen;
0bcc34c2 621 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 622 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
623 /* We try to process malformed UTF-8 as much as possible (preferrably with
624 warnings), but these two mean we make no progress in the string and
625 might enter an infinite loop */
626 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
627 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
628 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 629 if (val >= 0x100) {
f337b084 630 if (ckWARN(WARN_UNPACK))
08ca2aa3
TH
631 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
632 "Character in '%c' format wrapped in unpack",
f337b084 633 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
634 val &= 0xff;
635 }
636 *s += retlen;
fe2774ed 637 return (U8)val;
08ca2aa3
TH
638}
639
f337b084
TH
640#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
641 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
642 *(U8 *)(s)++)
643
644STATIC bool
f7fe979e 645uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
646{
647 UV val;
648 STRLEN retlen;
f7fe979e 649 const char *from = *s;
08ca2aa3 650 int bad = 0;
f7fe979e 651 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3
TH
652 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
653 for (;buf_len > 0; buf_len--) {
654 if (from >= end) return FALSE;
f337b084 655 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
656 if (retlen == (STRLEN) -1 || retlen == 0) {
657 from += UTF8SKIP(from);
658 bad |= 1;
659 } else from += retlen;
660 if (val >= 0x100) {
661 bad |= 2;
662 val &= 0xff;
663 }
fe2774ed 664 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
665 }
666 /* We have enough characters for the buffer. Did we have problems ? */
667 if (bad) {
668 if (bad & 1) {
669 /* Rewalk the string fragment while warning */
f7fe979e 670 const char *ptr;
9e27e96a 671 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
672 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
673 if (ptr >= end) break;
f337b084 674 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
675 }
676 if (from > end) from = end;
677 }
678 if ((bad & 2) && ckWARN(WARN_UNPACK))
fc241834 679 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084
TH
680 WARN_PACK : WARN_UNPACK),
681 "Character(s) in '%c' format wrapped in %s",
fc241834 682 (int) TYPE_NO_MODIFIERS(datumtype),
f337b084 683 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
684 }
685 *s = from;
686 return TRUE;
687}
688
689STATIC bool
f7fe979e 690next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 691{
97aff369 692 dVAR;
08ca2aa3 693 STRLEN retlen;
0bcc34c2 694 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
695 if (val >= 0x100 || !ISUUCHAR(val) ||
696 retlen == (STRLEN) -1 || retlen == 0) {
697 *out = 0;
698 return FALSE;
699 }
700 *out = PL_uudmap[val] & 077;
f337b084 701 *s += retlen;
08ca2aa3
TH
702 return TRUE;
703}
78d46eaa 704
64844641 705STATIC char *
14333449 706S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
0bcc34c2 707 const U8 * const end = start + len;
64844641 708
f337b084 709 while (start < end) {
48fa4626
ST
710 const UV uv = NATIVE_TO_ASCII(*start);
711 if (UNI_IS_INVARIANT(uv))
712 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
713 else {
714 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
715 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
f337b084
TH
716 }
717 start++;
718 }
64844641 719 return dest;
f337b084
TH
720}
721
230e1fce
NC
722#define PUSH_BYTES(utf8, cur, buf, len) \
723STMT_START { \
64844641
AL
724 if (utf8) \
725 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
230e1fce
NC
726 else { \
727 Copy(buf, cur, len, char); \
728 (cur) += (len); \
729 } \
f337b084
TH
730} STMT_END
731
732#define GROWING(utf8, cat, start, cur, in_len) \
733STMT_START { \
734 STRLEN glen = (in_len); \
3473cf63 735 if (utf8) glen *= UTF8_EXPAND; \
f337b084 736 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 737 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
738 (cur) = (start) + SvCUR(cat); \
739 } \
740} STMT_END
741
742#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
743STMT_START { \
f7fe979e 744 const STRLEN glen = (in_len); \
f337b084 745 STRLEN gl = glen; \
3473cf63 746 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
747 if ((cur) + gl >= (start) + SvLEN(cat)) { \
748 *cur = '\0'; \
b162af07 749 SvCUR_set((cat), (cur) - (start)); \
0bd48802 750 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
751 (cur) = (start) + SvCUR(cat); \
752 } \
753 PUSH_BYTES(utf8, cur, buf, glen); \
754} STMT_END
755
756#define PUSH_BYTE(utf8, s, byte) \
757STMT_START { \
758 if (utf8) { \
f7fe979e 759 const U8 au8 = (byte); \
64844641 760 (s) = bytes_to_uni(&au8, 1, (s)); \
f337b084
TH
761 } else *(U8 *)(s)++ = (byte); \
762} STMT_END
763
764/* Only to be used inside a loop (see the break) */
765#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
766STMT_START { \
767 STRLEN retlen; \
768 if (str >= end) break; \
769 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
770 if (retlen == (STRLEN) -1 || retlen == 0) { \
771 *cur = '\0'; \
772 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
773 } \
774 str += retlen; \
775} STMT_END
776
f7fe979e
AL
777static const char *_action( const tempsym_t* symptr )
778{
10edeb5d 779 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
780}
781
206947d2 782/* Returns the sizeof() struct described by pat */
028d1f6d 783STATIC I32
f337b084 784S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 785{
f337b084 786 I32 total = 0;
206947d2 787
49704364 788 while (next_symbol(symptr)) {
f337b084 789 I32 len;
f7fe979e 790 int size;
f337b084
TH
791
792 switch (symptr->howlen) {
fc241834 793 case e_star:
49704364 794 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 795 _action( symptr ) );
49704364 796 break;
f337b084
TH
797 default:
798 /* e_no_len and e_number */
799 len = symptr->length;
800 break;
49704364
LW
801 }
802
a7a3cfaa 803 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 804 if (!size) {
f7fe979e 805 int star;
80a13697
NC
806 /* endianness doesn't influence the size of a type */
807 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
808 default:
809 Perl_croak(aTHX_ "Invalid type '%c' in %s",
810 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 811 _action( symptr ) );
28be1210
TH
812#ifdef PERL_PACK_CAN_SHRIEKSIGN
813 case '.' | TYPE_IS_SHRIEKING:
814 case '@' | TYPE_IS_SHRIEKING:
815#endif
80a13697 816 case '@':
28be1210 817 case '.':
80a13697
NC
818 case '/':
819 case 'U': /* XXXX Is it correct? */
820 case 'w':
821 case 'u':
822 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 823 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 824 _action( symptr ) );
80a13697
NC
825 case '%':
826 size = 0;
827 break;
828 case '(':
fc241834
RGS
829 {
830 tempsym_t savsym = *symptr;
831 symptr->patptr = savsym.grpbeg;
832 symptr->patend = savsym.grpend;
833 /* XXXX Theoretically, we need to measure many times at
834 different positions, since the subexpression may contain
835 alignment commands, but be not of aligned length.
836 Need to detect this and croak(). */
837 size = measure_struct(symptr);
838 *symptr = savsym;
839 break;
840 }
80a13697
NC
841 case 'X' | TYPE_IS_SHRIEKING:
842 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
843 */
844 if (!len) /* Avoid division by 0 */
845 len = 1;
846 len = total % len; /* Assumed: the start is aligned. */
847 /* FALL THROUGH */
848 case 'X':
849 size = -1;
850 if (total < len)
f7fe979e 851 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
852 break;
853 case 'x' | TYPE_IS_SHRIEKING:
854 if (!len) /* Avoid division by 0 */
855 len = 1;
856 star = total % len; /* Assumed: the start is aligned. */
857 if (star) /* Other portable ways? */
858 len = len - star;
859 else
860 len = 0;
861 /* FALL THROUGH */
862 case 'x':
863 case 'A':
864 case 'Z':
865 case 'a':
80a13697
NC
866 size = 1;
867 break;
868 case 'B':
869 case 'b':
870 len = (len + 7)/8;
871 size = 1;
872 break;
873 case 'H':
874 case 'h':
875 len = (len + 1)/2;
876 size = 1;
877 break;
78d46eaa 878
80a13697
NC
879 case 'P':
880 len = 1;
881 size = sizeof(char*);
78d46eaa
NC
882 break;
883 }
206947d2
IZ
884 }
885 total += len * size;
886 }
887 return total;
888}
889
49704364
LW
890
891/* locate matching closing parenthesis or bracket
892 * returns char pointer to char after match, or NULL
893 */
f7fe979e
AL
894STATIC const char *
895S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
18529408 896{
49704364 897 while (patptr < patend) {
f7fe979e 898 const char c = *patptr++;
49704364
LW
899
900 if (isSPACE(c))
901 continue;
902 else if (c == ender)
903 return patptr-1;
904 else if (c == '#') {
905 while (patptr < patend && *patptr != '\n')
906 patptr++;
907 continue;
908 } else if (c == '(')
909 patptr = group_end(patptr, patend, ')') + 1;
910 else if (c == '[')
911 patptr = group_end(patptr, patend, ']') + 1;
18529408 912 }
49704364
LW
913 Perl_croak(aTHX_ "No group ending character '%c' found in template",
914 ender);
915 return 0;
18529408
IZ
916}
917
49704364
LW
918
919/* Convert unsigned decimal number to binary.
920 * Expects a pointer to the first digit and address of length variable
921 * Advances char pointer to 1st non-digit char and returns number
fc241834 922 */
f7fe979e
AL
923STATIC const char *
924S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
49704364
LW
925{
926 I32 len = *patptr++ - '0';
927 while (isDIGIT(*patptr)) {
928 if (len >= 0x7FFFFFFF/10)
929 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
930 len = (len * 10) + (*patptr++ - '0');
931 }
932 *lenptr = len;
933 return patptr;
934}
935
936/* The marvellous template parsing routine: Using state stored in *symptr,
937 * locates next template code and count
938 */
939STATIC bool
f337b084 940S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 941{
f7fe979e 942 const char* patptr = symptr->patptr;
0bcc34c2 943 const char* const patend = symptr->patend;
49704364
LW
944
945 symptr->flags &= ~FLAG_SLASH;
946
947 while (patptr < patend) {
948 if (isSPACE(*patptr))
949 patptr++;
950 else if (*patptr == '#') {
951 patptr++;
952 while (patptr < patend && *patptr != '\n')
953 patptr++;
954 if (patptr < patend)
955 patptr++;
956 } else {
fc241834 957 /* We should have found a template code */
49704364 958 I32 code = *patptr++ & 0xFF;
66c611c5 959 U32 inherited_modifiers = 0;
49704364
LW
960
961 if (code == ','){ /* grandfather in commas but with a warning */
962 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
963 symptr->flags |= FLAG_COMMA;
964 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 965 "Invalid type ',' in %s", _action( symptr ) );
49704364
LW
966 }
967 continue;
968 }
fc241834 969
49704364 970 /* for '(', skip to ')' */
fc241834 971 if (code == '(') {
49704364
LW
972 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
973 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 974 _action( symptr ) );
49704364
LW
975 symptr->grpbeg = patptr;
976 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
977 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
978 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 979 _action( symptr ) );
49704364
LW
980 }
981
66c611c5
MHM
982 /* look for group modifiers to inherit */
983 if (TYPE_ENDIANNESS(symptr->flags)) {
984 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
985 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
986 }
987
1109a392
MHM
988 /* look for modifiers */
989 while (patptr < patend) {
b7787f18
AL
990 const char *allowed;
991 I32 modifier;
1109a392
MHM
992 switch (*patptr) {
993 case '!':
994 modifier = TYPE_IS_SHRIEKING;
7212898e 995 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 996 break;
7212898e 997#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
998 case '>':
999 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 1000 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
1001 break;
1002 case '<':
1003 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 1004 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 1005 break;
f337b084 1006#endif /* PERL_PACK_CAN_BYTEORDER */
1109a392 1007 default:
b7787f18
AL
1008 allowed = "";
1009 modifier = 0;
1109a392
MHM
1010 break;
1011 }
66c611c5 1012
1109a392
MHM
1013 if (modifier == 0)
1014 break;
66c611c5 1015
1109a392
MHM
1016 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1017 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 1018 allowed, _action( symptr ) );
66c611c5
MHM
1019
1020 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 1021 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 1022 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
1023 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1024 TYPE_ENDIANNESS_MASK)
1025 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 1026 *patptr, _action( symptr ) );
66c611c5 1027
041457d9 1028 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1109a392
MHM
1029 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1030 "Duplicate modifier '%c' after '%c' in %s",
1031 *patptr, (int) TYPE_NO_MODIFIERS(code),
f7fe979e 1032 _action( symptr ) );
1109a392 1033 }
66c611c5 1034
1109a392
MHM
1035 code |= modifier;
1036 patptr++;
49704364
LW
1037 }
1038
66c611c5
MHM
1039 /* inherit modifiers */
1040 code |= inherited_modifiers;
1041
fc241834 1042 /* look for count and/or / */
49704364
LW
1043 if (patptr < patend) {
1044 if (isDIGIT(*patptr)) {
1045 patptr = get_num( patptr, &symptr->length );
1046 symptr->howlen = e_number;
1047
1048 } else if (*patptr == '*') {
1049 patptr++;
1050 symptr->howlen = e_star;
1051
1052 } else if (*patptr == '[') {
f7fe979e 1053 const char* lenptr = ++patptr;
49704364
LW
1054 symptr->howlen = e_number;
1055 patptr = group_end( patptr, patend, ']' ) + 1;
1056 /* what kind of [] is it? */
1057 if (isDIGIT(*lenptr)) {
1058 lenptr = get_num( lenptr, &symptr->length );
1059 if( *lenptr != ']' )
1060 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 1061 _action( symptr ) );
49704364
LW
1062 } else {
1063 tempsym_t savsym = *symptr;
1064 symptr->patend = patptr-1;
1065 symptr->patptr = lenptr;
1066 savsym.length = measure_struct(symptr);
1067 *symptr = savsym;
1068 }
1069 } else {
1070 symptr->howlen = e_no_len;
1071 symptr->length = 1;
1072 }
1073
1074 /* try to find / */
1075 while (patptr < patend) {
1076 if (isSPACE(*patptr))
1077 patptr++;
1078 else if (*patptr == '#') {
1079 patptr++;
1080 while (patptr < patend && *patptr != '\n')
1081 patptr++;
1082 if (patptr < patend)
1083 patptr++;
1084 } else {
66c611c5 1085 if (*patptr == '/') {
49704364
LW
1086 symptr->flags |= FLAG_SLASH;
1087 patptr++;
66c611c5
MHM
1088 if (patptr < patend &&
1089 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 1090 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 1091 _action( symptr ) );
49704364
LW
1092 }
1093 break;
1094 }
18529408 1095 }
49704364
LW
1096 } else {
1097 /* at end - no count, no / */
1098 symptr->howlen = e_no_len;
1099 symptr->length = 1;
1100 }
1101
1102 symptr->code = code;
fc241834 1103 symptr->patptr = patptr;
49704364 1104 return TRUE;
18529408 1105 }
49704364 1106 }
fc241834 1107 symptr->patptr = patptr;
49704364 1108 return FALSE;
18529408
IZ
1109}
1110
18529408 1111/*
fc241834 1112 There is no way to cleanly handle the case where we should process the
08ca2aa3 1113 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
1114 (e.g. estimates like strend-s as an upper bound for the number of
1115 characters left wouldn't work). So if we foresee the need of this
1116 (pattern starts with U or contains U0), we want to work on the encoded
1117 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
1118 themselves if they need to do a lot of unpacks like this on it
1119*/
fc241834 1120STATIC bool
08ca2aa3
TH
1121need_utf8(const char *pat, const char *patend)
1122{
1123 bool first = TRUE;
1124 while (pat < patend) {
1125 if (pat[0] == '#') {
1126 pat++;
f7fe979e 1127 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1128 if (!pat) return FALSE;
1129 } else if (pat[0] == 'U') {
1130 if (first || pat[1] == '0') return TRUE;
1131 } else first = FALSE;
1132 pat++;
1133 }
1134 return FALSE;
1135}
1136
1137STATIC char
1138first_symbol(const char *pat, const char *patend) {
1139 while (pat < patend) {
1140 if (pat[0] != '#') return pat[0];
1141 pat++;
f7fe979e 1142 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1143 if (!pat) return 0;
1144 pat++;
1145 }
1146 return 0;
1147}
1148
1149/*
7accc089
JH
1150=for apidoc unpackstring
1151
608d3aed
LW
1152The engine implementing unpack() Perl function. C<unpackstring> puts the
1153extracted list items on the stack and returns the number of elements.
1154Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
7accc089
JH
1155
1156=cut */
1157
1158I32
f7fe979e 1159Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 1160{
f7fe979e 1161 tempsym_t sym;
08ca2aa3 1162
f337b084 1163 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1164 else if (need_utf8(pat, patend)) {
1165 /* We probably should try to avoid this in case a scalar context call
1166 wouldn't get to the "U0" */
1167 STRLEN len = strend - s;
230e1fce 1168 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1169 SAVEFREEPV(s);
1170 strend = s + len;
f337b084 1171 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1172 }
1173
f337b084
TH
1174 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1175 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1176
f7fe979e 1177 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
1178
1179 return unpack_rec(&sym, s, s, strend, NULL );
1180}
1181
4136a0f7 1182STATIC I32
f7fe979e 1183S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 1184{
27da23d5 1185 dVAR; dSP;
18529408 1186 SV *sv;
f7fe979e 1187 const I32 start_sp_offset = SP - PL_stack_base;
49704364 1188 howlen_t howlen;
a6ec74c1 1189
a6ec74c1 1190 I32 checksum = 0;
92d41999 1191 UV cuv = 0;
a6ec74c1 1192 NV cdouble = 0.0;
f337b084 1193 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 1194 bool beyond = FALSE;
21c16052 1195 bool explicit_length;
9e27e96a 1196 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 1197 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
28be1210 1198 symptr->strbeg = s - strbeg;
49704364 1199
49704364 1200 while (next_symbol(symptr)) {
a7a3cfaa 1201 packprops_t props;
9e27e96a 1202 I32 len;
f337b084 1203 I32 datumtype = symptr->code;
206947d2 1204 /* do first one only unless in list context
08ca2aa3 1205 / is implemented by unpacking the count, then popping it from the
206947d2 1206 stack, so must check that we're not in the middle of a / */
49704364 1207 if ( unpack_only_one
206947d2 1208 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1209 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1210 break;
49704364 1211
f337b084 1212 switch (howlen = symptr->howlen) {
fc241834
RGS
1213 case e_star:
1214 len = strend - strbeg; /* long enough */
49704364 1215 break;
f337b084
TH
1216 default:
1217 /* e_no_len and e_number */
1218 len = symptr->length;
1219 break;
49704364 1220 }
18529408 1221
21c16052 1222 explicit_length = TRUE;
a6ec74c1 1223 redo_switch:
49704364 1224 beyond = s >= strend;
a7a3cfaa
TH
1225
1226 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1227 if (props) {
1228 /* props nonzero means we can process this letter. */
9e27e96a
AL
1229 const long size = props & PACK_SIZE_MASK;
1230 const long howmany = (strend - s) / size;
a7a3cfaa
TH
1231 if (len > howmany)
1232 len = howmany;
1233
1234 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1235 if (len && unpack_only_one) len = 1;
1236 EXTEND(SP, len);
1237 EXTEND_MORTAL(len);
78d46eaa
NC
1238 }
1239 }
a7a3cfaa 1240
1109a392 1241 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1242 default:
1109a392 1243 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1244
a6ec74c1 1245 case '%':
49704364 1246 if (howlen == e_no_len)
18529408 1247 len = 16; /* len is not specified */
a6ec74c1 1248 checksum = len;
92d41999 1249 cuv = 0;
a6ec74c1 1250 cdouble = 0;
18529408 1251 continue;
a6ec74c1 1252 break;
18529408
IZ
1253 case '(':
1254 {
49704364 1255 tempsym_t savsym = *symptr;
9e27e96a 1256 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 1257 symptr->flags |= group_modifiers;
49704364 1258 symptr->patend = savsym.grpend;
28be1210 1259 symptr->previous = &savsym;
49704364 1260 symptr->level++;
18529408
IZ
1261 PUTBACK;
1262 while (len--) {
49704364 1263 symptr->patptr = savsym.grpbeg;
f337b084
TH
1264 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1265 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1266 unpack_rec(symptr, s, strbeg, strend, &s);
1267 if (s == strend && savsym.howlen == e_star)
49704364 1268 break; /* No way to continue */
18529408
IZ
1269 }
1270 SPAGAIN;
28be1210 1271 savsym.flags = symptr->flags & ~group_modifiers;
49704364 1272 *symptr = savsym;
18529408
IZ
1273 break;
1274 }
28be1210
TH
1275#ifdef PERL_PACK_CAN_SHRIEKSIGN
1276 case '.' | TYPE_IS_SHRIEKING:
1277#endif
1278 case '.': {
9e27e96a 1279 const char *from;
28be1210
TH
1280 SV *sv;
1281#ifdef PERL_PACK_CAN_SHRIEKSIGN
9e27e96a 1282 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210 1283#else /* PERL_PACK_CAN_SHRIEKSIGN */
9e27e96a 1284 const bool u8 = utf8;
28be1210
TH
1285#endif
1286 if (howlen == e_star) from = strbeg;
1287 else if (len <= 0) from = s;
1288 else {
1289 tempsym_t *group = symptr;
1290
1291 while (--len && group) group = group->previous;
1292 from = group ? strbeg + group->strbeg : strbeg;
1293 }
1294 sv = from <= s ?
00646304
CB
1295 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1296 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
28be1210
TH
1297 XPUSHs(sv_2mortal(sv));
1298 break;
1299 }
1300#ifdef PERL_PACK_CAN_SHRIEKSIGN
1301 case '@' | TYPE_IS_SHRIEKING:
1302#endif
a6ec74c1 1303 case '@':
28be1210
TH
1304 s = strbeg + symptr->strbeg;
1305#ifdef PERL_PACK_CAN_SHRIEKSIGN
1306 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1307#else /* PERL_PACK_CAN_SHRIEKSIGN */
1308 if (utf8)
1309#endif
1310 {
08ca2aa3
TH
1311 while (len > 0) {
1312 if (s >= strend)
1313 Perl_croak(aTHX_ "'@' outside of string in unpack");
1314 s += UTF8SKIP(s);
1315 len--;
1316 }
1317 if (s > strend)
1318 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1319 } else {
28be1210 1320 if (strend-s < len)
fc241834 1321 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1322 s += len;
08ca2aa3 1323 }
a6ec74c1 1324 break;
62f95557
IZ
1325 case 'X' | TYPE_IS_SHRIEKING:
1326 if (!len) /* Avoid division by 0 */
1327 len = 1;
08ca2aa3 1328 if (utf8) {
f7fe979e 1329 const char *hop, *last;
f337b084
TH
1330 I32 l = len;
1331 hop = last = strbeg;
1332 while (hop < s) {
1333 hop += UTF8SKIP(hop);
1334 if (--l == 0) {
08ca2aa3 1335 last = hop;
f337b084
TH
1336 l = len;
1337 }
fc241834 1338 }
f337b084
TH
1339 if (last > s)
1340 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1341 s = last;
1342 break;
f337b084
TH
1343 }
1344 len = (s - strbeg) % len;
62f95557 1345 /* FALL THROUGH */
a6ec74c1 1346 case 'X':
08ca2aa3
TH
1347 if (utf8) {
1348 while (len > 0) {
1349 if (s <= strbeg)
1350 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1351 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1352 if (s <= strbeg)
1353 Perl_croak(aTHX_ "'X' outside of string in unpack");
1354 }
1355 len--;
1356 }
1357 } else {
fc241834
RGS
1358 if (len > s - strbeg)
1359 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1360 s -= len;
08ca2aa3 1361 }
a6ec74c1 1362 break;
9e27e96a
AL
1363 case 'x' | TYPE_IS_SHRIEKING: {
1364 I32 ai32;
62f95557
IZ
1365 if (!len) /* Avoid division by 0 */
1366 len = 1;
230e1fce
NC
1367 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1368 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1369 if (ai32 == 0) break;
1370 len -= ai32;
9e27e96a 1371 }
62f95557 1372 /* FALL THROUGH */
a6ec74c1 1373 case 'x':
08ca2aa3
TH
1374 if (utf8) {
1375 while (len>0) {
1376 if (s >= strend)
1377 Perl_croak(aTHX_ "'x' outside of string in unpack");
1378 s += UTF8SKIP(s);
1379 len--;
1380 }
1381 } else {
fc241834
RGS
1382 if (len > strend - s)
1383 Perl_croak(aTHX_ "'x' outside of string in unpack");
1384 s += len;
f337b084 1385 }
a6ec74c1
JH
1386 break;
1387 case '/':
49704364
LW
1388 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1389 break;
a6ec74c1
JH
1390 case 'A':
1391 case 'Z':
1392 case 'a':
08ca2aa3
TH
1393 if (checksum) {
1394 /* Preliminary length estimate is assumed done in 'W' */
1395 if (len > strend - s) len = strend - s;
1396 goto W_checksum;
1397 }
1398 if (utf8) {
1399 I32 l;
f7fe979e 1400 const char *hop;
08ca2aa3
TH
1401 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1402 if (hop >= strend) {
1403 if (hop > strend)
1404 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1405 break;
fc241834 1406 }
a6ec74c1 1407 }
08ca2aa3
TH
1408 if (hop > strend)
1409 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1410 len = hop - s;
1411 } else if (len > strend - s)
1412 len = strend - s;
1413
1414 if (datumtype == 'Z') {
1415 /* 'Z' strips stuff after first null */
f7fe979e 1416 const char *ptr, *end;
f337b084
TH
1417 end = s + len;
1418 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1419 sv = newSVpvn(s, ptr-s);
1420 if (howlen == e_star) /* exact for 'Z*' */
1421 len = ptr-s + (ptr != strend ? 1 : 0);
1422 } else if (datumtype == 'A') {
1423 /* 'A' strips both nulls and spaces */
f7fe979e 1424 const char *ptr;
18bdf90a
TH
1425 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1426 for (ptr = s+len-1; ptr >= s; ptr--)
1427 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
230e1fce 1428 !is_utf8_space((U8 *) ptr)) break;
18bdf90a
TH
1429 if (ptr >= s) ptr += UTF8SKIP(ptr);
1430 else ptr++;
28be1210 1431 if (ptr > s+len)
18bdf90a
TH
1432 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1433 } else {
1434 for (ptr = s+len-1; ptr >= s; ptr--)
1435 if (*ptr != 0 && !isSPACE(*ptr)) break;
1436 ptr++;
1437 }
08ca2aa3
TH
1438 sv = newSVpvn(s, ptr-s);
1439 } else sv = newSVpvn(s, len);
1440
1441 if (utf8) {
1442 SvUTF8_on(sv);
1443 /* Undo any upgrade done due to need_utf8() */
f337b084 1444 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1445 sv_utf8_downgrade(sv, 0);
a6ec74c1
JH
1446 }
1447 XPUSHs(sv_2mortal(sv));
08ca2aa3 1448 s += len;
a6ec74c1
JH
1449 break;
1450 case 'B':
08ca2aa3
TH
1451 case 'b': {
1452 char *str;
49704364 1453 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1454 len = (strend - s) * 8;
1455 if (checksum) {
1456 if (!PL_bitcount) {
08ca2aa3 1457 int bits;
a02a5408 1458 Newxz(PL_bitcount, 256, char);
a6ec74c1
JH
1459 for (bits = 1; bits < 256; bits++) {
1460 if (bits & 1) PL_bitcount[bits]++;
1461 if (bits & 2) PL_bitcount[bits]++;
1462 if (bits & 4) PL_bitcount[bits]++;
1463 if (bits & 8) PL_bitcount[bits]++;
1464 if (bits & 16) PL_bitcount[bits]++;
1465 if (bits & 32) PL_bitcount[bits]++;
1466 if (bits & 64) PL_bitcount[bits]++;
1467 if (bits & 128) PL_bitcount[bits]++;
1468 }
1469 }
f337b084 1470 if (utf8)
08ca2aa3 1471 while (len >= 8 && s < strend) {
f337b084 1472 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1473 len -= 8;
1474 }
f337b084 1475 else
fc241834 1476 while (len >= 8) {
08ca2aa3 1477 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1478 len -= 8;
1479 }
08ca2aa3
TH
1480 if (len && s < strend) {
1481 U8 bits;
f337b084
TH
1482 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1483 if (datumtype == 'b')
a6ec74c1 1484 while (len-- > 0) {
92d41999 1485 if (bits & 1) cuv++;
a6ec74c1
JH
1486 bits >>= 1;
1487 }
f337b084 1488 else
a6ec74c1 1489 while (len-- > 0) {
08ca2aa3 1490 if (bits & 0x80) cuv++;
a6ec74c1
JH
1491 bits <<= 1;
1492 }
fc241834 1493 }
a6ec74c1
JH
1494 break;
1495 }
08ca2aa3 1496
561b68a9 1497 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1498 SvPOK_on(sv);
1499 str = SvPVX(sv);
1500 if (datumtype == 'b') {
f337b084 1501 U8 bits = 0;
f7fe979e 1502 const I32 ai32 = len;
08ca2aa3
TH
1503 for (len = 0; len < ai32; len++) {
1504 if (len & 7) bits >>= 1;
1505 else if (utf8) {
1506 if (s >= strend) break;
f337b084 1507 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1508 } else bits = *(U8 *) s++;
1509 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1510 }
08ca2aa3 1511 } else {
f337b084 1512 U8 bits = 0;
f7fe979e 1513 const I32 ai32 = len;
08ca2aa3
TH
1514 for (len = 0; len < ai32; len++) {
1515 if (len & 7) bits <<= 1;
1516 else if (utf8) {
1517 if (s >= strend) break;
f337b084 1518 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1519 } else bits = *(U8 *) s++;
1520 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1521 }
1522 }
1523 *str = '\0';
aa07b2f6 1524 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1525 XPUSHs(sv);
a6ec74c1 1526 break;
08ca2aa3 1527 }
a6ec74c1 1528 case 'H':
08ca2aa3
TH
1529 case 'h': {
1530 char *str;
fc241834 1531 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1532 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1533 len = (strend - s) * 2;
561b68a9 1534 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1535 SvPOK_on(sv);
1536 str = SvPVX(sv);
1537 if (datumtype == 'h') {
f337b084 1538 U8 bits = 0;
9e27e96a 1539 I32 ai32 = len;
fc241834
RGS
1540 for (len = 0; len < ai32; len++) {
1541 if (len & 1) bits >>= 4;
1542 else if (utf8) {
1543 if (s >= strend) break;
f337b084 1544 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1545 } else bits = * (U8 *) s++;
a6ec74c1
JH
1546 *str++ = PL_hexdigit[bits & 15];
1547 }
08ca2aa3 1548 } else {
f337b084 1549 U8 bits = 0;
f7fe979e 1550 const I32 ai32 = len;
08ca2aa3
TH
1551 for (len = 0; len < ai32; len++) {
1552 if (len & 1) bits <<= 4;
1553 else if (utf8) {
1554 if (s >= strend) break;
f337b084 1555 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1556 } else bits = *(U8 *) s++;
a6ec74c1
JH
1557 *str++ = PL_hexdigit[(bits >> 4) & 15];
1558 }
1559 }
1560 *str = '\0';
aa07b2f6 1561 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1562 XPUSHs(sv);
a6ec74c1 1563 break;
08ca2aa3 1564 }
a6ec74c1 1565 case 'c':
73cb7263 1566 while (len-- > 0) {
f337b084 1567 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
73cb7263
NC
1568 if (aint >= 128) /* fake up signed chars */
1569 aint -= 256;
08ca2aa3 1570 if (!checksum)
ac7f3b1b 1571 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1572 else if (checksum > bits_in_uv)
1573 cdouble += (NV)aint;
1574 else
1575 cuv += aint;
a6ec74c1
JH
1576 }
1577 break;
1578 case 'C':
08ca2aa3
TH
1579 case 'W':
1580 W_checksum:
35bcd338 1581 if (len == 0) {
fc241834 1582 if (explicit_length && datumtype == 'C')
08ca2aa3 1583 /* Switch to "character" mode */
f337b084 1584 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
35bcd338
JH
1585 break;
1586 }
fc241834 1587 if (datumtype == 'C' ?
f337b084
TH
1588 (symptr->flags & FLAG_DO_UTF8) &&
1589 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
08ca2aa3 1590 while (len-- > 0 && s < strend) {
08ca2aa3 1591 STRLEN retlen;
f7fe979e 1592 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1593 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1594 if (retlen == (STRLEN) -1 || retlen == 0)
1595 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1596 s += retlen;
1597 if (!checksum)
1598 PUSHs(sv_2mortal(newSVuv((UV) val)));
1599 else if (checksum > bits_in_uv)
1600 cdouble += (NV) val;
d6d3e8bd 1601 else
08ca2aa3 1602 cuv += val;
fc241834 1603 }
08ca2aa3 1604 } else if (!checksum)
a6ec74c1 1605 while (len-- > 0) {
f7fe979e 1606 const U8 ch = *(U8 *) s++;
08ca2aa3 1607 PUSHs(sv_2mortal(newSVuv((UV) ch)));
a6ec74c1 1608 }
08ca2aa3
TH
1609 else if (checksum > bits_in_uv)
1610 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1611 else
1612 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1613 break;
1614 case 'U':
35bcd338 1615 if (len == 0) {
08ca2aa3
TH
1616 if (explicit_length) {
1617 /* Switch to "bytes in UTF-8" mode */
f337b084 1618 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1619 else
1620 /* Should be impossible due to the need_utf8() test */
1621 Perl_croak(aTHX_ "U0 mode on a byte string");
1622 }
35bcd338
JH
1623 break;
1624 }
08ca2aa3 1625 if (len > strend - s) len = strend - s;
fc241834 1626 if (!checksum) {
08ca2aa3
TH
1627 if (len && unpack_only_one) len = 1;
1628 EXTEND(SP, len);
1629 EXTEND_MORTAL(len);
fc241834 1630 }
08ca2aa3
TH
1631 while (len-- > 0 && s < strend) {
1632 STRLEN retlen;
1633 UV auv;
1634 if (utf8) {
1635 U8 result[UTF8_MAXLEN];
f7fe979e 1636 const char *ptr = s;
08ca2aa3 1637 STRLEN len;
08ca2aa3
TH
1638 /* Bug: warns about bad utf8 even if we are short on bytes
1639 and will break out of the loop */
230e1fce
NC
1640 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1641 'U'))
08ca2aa3
TH
1642 break;
1643 len = UTF8SKIP(result);
fc241834 1644 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1645 (char *) &result[1], len-1, 'U')) break;
08ca2aa3
TH
1646 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1647 s = ptr;
1648 } else {
1649 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1650 if (retlen == (STRLEN) -1 || retlen == 0)
1651 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1652 s += retlen;
1653 }
1654 if (!checksum)
1655 PUSHs(sv_2mortal(newSVuv((UV) auv)));
73cb7263 1656 else if (checksum > bits_in_uv)
08ca2aa3 1657 cdouble += (NV) auv;
73cb7263 1658 else
08ca2aa3 1659 cuv += auv;
a6ec74c1
JH
1660 }
1661 break;
49704364
LW
1662 case 's' | TYPE_IS_SHRIEKING:
1663#if SHORTSIZE != SIZE16
73cb7263 1664 while (len-- > 0) {
08ca2aa3 1665 short ashort;
f337b084
TH
1666 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1667 DO_BO_UNPACK(ashort, s);
08ca2aa3 1668 if (!checksum)
ac7f3b1b 1669 PUSHs(sv_2mortal(newSViv((IV)ashort)));
73cb7263
NC
1670 else if (checksum > bits_in_uv)
1671 cdouble += (NV)ashort;
1672 else
1673 cuv += ashort;
49704364
LW
1674 }
1675 break;
1676#else
1677 /* Fallthrough! */
a6ec74c1 1678#endif
49704364 1679 case 's':
73cb7263 1680 while (len-- > 0) {
08ca2aa3
TH
1681 I16 ai16;
1682
1683#if U16SIZE > SIZE16
1684 ai16 = 0;
1685#endif
f337b084 1686 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1687 DO_BO_UNPACK(ai16, 16);
1109a392 1688#if U16SIZE > SIZE16
73cb7263
NC
1689 if (ai16 > 32767)
1690 ai16 -= 65536;
a6ec74c1 1691#endif
08ca2aa3 1692 if (!checksum)
ac7f3b1b 1693 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263
NC
1694 else if (checksum > bits_in_uv)
1695 cdouble += (NV)ai16;
1696 else
1697 cuv += ai16;
a6ec74c1
JH
1698 }
1699 break;
49704364
LW
1700 case 'S' | TYPE_IS_SHRIEKING:
1701#if SHORTSIZE != SIZE16
73cb7263 1702 while (len-- > 0) {
08ca2aa3 1703 unsigned short aushort;
f337b084
TH
1704 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1705 DO_BO_UNPACK(aushort, s);
08ca2aa3
TH
1706 if (!checksum)
1707 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
73cb7263
NC
1708 else if (checksum > bits_in_uv)
1709 cdouble += (NV)aushort;
1710 else
1711 cuv += aushort;
49704364
LW
1712 }
1713 break;
1714#else
1715 /* Fallhrough! */
1716#endif
a6ec74c1
JH
1717 case 'v':
1718 case 'n':
1719 case 'S':
73cb7263 1720 while (len-- > 0) {
08ca2aa3
TH
1721 U16 au16;
1722#if U16SIZE > SIZE16
1723 au16 = 0;
1724#endif
f337b084 1725 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1726 DO_BO_UNPACK(au16, 16);
a6ec74c1 1727#ifdef HAS_NTOHS
73cb7263
NC
1728 if (datumtype == 'n')
1729 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1730#endif
1731#ifdef HAS_VTOHS
73cb7263
NC
1732 if (datumtype == 'v')
1733 au16 = vtohs(au16);
a6ec74c1 1734#endif
08ca2aa3
TH
1735 if (!checksum)
1736 PUSHs(sv_2mortal(newSVuv((UV)au16)));
73cb7263 1737 else if (checksum > bits_in_uv)
f337b084 1738 cdouble += (NV) au16;
73cb7263
NC
1739 else
1740 cuv += au16;
a6ec74c1
JH
1741 }
1742 break;
7212898e 1743#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1744 case 'v' | TYPE_IS_SHRIEKING:
1745 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1746 while (len-- > 0) {
08ca2aa3
TH
1747 I16 ai16;
1748# if U16SIZE > SIZE16
1749 ai16 = 0;
1750# endif
f337b084 1751 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1752# ifdef HAS_NTOHS
73cb7263 1753 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1754 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1755# endif /* HAS_NTOHS */
1756# ifdef HAS_VTOHS
73cb7263 1757 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1758 ai16 = (I16) vtohs((U16) ai16);
1759# endif /* HAS_VTOHS */
1760 if (!checksum)
ac7f3b1b 1761 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263 1762 else if (checksum > bits_in_uv)
08ca2aa3 1763 cdouble += (NV) ai16;
73cb7263
NC
1764 else
1765 cuv += ai16;
068bd2e7
MHM
1766 }
1767 break;
08ca2aa3 1768#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1769 case 'i':
49704364 1770 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1771 while (len-- > 0) {
08ca2aa3 1772 int aint;
f337b084
TH
1773 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1774 DO_BO_UNPACK(aint, i);
08ca2aa3 1775 if (!checksum)
ac7f3b1b 1776 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1777 else if (checksum > bits_in_uv)
1778 cdouble += (NV)aint;
1779 else
1780 cuv += aint;
a6ec74c1
JH
1781 }
1782 break;
1783 case 'I':
49704364 1784 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1785 while (len-- > 0) {
08ca2aa3 1786 unsigned int auint;
f337b084
TH
1787 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1788 DO_BO_UNPACK(auint, i);
08ca2aa3 1789 if (!checksum)
ac7f3b1b 1790 PUSHs(sv_2mortal(newSVuv((UV)auint)));
73cb7263
NC
1791 else if (checksum > bits_in_uv)
1792 cdouble += (NV)auint;
1793 else
1794 cuv += auint;
a6ec74c1
JH
1795 }
1796 break;
92d41999 1797 case 'j':
73cb7263 1798 while (len-- > 0) {
08ca2aa3 1799 IV aiv;
f337b084 1800 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1801#if IVSIZE == INTSIZE
f337b084 1802 DO_BO_UNPACK(aiv, i);
1109a392 1803#elif IVSIZE == LONGSIZE
f337b084 1804 DO_BO_UNPACK(aiv, l);
1109a392 1805#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1806 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1807#else
1808 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1809#endif
08ca2aa3 1810 if (!checksum)
ac7f3b1b 1811 PUSHs(sv_2mortal(newSViv(aiv)));
73cb7263
NC
1812 else if (checksum > bits_in_uv)
1813 cdouble += (NV)aiv;
1814 else
1815 cuv += aiv;
92d41999
JH
1816 }
1817 break;
1818 case 'J':
73cb7263 1819 while (len-- > 0) {
08ca2aa3 1820 UV auv;
f337b084 1821 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1822#if IVSIZE == INTSIZE
f337b084 1823 DO_BO_UNPACK(auv, i);
08ca2aa3 1824#elif IVSIZE == LONGSIZE
f337b084 1825 DO_BO_UNPACK(auv, l);
08ca2aa3 1826#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1827 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1828#else
1829 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1830#endif
08ca2aa3 1831 if (!checksum)
ac7f3b1b 1832 PUSHs(sv_2mortal(newSVuv(auv)));
73cb7263
NC
1833 else if (checksum > bits_in_uv)
1834 cdouble += (NV)auv;
1835 else
1836 cuv += auv;
92d41999
JH
1837 }
1838 break;
49704364
LW
1839 case 'l' | TYPE_IS_SHRIEKING:
1840#if LONGSIZE != SIZE32
73cb7263 1841 while (len-- > 0) {
08ca2aa3 1842 long along;
f337b084
TH
1843 SHIFT_VAR(utf8, s, strend, along, datumtype);
1844 DO_BO_UNPACK(along, l);
08ca2aa3 1845 if (!checksum)
ac7f3b1b 1846 PUSHs(sv_2mortal(newSViv((IV)along)));
73cb7263
NC
1847 else if (checksum > bits_in_uv)
1848 cdouble += (NV)along;
1849 else
1850 cuv += along;
49704364
LW
1851 }
1852 break;
1853#else
1854 /* Fallthrough! */
a6ec74c1 1855#endif
49704364 1856 case 'l':
73cb7263 1857 while (len-- > 0) {
08ca2aa3
TH
1858 I32 ai32;
1859#if U32SIZE > SIZE32
1860 ai32 = 0;
1861#endif
f337b084 1862 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1863 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1864#if U32SIZE > SIZE32
08ca2aa3 1865 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1866#endif
08ca2aa3 1867 if (!checksum)
ac7f3b1b 1868 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1869 else if (checksum > bits_in_uv)
1870 cdouble += (NV)ai32;
1871 else
1872 cuv += ai32;
a6ec74c1
JH
1873 }
1874 break;
49704364
LW
1875 case 'L' | TYPE_IS_SHRIEKING:
1876#if LONGSIZE != SIZE32
73cb7263 1877 while (len-- > 0) {
08ca2aa3 1878 unsigned long aulong;
f337b084
TH
1879 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1880 DO_BO_UNPACK(aulong, l);
08ca2aa3 1881 if (!checksum)
ac7f3b1b 1882 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
73cb7263
NC
1883 else if (checksum > bits_in_uv)
1884 cdouble += (NV)aulong;
1885 else
1886 cuv += aulong;
49704364
LW
1887 }
1888 break;
1889#else
1890 /* Fall through! */
1891#endif
a6ec74c1
JH
1892 case 'V':
1893 case 'N':
1894 case 'L':
73cb7263 1895 while (len-- > 0) {
08ca2aa3
TH
1896 U32 au32;
1897#if U32SIZE > SIZE32
1898 au32 = 0;
1899#endif
f337b084 1900 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1901 DO_BO_UNPACK(au32, 32);
a6ec74c1 1902#ifdef HAS_NTOHL
73cb7263
NC
1903 if (datumtype == 'N')
1904 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1905#endif
1906#ifdef HAS_VTOHL
73cb7263
NC
1907 if (datumtype == 'V')
1908 au32 = vtohl(au32);
a6ec74c1 1909#endif
08ca2aa3 1910 if (!checksum)
fc241834
RGS
1911 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1912 else if (checksum > bits_in_uv)
1913 cdouble += (NV)au32;
1914 else
1915 cuv += au32;
a6ec74c1
JH
1916 }
1917 break;
7212898e 1918#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1919 case 'V' | TYPE_IS_SHRIEKING:
1920 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1921 while (len-- > 0) {
08ca2aa3
TH
1922 I32 ai32;
1923# if U32SIZE > SIZE32
1924 ai32 = 0;
1925# endif
f337b084 1926 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1927# ifdef HAS_NTOHL
73cb7263
NC
1928 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1929 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1930# endif
1931# ifdef HAS_VTOHL
73cb7263
NC
1932 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1933 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1934# endif
1935 if (!checksum)
ac7f3b1b 1936 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1937 else if (checksum > bits_in_uv)
1938 cdouble += (NV)ai32;
1939 else
1940 cuv += ai32;
068bd2e7
MHM
1941 }
1942 break;
08ca2aa3 1943#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1944 case 'p':
a6ec74c1 1945 while (len-- > 0) {
f7fe979e 1946 const char *aptr;
f337b084 1947 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1948 DO_BO_UNPACK_PC(aptr);
c4c5f44a
NC
1949 /* newSVpv generates undef if aptr is NULL */
1950 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
a6ec74c1
JH
1951 }
1952 break;
1953 case 'w':
a6ec74c1
JH
1954 {
1955 UV auv = 0;
1956 U32 bytes = 0;
fc241834 1957
08ca2aa3
TH
1958 while (len > 0 && s < strend) {
1959 U8 ch;
f337b084 1960 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1961 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1962 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1963 if (ch < 0x80) {
a6ec74c1 1964 bytes = 0;
ac7f3b1b 1965 PUSHs(sv_2mortal(newSVuv(auv)));
a6ec74c1
JH
1966 len--;
1967 auv = 0;
08ca2aa3 1968 continue;
a6ec74c1 1969 }
08ca2aa3 1970 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1971 const char *t;
a6ec74c1 1972
f5992bc4 1973 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 1974 while (s < strend) {
f337b084 1975 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1976 sv = mul128(sv, (U8)(ch & 0x7f));
1977 if (!(ch & 0x80)) {
a6ec74c1
JH
1978 bytes = 0;
1979 break;
1980 }
1981 }
10516c54 1982 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1983 while (*t == '0')
1984 t++;
1985 sv_chop(sv, t);
1986 PUSHs(sv_2mortal(sv));
1987 len--;
1988 auv = 0;
1989 }
1990 }
1991 if ((s >= strend) && bytes)
49704364 1992 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
1993 }
1994 break;
1995 case 'P':
49704364
LW
1996 if (symptr->howlen == e_star)
1997 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 1998 EXTEND(SP, 1);
2d3e0934 1999 if (s + sizeof(char*) <= strend) {
08ca2aa3 2000 char *aptr;
f337b084 2001 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 2002 DO_BO_UNPACK_PC(aptr);
fc241834
RGS
2003 /* newSVpvn generates undef if aptr is NULL */
2004 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
08ca2aa3 2005 }
a6ec74c1
JH
2006 break;
2007#ifdef HAS_QUAD
2008 case 'q':
73cb7263 2009 while (len-- > 0) {
08ca2aa3 2010 Quad_t aquad;
f337b084
TH
2011 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2012 DO_BO_UNPACK(aquad, 64);
08ca2aa3
TH
2013 if (!checksum)
2014 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
ac7f3b1b 2015 newSViv((IV)aquad) : newSVnv((NV)aquad)));
73cb7263
NC
2016 else if (checksum > bits_in_uv)
2017 cdouble += (NV)aquad;
2018 else
2019 cuv += aquad;
2020 }
a6ec74c1
JH
2021 break;
2022 case 'Q':
73cb7263 2023 while (len-- > 0) {
08ca2aa3 2024 Uquad_t auquad;
f337b084
TH
2025 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2026 DO_BO_UNPACK(auquad, 64);
08ca2aa3
TH
2027 if (!checksum)
2028 PUSHs(sv_2mortal(auquad <= UV_MAX ?
2029 newSVuv((UV)auquad):newSVnv((NV)auquad)));
73cb7263
NC
2030 else if (checksum > bits_in_uv)
2031 cdouble += (NV)auquad;
2032 else
2033 cuv += auquad;
a6ec74c1
JH
2034 }
2035 break;
08ca2aa3 2036#endif /* HAS_QUAD */
a6ec74c1
JH
2037 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2038 case 'f':
73cb7263 2039 while (len-- > 0) {
08ca2aa3 2040 float afloat;
f337b084 2041 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
2042 DO_BO_UNPACK_N(afloat, float);
2043 if (!checksum)
2044 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2045 else
73cb7263 2046 cdouble += afloat;
fc241834 2047 }
a6ec74c1
JH
2048 break;
2049 case 'd':
73cb7263 2050 while (len-- > 0) {
08ca2aa3 2051 double adouble;
f337b084 2052 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
2053 DO_BO_UNPACK_N(adouble, double);
2054 if (!checksum)
2055 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2056 else
73cb7263 2057 cdouble += adouble;
fc241834 2058 }
a6ec74c1 2059 break;
92d41999 2060 case 'F':
73cb7263 2061 while (len-- > 0) {
08ca2aa3 2062 NV anv;
f337b084 2063 SHIFT_VAR(utf8, s, strend, anv, datumtype);
08ca2aa3
TH
2064 DO_BO_UNPACK_N(anv, NV);
2065 if (!checksum)
2066 PUSHs(sv_2mortal(newSVnv(anv)));
2067 else
73cb7263 2068 cdouble += anv;
fc241834 2069 }
92d41999
JH
2070 break;
2071#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2072 case 'D':
73cb7263 2073 while (len-- > 0) {
08ca2aa3 2074 long double aldouble;
f337b084 2075 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
08ca2aa3
TH
2076 DO_BO_UNPACK_N(aldouble, long double);
2077 if (!checksum)
2078 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2079 else
2080 cdouble += aldouble;
92d41999
JH
2081 }
2082 break;
2083#endif
a6ec74c1 2084 case 'u':
08ca2aa3 2085 {
f7fe979e 2086 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 2087 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
2088 if (l) SvPOK_on(sv);
2089 }
2090 if (utf8) {
2091 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2092 I32 a, b, c, d;
db187877 2093 char hunk[3];
08ca2aa3 2094
08ca2aa3
TH
2095 while (len > 0) {
2096 next_uni_uu(aTHX_ &s, strend, &a);
2097 next_uni_uu(aTHX_ &s, strend, &b);
2098 next_uni_uu(aTHX_ &s, strend, &c);
2099 next_uni_uu(aTHX_ &s, strend, &d);
2100 hunk[0] = (char)((a << 2) | (b >> 4));
2101 hunk[1] = (char)((b << 4) | (c >> 2));
2102 hunk[2] = (char)((c << 6) | d);
2103 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2104 len -= 3;
2105 }
2106 if (s < strend) {
f7fe979e
AL
2107 if (*s == '\n') {
2108 s++;
2109 }
08ca2aa3
TH
2110 else {
2111 /* possible checksum byte */
f7fe979e
AL
2112 const char *skip = s+UTF8SKIP(s);
2113 if (skip < strend && *skip == '\n')
2114 s = skip+1;
08ca2aa3
TH
2115 }
2116 }
2117 }
2118 } else {
fc241834
RGS
2119 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2120 I32 a, b, c, d;
db187877 2121 char hunk[3];
a6ec74c1 2122
fc241834
RGS
2123 len = PL_uudmap[*(U8*)s++] & 077;
2124 while (len > 0) {
2125 if (s < strend && ISUUCHAR(*s))
2126 a = PL_uudmap[*(U8*)s++] & 077;
2127 else
2128 a = 0;
2129 if (s < strend && ISUUCHAR(*s))
2130 b = PL_uudmap[*(U8*)s++] & 077;
2131 else
2132 b = 0;
2133 if (s < strend && ISUUCHAR(*s))
2134 c = PL_uudmap[*(U8*)s++] & 077;
2135 else
2136 c = 0;
2137 if (s < strend && ISUUCHAR(*s))
2138 d = PL_uudmap[*(U8*)s++] & 077;
2139 else
2140 d = 0;
2141 hunk[0] = (char)((a << 2) | (b >> 4));
2142 hunk[1] = (char)((b << 4) | (c >> 2));
2143 hunk[2] = (char)((c << 6) | d);
2144 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2145 len -= 3;
2146 }
2147 if (*s == '\n')
2148 s++;
2149 else /* possible checksum byte */
2150 if (s + 1 < strend && s[1] == '\n')
2151 s += 2;
a6ec74c1 2152 }
08ca2aa3
TH
2153 }
2154 XPUSHs(sv);
a6ec74c1
JH
2155 break;
2156 }
49704364 2157
a6ec74c1 2158 if (checksum) {
1109a392 2159 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2160 (checksum > bits_in_uv &&
08ca2aa3
TH
2161 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2162 NV trouble, anv;
a6ec74c1 2163
08ca2aa3 2164 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2165 while (checksum >= 16) {
2166 checksum -= 16;
08ca2aa3 2167 anv *= 65536.0;
a6ec74c1 2168 }
a6ec74c1 2169 while (cdouble < 0.0)
08ca2aa3
TH
2170 cdouble += anv;
2171 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 2172 sv = newSVnv(cdouble);
a6ec74c1
JH
2173 }
2174 else {
fa8ec7c1
NC
2175 if (checksum < bits_in_uv) {
2176 UV mask = ((UV)1 << checksum) - 1;
92d41999 2177 cuv &= mask;
a6ec74c1 2178 }
c4c5f44a 2179 sv = newSVuv(cuv);
a6ec74c1
JH
2180 }
2181 XPUSHs(sv_2mortal(sv));
2182 checksum = 0;
2183 }
fc241834 2184
49704364
LW
2185 if (symptr->flags & FLAG_SLASH){
2186 if (SP - PL_stack_base - start_sp_offset <= 0)
2187 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2188 if( next_symbol(symptr) ){
2189 if( symptr->howlen == e_number )
2190 Perl_croak(aTHX_ "Count after length/code in unpack" );
2191 if( beyond ){
2192 /* ...end of char buffer then no decent length available */
2193 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2194 } else {
2195 /* take top of stack (hope it's numeric) */
2196 len = POPi;
2197 if( len < 0 )
2198 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2199 }
2200 } else {
2201 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2202 }
2203 datumtype = symptr->code;
21c16052 2204 explicit_length = FALSE;
49704364
LW
2205 goto redo_switch;
2206 }
a6ec74c1 2207 }
49704364 2208
18529408
IZ
2209 if (new_s)
2210 *new_s = s;
2211 PUTBACK;
2212 return SP - PL_stack_base - start_sp_offset;
2213}
2214
2215PP(pp_unpack)
2216{
97aff369 2217 dVAR;
18529408 2218 dSP;
bab9c0ac 2219 dPOPPOPssrl;
18529408
IZ
2220 I32 gimme = GIMME_V;
2221 STRLEN llen;
2222 STRLEN rlen;
5c144d81
NC
2223 const char *pat = SvPV_const(left, llen);
2224 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
2225 const char *strend = s + rlen;
2226 const char *patend = pat + llen;
08ca2aa3 2227 I32 cnt;
18529408
IZ
2228
2229 PUTBACK;
7accc089 2230 cnt = unpackstring(pat, patend, s, strend,
49704364 2231 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2232 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2233
18529408
IZ
2234 SPAGAIN;
2235 if ( !cnt && gimme == G_SCALAR )
2236 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2237 RETURN;
2238}
2239
f337b084 2240STATIC U8 *
f7fe979e 2241doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 2242{
f337b084 2243 *h++ = PL_uuemap[len];
a6ec74c1 2244 while (len > 2) {
f337b084
TH
2245 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2246 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2247 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2248 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2249 s += 3;
2250 len -= 3;
2251 }
2252 if (len > 0) {
f7fe979e 2253 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2254 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2255 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2256 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2257 *h++ = PL_uuemap[0];
a6ec74c1 2258 }
f337b084
TH
2259 *h++ = '\n';
2260 return h;
a6ec74c1
JH
2261}
2262
2263STATIC SV *
f7fe979e 2264S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 2265{
8b6e33c7
AL
2266 SV *result = newSVpvn(s, l);
2267 char *const result_c = SvPV_nolen(result); /* convenience */
2268 char *out = result_c;
2269 bool skip = 1;
2270 bool ignore = 0;
a6ec74c1
JH
2271
2272 while (*s) {
2273 switch (*s) {
2274 case ' ':
2275 break;
2276 case '+':
2277 if (!skip) {
2278 SvREFCNT_dec(result);
2279 return (NULL);
2280 }
2281 break;
2282 case '0':
2283 case '1':
2284 case '2':
2285 case '3':
2286 case '4':
2287 case '5':
2288 case '6':
2289 case '7':
2290 case '8':
2291 case '9':
2292 skip = 0;
2293 if (!ignore) {
2294 *(out++) = *s;
2295 }
2296 break;
2297 case '.':
2298 ignore = 1;
2299 break;
2300 default:
2301 SvREFCNT_dec(result);
2302 return (NULL);
2303 }
2304 s++;
2305 }
2306 *(out++) = '\0';
2307 SvCUR_set(result, out - result_c);
2308 return (result);
2309}
2310
2311/* pnum must be '\0' terminated */
2312STATIC int
2313S_div128(pTHX_ SV *pnum, bool *done)
2314{
8b6e33c7
AL
2315 STRLEN len;
2316 char * const s = SvPV(pnum, len);
2317 char *t = s;
2318 int m = 0;
2319
2320 *done = 1;
2321 while (*t) {
2322 const int i = m * 10 + (*t - '0');
2323 const int r = (i >> 7); /* r < 10 */
2324 m = i & 0x7F;
2325 if (r) {
2326 *done = 0;
2327 }
2328 *(t++) = '0' + r;
a6ec74c1 2329 }
8b6e33c7
AL
2330 *(t++) = '\0';
2331 SvCUR_set(pnum, (STRLEN) (t - s));
2332 return (m);
a6ec74c1
JH
2333}
2334
18529408 2335/*
7accc089
JH
2336=for apidoc packlist
2337
2338The engine implementing pack() Perl function.
2339
bfce84ec
AL
2340=cut
2341*/
7accc089
JH
2342
2343void
f7fe979e 2344Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
7accc089 2345{
97aff369 2346 dVAR;
aadb217d
JH
2347 tempsym_t sym;
2348
f7fe979e 2349 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 2350
f337b084
TH
2351 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2352 Also make sure any UTF8 flag is loaded */
56eb0262 2353 SvPV_force_nolen(cat);
bfce84ec
AL
2354 if (DO_UTF8(cat))
2355 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 2356
49704364
LW
2357 (void)pack_rec( cat, &sym, beglist, endlist );
2358}
2359
f337b084
TH
2360/* like sv_utf8_upgrade, but also repoint the group start markers */
2361STATIC void
2362marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2363 STRLEN len;
2364 tempsym_t *group;
f7fe979e
AL
2365 const char *from_ptr, *from_start, *from_end, **marks, **m;
2366 char *to_start, *to_ptr;
f337b084
TH
2367
2368 if (SvUTF8(sv)) return;
2369
aa07b2f6 2370 from_start = SvPVX_const(sv);
f337b084
TH
2371 from_end = from_start + SvCUR(sv);
2372 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2373 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2374 if (from_ptr == from_end) {
2375 /* Simple case: no character needs to be changed */
2376 SvUTF8_on(sv);
2377 return;
2378 }
2379
3473cf63 2380 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2381 Newx(to_start, len, char);
f337b084
TH
2382 Copy(from_start, to_start, from_ptr-from_start, char);
2383 to_ptr = to_start + (from_ptr-from_start);
2384
a02a5408 2385 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2386 for (group=sym_ptr; group; group = group->previous)
2387 marks[group->level] = from_start + group->strbeg;
2388 marks[sym_ptr->level+1] = from_end+1;
2389 for (m = marks; *m < from_ptr; m++)
2390 *m = to_start + (*m-from_start);
2391
2392 for (;from_ptr < from_end; from_ptr++) {
2393 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2394 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2395 }
2396 *to_ptr = 0;
2397
2398 while (*m == from_ptr) *m++ = to_ptr;
2399 if (m != marks + sym_ptr->level+1) {
2400 Safefree(marks);
2401 Safefree(to_start);
2402 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2403 }
2404 for (group=sym_ptr; group; group = group->previous)
2405 group->strbeg = marks[group->level] - to_start;
2406 Safefree(marks);
2407
2408 if (SvOOK(sv)) {
2409 if (SvIVX(sv)) {
b162af07 2410 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2411 from_start -= SvIVX(sv);
2412 SvIV_set(sv, 0);
2413 }
2414 SvFLAGS(sv) &= ~SVf_OOK;
2415 }
2416 if (SvLEN(sv) != 0)
2417 Safefree(from_start);
f880fe2f 2418 SvPV_set(sv, to_start);
b162af07
SP
2419 SvCUR_set(sv, to_ptr - to_start);
2420 SvLEN_set(sv, len);
f337b084
TH
2421 SvUTF8_on(sv);
2422}
2423
2424/* Exponential string grower. Makes string extension effectively O(n)
2425 needed says how many extra bytes we need (not counting the final '\0')
2426 Only grows the string if there is an actual lack of space
2427*/
2428STATIC char *
0bd48802 2429S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2430 const STRLEN cur = SvCUR(sv);
2431 const STRLEN len = SvLEN(sv);
f337b084
TH
2432 STRLEN extend;
2433 if (len - cur > needed) return SvPVX(sv);
2434 extend = needed > len ? needed : len;
2435 return SvGROW(sv, len+extend+1);
2436}
49704364
LW
2437
2438STATIC
2439SV **
f337b084 2440S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2441{
97aff369 2442 dVAR;
49704364 2443 tempsym_t lookahead;
f337b084
TH
2444 I32 items = endlist - beglist;
2445 bool found = next_symbol(symptr);
2446 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2447 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084
TH
2448
2449 if (symptr->level == 0 && found && symptr->code == 'U') {
2450 marked_upgrade(aTHX_ cat, symptr);
2451 symptr->flags |= FLAG_DO_UTF8;
2452 utf8 = 0;
49704364 2453 }
f337b084 2454 symptr->strbeg = SvCUR(cat);
49704364
LW
2455
2456 while (found) {
f337b084
TH
2457 SV *fromstr;
2458 STRLEN fromlen;
2459 I32 len;
a0714e2c 2460 SV *lengthcode = NULL;
49704364 2461 I32 datumtype = symptr->code;
f337b084
TH
2462 howlen_t howlen = symptr->howlen;
2463 char *start = SvPVX(cat);
2464 char *cur = start + SvCUR(cat);
49704364 2465
f337b084
TH
2466#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2467
2468 switch (howlen) {
fc241834 2469 case e_star:
f337b084
TH
2470 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2471 0 : items;
2472 break;
2473 default:
2474 /* e_no_len and e_number */
2475 len = symptr->length;
49704364
LW
2476 break;
2477 }
2478
f337b084 2479 if (len) {
a7a3cfaa 2480 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2481
a7a3cfaa
TH
2482 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2483 /* We can process this letter. */
2484 STRLEN size = props & PACK_SIZE_MASK;
2485 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2486 }
f337b084
TH
2487 }
2488
49704364
LW
2489 /* Look ahead for next symbol. Do we have code/code? */
2490 lookahead = *symptr;
2491 found = next_symbol(&lookahead);
246f24af
TH
2492 if (symptr->flags & FLAG_SLASH) {
2493 IV count;
f337b084 2494 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2495 if (strchr("aAZ", lookahead.code)) {
2496 if (lookahead.howlen == e_number) count = lookahead.length;
2497 else {
ce399ba6
NC
2498 if (items > 0) {
2499 if (SvGAMAGIC(*beglist)) {
2500 /* Avoid reading the active data more than once
2501 by copying it to a temporary. */
2502 STRLEN len;
2503 const char *const pv = SvPV_const(*beglist, len);
2504 SV *const temp = sv_2mortal(newSVpvn(pv, len));
2505 if (SvUTF8(*beglist))
2506 SvUTF8_on(temp);
2507 *beglist = temp;
2508 }
246f24af
TH
2509 count = DO_UTF8(*beglist) ?
2510 sv_len_utf8(*beglist) : sv_len(*beglist);
ce399ba6 2511 }
246f24af
TH
2512 else count = 0;
2513 if (lookahead.code == 'Z') count++;
2514 }
2515 } else {
2516 if (lookahead.howlen == e_number && lookahead.length < items)
2517 count = lookahead.length;
2518 else count = items;
2519 }
2520 lookahead.howlen = e_number;
2521 lookahead.length = count;
2522 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2523 }
49704364 2524
fc241834
RGS
2525 /* Code inside the switch must take care to properly update
2526 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2527 doesn't simply leave using break */
1109a392 2528 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2529 default:
f337b084
TH
2530 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2531 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2532 case '%':
49704364 2533 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2534 {
2535 char *from;
2536#ifdef PERL_PACK_CAN_SHRIEKSIGN
2537 case '.' | TYPE_IS_SHRIEKING:
2538#endif
2539 case '.':
2540 if (howlen == e_star) from = start;
2541 else if (len == 0) from = cur;
2542 else {
2543 tempsym_t *group = symptr;
2544
2545 while (--len && group) group = group->previous;
2546 from = group ? start + group->strbeg : start;
2547 }
2548 fromstr = NEXTFROM;
2549 len = SvIV(fromstr);
2550 goto resize;
2551#ifdef PERL_PACK_CAN_SHRIEKSIGN
2552 case '@' | TYPE_IS_SHRIEKING:
2553#endif
a6ec74c1 2554 case '@':
28be1210
TH
2555 from = start + symptr->strbeg;
2556 resize:
2557#ifdef PERL_PACK_CAN_SHRIEKSIGN
2558 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2559#else /* PERL_PACK_CAN_SHRIEKSIGN */
2560 if (utf8)
2561#endif
2562 if (len >= 0) {
2563 while (len && from < cur) {
2564 from += UTF8SKIP(from);
2565 len--;
2566 }
2567 if (from > cur)
2568 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2569 if (len) {
2570 /* Here we know from == cur */
2571 grow:
2572 GROWING(0, cat, start, cur, len);
2573 Zero(cur, len, char);
2574 cur += len;
2575 } else if (from < cur) {
2576 len = cur - from;
2577 goto shrink;
2578 } else goto no_change;
2579 } else {
2580 cur = from;
2581 len = -len;
2582 goto utf8_shrink;
f337b084 2583 }
28be1210
TH
2584 else {
2585 len -= cur - from;
f337b084 2586 if (len > 0) goto grow;
28be1210 2587 if (len == 0) goto no_change;
fc241834 2588 len = -len;
28be1210 2589 goto shrink;
f337b084 2590 }
a6ec74c1 2591 break;
28be1210 2592 }
fc241834 2593 case '(': {
49704364 2594 tempsym_t savsym = *symptr;
66c611c5
MHM
2595 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2596 symptr->flags |= group_modifiers;
49704364
LW
2597 symptr->patend = savsym.grpend;
2598 symptr->level++;
f337b084 2599 symptr->previous = &lookahead;
18529408 2600 while (len--) {
f337b084
TH
2601 U32 was_utf8;
2602 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2603 else symptr->flags &= ~FLAG_PARSE_UTF8;
2604 was_utf8 = SvUTF8(cat);
49704364 2605 symptr->patptr = savsym.grpbeg;
f337b084
TH
2606 beglist = pack_rec(cat, symptr, beglist, endlist);
2607 if (SvUTF8(cat) != was_utf8)
2608 /* This had better be an upgrade while in utf8==0 mode */
2609 utf8 = 1;
2610
49704364 2611 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2612 break; /* No way to continue */
2613 }
ee790063 2614 items = endlist - beglist;
f337b084
TH
2615 lookahead.flags = symptr->flags & ~group_modifiers;
2616 goto no_change;
18529408 2617 }
62f95557
IZ
2618 case 'X' | TYPE_IS_SHRIEKING:
2619 if (!len) /* Avoid division by 0 */
2620 len = 1;
f337b084
TH
2621 if (utf8) {
2622 char *hop, *last;
2623 I32 l = len;
2624 hop = last = start;
2625 while (hop < cur) {
2626 hop += UTF8SKIP(hop);
2627 if (--l == 0) {
2628 last = hop;
2629 l = len;
2630 }
2631 }
2632 if (last > cur)
2633 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2634 cur = last;
2635 break;
2636 }
2637 len = (cur-start) % len;
62f95557 2638 /* FALL THROUGH */
a6ec74c1 2639 case 'X':
f337b084
TH
2640 if (utf8) {
2641 if (len < 1) goto no_change;
28be1210 2642 utf8_shrink:
f337b084
TH
2643 while (len > 0) {
2644 if (cur <= start)
28be1210
TH
2645 Perl_croak(aTHX_ "'%c' outside of string in pack",
2646 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2647 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2648 if (cur <= start)
28be1210
TH
2649 Perl_croak(aTHX_ "'%c' outside of string in pack",
2650 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2651 }
2652 len--;
2653 }
2654 } else {
fc241834 2655 shrink:
f337b084 2656 if (cur - start < len)
28be1210
TH
2657 Perl_croak(aTHX_ "'%c' outside of string in pack",
2658 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2659 cur -= len;
2660 }
2661 if (cur < start+symptr->strbeg) {
2662 /* Make sure group starts don't point into the void */
2663 tempsym_t *group;
9e27e96a 2664 const STRLEN length = cur-start;
f337b084
TH
2665 for (group = symptr;
2666 group && length < group->strbeg;
2667 group = group->previous) group->strbeg = length;
2668 lookahead.strbeg = length;
2669 }
a6ec74c1 2670 break;
fc241834
RGS
2671 case 'x' | TYPE_IS_SHRIEKING: {
2672 I32 ai32;
62f95557
IZ
2673 if (!len) /* Avoid division by 0 */
2674 len = 1;
230e1fce 2675 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2676 else ai32 = (cur - start) % len;
2677 if (ai32 == 0) goto no_change;
2678 len -= ai32;
2679 }
2680 /* FALL THROUGH */
a6ec74c1 2681 case 'x':
f337b084 2682 goto grow;
a6ec74c1
JH
2683 case 'A':
2684 case 'Z':
f337b084 2685 case 'a': {
f7fe979e 2686 const char *aptr;
f337b084 2687
a6ec74c1 2688 fromstr = NEXTFROM;
e62f0680 2689 aptr = SvPV_const(fromstr, fromlen);
f337b084 2690 if (DO_UTF8(fromstr)) {
f7fe979e 2691 const char *end, *s;
f337b084
TH
2692
2693 if (!utf8 && !SvUTF8(cat)) {
2694 marked_upgrade(aTHX_ cat, symptr);
2695 lookahead.flags |= FLAG_DO_UTF8;
2696 lookahead.strbeg = symptr->strbeg;
2697 utf8 = 1;
2698 start = SvPVX(cat);
2699 cur = start + SvCUR(cat);
2700 }
fc241834 2701 if (howlen == e_star) {
f337b084
TH
2702 if (utf8) goto string_copy;
2703 len = fromlen+1;
2704 }
2705 s = aptr;
2706 end = aptr + fromlen;
2707 fromlen = datumtype == 'Z' ? len-1 : len;
2708 while ((I32) fromlen > 0 && s < end) {
2709 s += UTF8SKIP(s);
2710 fromlen--;
2711 }
2712 if (s > end)
2713 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2714 if (utf8) {
fc241834 2715 len = fromlen;
f337b084
TH
2716 if (datumtype == 'Z') len++;
2717 fromlen = s-aptr;
2718 len += fromlen;
fc241834 2719
f337b084 2720 goto string_copy;
fc241834 2721 }
f337b084
TH
2722 fromlen = len - fromlen;
2723 if (datumtype == 'Z') fromlen--;
2724 if (howlen == e_star) {
2725 len = fromlen;
2726 if (datumtype == 'Z') len++;
fc241834 2727 }
f337b084 2728 GROWING(0, cat, start, cur, len);
fc241834 2729 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084
TH
2730 datumtype | TYPE_IS_PACK))
2731 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2732 cur += fromlen;
a6ec74c1 2733 len -= fromlen;
f337b084
TH
2734 } else if (utf8) {
2735 if (howlen == e_star) {
2736 len = fromlen;
2737 if (datumtype == 'Z') len++;
a6ec74c1 2738 }
f337b084
TH
2739 if (len <= (I32) fromlen) {
2740 fromlen = len;
2741 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2742 }
fc241834 2743 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2744 upgrade, so:
2745 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2746 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2747 len -= fromlen;
2748 while (fromlen > 0) {
230e1fce 2749 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2750 aptr++;
2751 fromlen--;
fc241834 2752 }
f337b084
TH
2753 } else {
2754 string_copy:
2755 if (howlen == e_star) {
2756 len = fromlen;
2757 if (datumtype == 'Z') len++;
2758 }
2759 if (len <= (I32) fromlen) {
2760 fromlen = len;
2761 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2762 }
f337b084
TH
2763 GROWING(0, cat, start, cur, len);
2764 Copy(aptr, cur, fromlen, char);
2765 cur += fromlen;
2766 len -= fromlen;
a6ec74c1 2767 }
f337b084
TH
2768 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2769 cur += len;
a6ec74c1 2770 break;
f337b084 2771 }
a6ec74c1 2772 case 'B':
f337b084 2773 case 'b': {
b83604b4 2774 const char *str, *end;
f337b084
TH
2775 I32 l, field_len;
2776 U8 bits;
2777 bool utf8_source;
2778 U32 utf8_flags;
a6ec74c1 2779
fc241834 2780 fromstr = NEXTFROM;
b83604b4 2781 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2782 end = str + fromlen;
2783 if (DO_UTF8(fromstr)) {
2784 utf8_source = TRUE;
041457d9 2785 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2786 } else {
2787 utf8_source = FALSE;
2788 utf8_flags = 0; /* Unused, but keep compilers happy */
2789 }
2790 if (howlen == e_star) len = fromlen;
2791 field_len = (len+7)/8;
2792 GROWING(utf8, cat, start, cur, field_len);
2793 if (len > (I32)fromlen) len = fromlen;
2794 bits = 0;
2795 l = 0;
2796 if (datumtype == 'B')
2797 while (l++ < len) {
2798 if (utf8_source) {
95b63a38 2799 UV val = 0;
f337b084
TH
2800 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2801 bits |= val & 1;
2802 } else bits |= *str++ & 1;
2803 if (l & 7) bits <<= 1;
fc241834 2804 else {
f337b084
TH
2805 PUSH_BYTE(utf8, cur, bits);
2806 bits = 0;
a6ec74c1
JH
2807 }
2808 }
f337b084
TH
2809 else
2810 /* datumtype == 'b' */
2811 while (l++ < len) {
2812 if (utf8_source) {
95b63a38 2813 UV val = 0;
f337b084
TH
2814 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2815 if (val & 1) bits |= 0x80;
2816 } else if (*str++ & 1)
2817 bits |= 0x80;
2818 if (l & 7) bits >>= 1;
fc241834 2819 else {
f337b084
TH
2820 PUSH_BYTE(utf8, cur, bits);
2821 bits = 0;
a6ec74c1
JH
2822 }
2823 }
f337b084
TH
2824 l--;
2825 if (l & 7) {
fc241834 2826 if (datumtype == 'B')
f337b084 2827 bits <<= 7 - (l & 7);
fc241834 2828 else
f337b084
TH
2829 bits >>= 7 - (l & 7);
2830 PUSH_BYTE(utf8, cur, bits);
2831 l += 7;
a6ec74c1 2832 }
f337b084
TH
2833 /* Determine how many chars are left in the requested field */
2834 l /= 8;
2835 if (howlen == e_star) field_len = 0;
2836 else field_len -= l;
2837 Zero(cur, field_len, char);
2838 cur += field_len;
a6ec74c1 2839 break;
f337b084 2840 }
a6ec74c1 2841 case 'H':
f337b084 2842 case 'h': {
10516c54 2843 const char *str, *end;
f337b084
TH
2844 I32 l, field_len;
2845 U8 bits;
2846 bool utf8_source;
2847 U32 utf8_flags;
a6ec74c1 2848
fc241834 2849 fromstr = NEXTFROM;
10516c54 2850 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2851 end = str + fromlen;
2852 if (DO_UTF8(fromstr)) {
2853 utf8_source = TRUE;
041457d9 2854 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2855 } else {
2856 utf8_source = FALSE;
2857 utf8_flags = 0; /* Unused, but keep compilers happy */
2858 }
2859 if (howlen == e_star) len = fromlen;
2860 field_len = (len+1)/2;
2861 GROWING(utf8, cat, start, cur, field_len);
2862 if (!utf8 && len > (I32)fromlen) len = fromlen;
2863 bits = 0;
2864 l = 0;
2865 if (datumtype == 'H')
2866 while (l++ < len) {
2867 if (utf8_source) {
95b63a38 2868 UV val = 0;
f337b084
TH
2869 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2870 if (val < 256 && isALPHA(val))
2871 bits |= (val + 9) & 0xf;
a6ec74c1 2872 else
f337b084
TH
2873 bits |= val & 0xf;
2874 } else if (isALPHA(*str))
2875 bits |= (*str++ + 9) & 0xf;
2876 else
2877 bits |= *str++ & 0xf;
2878 if (l & 1) bits <<= 4;
fc241834 2879 else {
f337b084
TH
2880 PUSH_BYTE(utf8, cur, bits);
2881 bits = 0;
a6ec74c1
JH
2882 }
2883 }
f337b084
TH
2884 else
2885 while (l++ < len) {
2886 if (utf8_source) {
95b63a38 2887 UV val = 0;
f337b084
TH
2888 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2889 if (val < 256 && isALPHA(val))
2890 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2891 else
f337b084
TH
2892 bits |= (val & 0xf) << 4;
2893 } else if (isALPHA(*str))
2894 bits |= ((*str++ + 9) & 0xf) << 4;
2895 else
2896 bits |= (*str++ & 0xf) << 4;
2897 if (l & 1) bits >>= 4;
fc241834 2898 else {
f337b084
TH
2899 PUSH_BYTE(utf8, cur, bits);
2900 bits = 0;
a6ec74c1 2901 }
fc241834 2902 }
f337b084
TH
2903 l--;
2904 if (l & 1) {
2905 PUSH_BYTE(utf8, cur, bits);
2906 l++;
2907 }
2908 /* Determine how many chars are left in the requested field */
2909 l /= 2;
2910 if (howlen == e_star) field_len = 0;
2911 else field_len -= l;
2912 Zero(cur, field_len, char);
2913 cur += field_len;
2914 break;
fc241834
RGS
2915 }
2916 case 'c':
f337b084
TH
2917 while (len-- > 0) {
2918 IV aiv;
2919 fromstr = NEXTFROM;
2920 aiv = SvIV(fromstr);
2921 if ((-128 > aiv || aiv > 127) &&
2922 ckWARN(WARN_PACK))
2923 Perl_warner(aTHX_ packWARN(WARN_PACK),
2924 "Character in 'c' format wrapped in pack");
585ec06d 2925 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2926 }
2927 break;
2928 case 'C':
f337b084
TH
2929 if (len == 0) {
2930 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2931 break;
2932 }
2933 GROWING(0, cat, start, cur, len);
a6ec74c1 2934 while (len-- > 0) {
f337b084 2935 IV aiv;
a6ec74c1 2936 fromstr = NEXTFROM;
f337b084
TH
2937 aiv = SvIV(fromstr);
2938 if ((0 > aiv || aiv > 0xff) &&
fc241834
RGS
2939 ckWARN(WARN_PACK))
2940 Perl_warner(aTHX_ packWARN(WARN_PACK),
2941 "Character in 'C' format wrapped in pack");
585ec06d 2942 *cur++ = (char)(aiv & 0xff);
f337b084 2943 }
fc241834
RGS
2944 break;
2945 case 'W': {
2946 char *end;
670f1322 2947 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2948
2949 end = start+SvLEN(cat)-1;
2950 if (utf8) end -= UTF8_MAXLEN-1;
2951 while (len-- > 0) {
2952 UV auv;
2953 fromstr = NEXTFROM;
2954 auv = SvUV(fromstr);
2955 if (in_bytes) auv = auv % 0x100;
2956 if (utf8) {
2957 W_utf8:
2958 if (cur > end) {
2959 *cur = '\0';
b162af07 2960 SvCUR_set(cat, cur - start);
fc241834
RGS
2961
2962 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2963 end = start+SvLEN(cat)-UTF8_MAXLEN;
2964 }
230e1fce
NC
2965 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2966 NATIVE_TO_UNI(auv),
041457d9 2967 warn_utf8 ?
230e1fce 2968 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
2969 } else {
2970 if (auv >= 0x100) {
2971 if (!SvUTF8(cat)) {
2972 *cur = '\0';
b162af07 2973 SvCUR_set(cat, cur - start);
fc241834
RGS
2974 marked_upgrade(aTHX_ cat, symptr);
2975 lookahead.flags |= FLAG_DO_UTF8;
2976 lookahead.strbeg = symptr->strbeg;
2977 utf8 = 1;
2978 start = SvPVX(cat);
2979 cur = start + SvCUR(cat);
2980 end = start+SvLEN(cat)-UTF8_MAXLEN;
2981 goto W_utf8;
2982 }
2983 if (ckWARN(WARN_PACK))
2984 Perl_warner(aTHX_ packWARN(WARN_PACK),
2985 "Character in 'W' format wrapped in pack");
2986 auv &= 0xff;
2987 }
2988 if (cur >= end) {
2989 *cur = '\0';
b162af07 2990 SvCUR_set(cat, cur - start);
fc241834
RGS
2991 GROWING(0, cat, start, cur, len+1);
2992 end = start+SvLEN(cat)-1;
2993 }
fe2774ed 2994 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
2995 }
2996 }
2997 break;
fc241834
RGS
2998 }
2999 case 'U': {
3000 char *end;
3001
3002 if (len == 0) {
3003 if (!(symptr->flags & FLAG_DO_UTF8)) {
3004 marked_upgrade(aTHX_ cat, symptr);
3005 lookahead.flags |= FLAG_DO_UTF8;
3006 lookahead.strbeg = symptr->strbeg;
3007 }
3008 utf8 = 0;
3009 goto no_change;
3010 }
3011
3012 end = start+SvLEN(cat);
3013 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 3014 while (len-- > 0) {
fc241834 3015 UV auv;
a6ec74c1 3016 fromstr = NEXTFROM;
fc241834
RGS
3017 auv = SvUV(fromstr);
3018 if (utf8) {
230e1fce 3019 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 3020 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 3021 warn_utf8 ?
fc241834
RGS
3022 0 : UNICODE_ALLOW_ANY);
3023 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3024 *cur = '\0';
b162af07 3025 SvCUR_set(cat, cur - start);
fc241834
RGS
3026 GROWING(0, cat, start, cur,
3027 len+(endb-buffer)*UTF8_EXPAND);
3028 end = start+SvLEN(cat);
3029 }
64844641 3030 cur = bytes_to_uni(buffer, endb-buffer, cur);
fc241834
RGS
3031 } else {
3032 if (cur >= end) {
3033 *cur = '\0';
b162af07 3034 SvCUR_set(cat, cur - start);
fc241834
RGS
3035 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3036 end = start+SvLEN(cat)-UTF8_MAXLEN;
3037 }
230e1fce 3038 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 3039 warn_utf8 ?
230e1fce 3040 0 : UNICODE_ALLOW_ANY);
fc241834 3041 }
a6ec74c1 3042 }
a6ec74c1 3043 break;
fc241834 3044 }
a6ec74c1
JH
3045 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3046 case 'f':
a6ec74c1 3047 while (len-- > 0) {
f337b084
TH
3048 float afloat;
3049 NV anv;
a6ec74c1 3050 fromstr = NEXTFROM;
f337b084 3051 anv = SvNV(fromstr);
5cdb9e01 3052#ifdef __VOS__
f337b084 3053 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3054 during conversion from double to float into infinity, so we
3055 do it by hand. This code should either be generalized for
3056 any OS that needs it, or removed if and when VOS implements
3057 posix-976 (suggestion to support mapping to infinity).
3058 Paul.Green@stratus.com 02-04-02. */
f337b084 3059 if (anv > FLT_MAX)
fc241834 3060 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3061 else if (anv < -FLT_MAX)
fc241834 3062 afloat = _float_constants[0]; /* single prec. inf. */
f337b084
TH
3063 else afloat = (float) anv;
3064#else /* __VOS__ */
baf3cf9c 3065# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3066 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3067 * on Alpha; fake it if we don't have them.
3068 */
f337b084 3069 if (anv > FLT_MAX)
fc241834 3070 afloat = FLT_MAX;
f337b084 3071 else if (anv < -FLT_MAX)
fc241834 3072 afloat = -FLT_MAX;
f337b084 3073 else afloat = (float)anv;
baf3cf9c 3074# else
f337b084 3075 afloat = (float)anv;
baf3cf9c 3076# endif
f337b084 3077#endif /* __VOS__ */
1109a392 3078 DO_BO_PACK_N(afloat, float);
f337b084 3079 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
3080 }
3081 break;
3082 case 'd':
a6ec74c1 3083 while (len-- > 0) {
f337b084
TH
3084 double adouble;
3085 NV anv;
a6ec74c1 3086 fromstr = NEXTFROM;
f337b084 3087 anv = SvNV(fromstr);
5cdb9e01 3088#ifdef __VOS__
f337b084 3089 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3090 during conversion from long double to double into infinity,
3091 so we do it by hand. This code should either be generalized
3092 for any OS that needs it, or removed if and when VOS
3093 implements posix-976 (suggestion to support mapping to
3094 infinity). Paul.Green@stratus.com 02-04-02. */
f337b084 3095 if (anv > DBL_MAX)
fc241834 3096 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3097 else if (anv < -DBL_MAX)
fc241834 3098 adouble = _double_constants[0]; /* double prec. inf. */
f337b084
TH
3099 else adouble = (double) anv;
3100#else /* __VOS__ */
baf3cf9c 3101# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3102 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3103 * on Alpha; fake it if we don't have them.
3104 */
f337b084 3105 if (anv > DBL_MAX)
fc241834 3106 adouble = DBL_MAX;
f337b084 3107 else if (anv < -DBL_MAX)
fc241834 3108 adouble = -DBL_MAX;
f337b084 3109 else adouble = (double)anv;
baf3cf9c 3110# else
f337b084 3111 adouble = (double)anv;
baf3cf9c 3112# endif
f337b084 3113#endif /* __VOS__ */
1109a392 3114 DO_BO_PACK_N(adouble, double);
f337b084 3115 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3116 }
3117 break;
fc241834
RGS
3118 case 'F': {
3119 NV anv;
1109a392 3120 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3121 while (len-- > 0) {
3122 fromstr = NEXTFROM;
3123 anv = SvNV(fromstr);
1109a392 3124 DO_BO_PACK_N(anv, NV);
fc241834 3125 PUSH_VAR(utf8, cur, anv);
92d41999
JH
3126 }
3127 break;
fc241834 3128 }
92d41999 3129#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834
RGS
3130 case 'D': {
3131 long double aldouble;
1109a392
MHM
3132 /* long doubles can have unused bits, which may be nonzero */
3133 Zero(&aldouble, 1, long double);
92d41999
JH
3134 while (len-- > 0) {
3135 fromstr = NEXTFROM;
3136 aldouble = (long double)SvNV(fromstr);
1109a392 3137 DO_BO_PACK_N(aldouble, long double);
fc241834 3138 PUSH_VAR(utf8, cur, aldouble);
92d41999
JH
3139 }
3140 break;
fc241834 3141 }
92d41999 3142#endif
7212898e 3143#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3144 case 'n' | TYPE_IS_SHRIEKING:
7212898e 3145#endif
a6ec74c1
JH
3146 case 'n':
3147 while (len-- > 0) {
f337b084 3148 I16 ai16;
a6ec74c1 3149 fromstr = NEXTFROM;
ef108786 3150 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3151#ifdef HAS_HTONS
ef108786 3152 ai16 = PerlSock_htons(ai16);
a6ec74c1 3153#endif
f337b084 3154 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3155 }
3156 break;
7212898e 3157#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3158 case 'v' | TYPE_IS_SHRIEKING:
7212898e 3159#endif
a6ec74c1
JH
3160 case 'v':
3161 while (len-- > 0) {
f337b084 3162 I16 ai16;
a6ec74c1 3163 fromstr = NEXTFROM;
ef108786 3164 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3165#ifdef HAS_HTOVS
ef108786 3166 ai16 = htovs(ai16);
a6ec74c1 3167#endif
f337b084 3168 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3169 }
3170 break;
49704364 3171 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3172#if SHORTSIZE != SIZE16
fc241834 3173 while (len-- > 0) {
f337b084 3174 unsigned short aushort;
fc241834
RGS
3175 fromstr = NEXTFROM;
3176 aushort = SvUV(fromstr);
3177 DO_BO_PACK(aushort, s);
f337b084 3178 PUSH_VAR(utf8, cur, aushort);
fc241834 3179 }
49704364
LW
3180 break;
3181#else
3182 /* Fall through! */
a6ec74c1 3183#endif
49704364 3184 case 'S':
fc241834 3185 while (len-- > 0) {
f337b084 3186 U16 au16;
fc241834
RGS
3187 fromstr = NEXTFROM;
3188 au16 = (U16)SvUV(fromstr);
3189 DO_BO_PACK(au16, 16);
f337b084 3190 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3191 }
3192 break;
49704364 3193 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3194#if SHORTSIZE != SIZE16
fc241834 3195 while (len-- > 0) {
f337b084 3196 short ashort;
fc241834
RGS
3197 fromstr = NEXTFROM;
3198 ashort = SvIV(fromstr);
3199 DO_BO_PACK(ashort, s);
f337b084 3200 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3201 }
49704364
LW
3202 break;
3203#else
3204 /* Fall through! */
a6ec74c1 3205#endif
49704364
LW
3206 case 's':
3207 while (len-- > 0) {
f337b084 3208 I16 ai16;
49704364 3209 fromstr = NEXTFROM;
ef108786
MHM
3210 ai16 = (I16)SvIV(fromstr);
3211 DO_BO_PACK(ai16, 16);
f337b084 3212 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3213 }
3214 break;
3215 case 'I':
49704364 3216 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3217 while (len-- > 0) {
f337b084 3218 unsigned int auint;
a6ec74c1
JH
3219 fromstr = NEXTFROM;
3220 auint = SvUV(fromstr);
1109a392 3221 DO_BO_PACK(auint, i);
f337b084 3222 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3223 }
3224 break;
92d41999
JH
3225 case 'j':
3226 while (len-- > 0) {
f337b084 3227 IV aiv;
92d41999
JH
3228 fromstr = NEXTFROM;
3229 aiv = SvIV(fromstr);
1109a392
MHM
3230#if IVSIZE == INTSIZE
3231 DO_BO_PACK(aiv, i);
3232#elif IVSIZE == LONGSIZE
3233 DO_BO_PACK(aiv, l);
3234#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3235 DO_BO_PACK(aiv, 64);
f337b084
TH
3236#else
3237 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3238#endif
f337b084 3239 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3240 }
3241 break;
3242 case 'J':
3243 while (len-- > 0) {
f337b084 3244 UV auv;
92d41999
JH
3245 fromstr = NEXTFROM;
3246 auv = SvUV(fromstr);
1109a392
MHM
3247#if UVSIZE == INTSIZE
3248 DO_BO_PACK(auv, i);
3249#elif UVSIZE == LONGSIZE
3250 DO_BO_PACK(auv, l);
3251#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3252 DO_BO_PACK(auv, 64);
f337b084
TH
3253#else
3254 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3255#endif
f337b084 3256 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3257 }
3258 break;
a6ec74c1
JH
3259 case 'w':
3260 while (len-- > 0) {
f337b084 3261 NV anv;
a6ec74c1 3262 fromstr = NEXTFROM;
15e9f109 3263 anv = SvNV(fromstr);
a6ec74c1 3264
f337b084
TH
3265 if (anv < 0) {
3266 *cur = '\0';
b162af07 3267 SvCUR_set(cat, cur - start);
49704364 3268 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3269 }
a6ec74c1 3270
196b62db
NC
3271 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3272 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3273 any negative IVs will have already been got by the croak()
3274 above. IOK is untrue for fractions, so we test them
3275 against UV_MAX_P1. */
f337b084
TH
3276 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3277 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3278 char *in = buf + sizeof(buf);
196b62db 3279 UV auv = SvUV(fromstr);
a6ec74c1
JH
3280
3281 do {
eb160463 3282 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3283 auv >>= 7;
3284 } while (auv);
3285 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3286 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3287 in, (buf + sizeof(buf)) - in);
3288 } else if (SvPOKp(fromstr))
3289 goto w_string;
a6ec74c1 3290 else if (SvNOKp(fromstr)) {
0258719b
NC
3291 /* 10**NV_MAX_10_EXP is the largest power of 10
3292 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3293 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3294 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3295 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3296 Some C compilers are strict about integral constant
3297 expressions so we conservatively divide by a slightly
3298 smaller integer instead of multiplying by the exact
3299 floating-point value.
0258719b
NC
3300 */
3301#ifdef NV_MAX_10_EXP
f337b084 3302 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3303 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3304#else
f337b084 3305 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3306 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3307#endif
a6ec74c1
JH
3308 char *in = buf + sizeof(buf);
3309
8b6e33c7 3310 anv = Perl_floor(anv);
a6ec74c1 3311 do {
8b6e33c7 3312 const NV next = Perl_floor(anv / 128);
a6ec74c1 3313 if (in <= buf) /* this cannot happen ;-) */
49704364 3314 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3315 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3316 anv = next;
3317 } while (anv > 0);
a6ec74c1 3318 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3319 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3320 in, (buf + sizeof(buf)) - in);
3321 } else {
8b6e33c7
AL
3322 const char *from;
3323 char *result, *in;
735b914b
JH
3324 SV *norm;
3325 STRLEN len;
3326 bool done;
3327
f337b084 3328 w_string:
735b914b 3329 /* Copy string and check for compliance */
349d4f2f 3330 from = SvPV_const(fromstr, len);
735b914b 3331 if ((norm = is_an_int(from, len)) == NULL)
49704364 3332 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 3333
a02a5408 3334 Newx(result, len, char);
735b914b
JH
3335 in = result + len;
3336 done = FALSE;
f337b084 3337 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3338 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3339 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3340 in, (result + len) - in);
735b914b
JH
3341 Safefree(result);
3342 SvREFCNT_dec(norm); /* free norm */
fc241834 3343 }
a6ec74c1
JH
3344 }
3345 break;
3346 case 'i':
49704364 3347 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3348 while (len-- > 0) {
f337b084 3349 int aint;
a6ec74c1
JH
3350 fromstr = NEXTFROM;
3351 aint = SvIV(fromstr);
1109a392 3352 DO_BO_PACK(aint, i);
f337b084 3353 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3354 }
3355 break;
7212898e 3356#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3357 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3358#endif
a6ec74c1
JH
3359 case 'N':
3360 while (len-- > 0) {
f337b084 3361 U32 au32;
a6ec74c1 3362 fromstr = NEXTFROM;
ef108786 3363 au32 = SvUV(fromstr);
a6ec74c1 3364#ifdef HAS_HTONL
ef108786 3365 au32 = PerlSock_htonl(au32);
a6ec74c1 3366#endif
f337b084 3367 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3368 }
3369 break;
7212898e 3370#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3371 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3372#endif
a6ec74c1
JH
3373 case 'V':
3374 while (len-- > 0) {
f337b084 3375 U32 au32;
a6ec74c1 3376 fromstr = NEXTFROM;
ef108786 3377 au32 = SvUV(fromstr);
a6ec74c1 3378#ifdef HAS_HTOVL
ef108786 3379 au32 = htovl(au32);
a6ec74c1 3380#endif
f337b084 3381 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3382 }
3383 break;
49704364 3384 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3385#if LONGSIZE != SIZE32
fc241834 3386 while (len-- > 0) {
f337b084 3387 unsigned long aulong;
fc241834
RGS
3388 fromstr = NEXTFROM;
3389 aulong = SvUV(fromstr);
3390 DO_BO_PACK(aulong, l);
f337b084 3391 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3392 }
49704364
LW
3393 break;
3394#else
3395 /* Fall though! */
a6ec74c1 3396#endif
49704364 3397 case 'L':
fc241834 3398 while (len-- > 0) {
f337b084 3399 U32 au32;
fc241834
RGS
3400 fromstr = NEXTFROM;
3401 au32 = SvUV(fromstr);
3402 DO_BO_PACK(au32, 32);
f337b084 3403 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3404 }
3405 break;
49704364 3406 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3407#if LONGSIZE != SIZE32
fc241834 3408 while (len-- > 0) {
f337b084 3409 long along;
fc241834
RGS
3410 fromstr = NEXTFROM;
3411 along = SvIV(fromstr);
3412 DO_BO_PACK(along, l);
f337b084 3413 PUSH_VAR(utf8, cur, along);
a6ec74c1 3414 }
49704364
LW
3415 break;
3416#else
3417 /* Fall though! */
a6ec74c1 3418#endif
49704364
LW
3419 case 'l':
3420 while (len-- > 0) {
f337b084 3421 I32 ai32;
49704364 3422 fromstr = NEXTFROM;
ef108786
MHM
3423 ai32 = SvIV(fromstr);
3424 DO_BO_PACK(ai32, 32);
f337b084 3425 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3426 }
3427 break;
3428#ifdef HAS_QUAD
3429 case 'Q':
3430 while (len-- > 0) {
f337b084 3431 Uquad_t auquad;
a6ec74c1 3432 fromstr = NEXTFROM;
f337b084 3433 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3434 DO_BO_PACK(auquad, 64);
f337b084 3435 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3436 }
3437 break;
3438 case 'q':
3439 while (len-- > 0) {
f337b084 3440 Quad_t aquad;
a6ec74c1
JH
3441 fromstr = NEXTFROM;
3442 aquad = (Quad_t)SvIV(fromstr);
1109a392 3443 DO_BO_PACK(aquad, 64);
f337b084 3444 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3445 }
3446 break;
f337b084 3447#endif /* HAS_QUAD */
a6ec74c1
JH
3448 case 'P':
3449 len = 1; /* assume SV is correct length */
f337b084 3450 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3451 /* Fall through! */
a6ec74c1
JH
3452 case 'p':
3453 while (len-- > 0) {
83003860 3454 const char *aptr;
f337b084 3455
a6ec74c1 3456 fromstr = NEXTFROM;
28a4f200 3457 SvGETMAGIC(fromstr);
3458 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3459 else {
a6ec74c1
JH
3460 /* XXX better yet, could spirit away the string to
3461 * a safe spot and hang on to it until the result
3462 * of pack() (and all copies of the result) are
3463 * gone.
3464 */
041457d9
DM
3465 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3466 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
9014280d 3467 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834 3468 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3469 }
3470 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3471 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3472 else
2596d9fe 3473 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3474 }
07409e01 3475 DO_BO_PACK_PC(aptr);
f337b084 3476 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3477 }
3478 break;
fc241834 3479 case 'u': {
f7fe979e 3480 const char *aptr, *aend;
fc241834 3481 bool from_utf8;
f337b084 3482
a6ec74c1 3483 fromstr = NEXTFROM;
fc241834
RGS
3484 if (len <= 2) len = 45;
3485 else len = len / 3 * 3;
3486 if (len >= 64) {
0fdc08b6
RGS
3487 if (ckWARN(WARN_PACK))
3488 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834
RGS
3489 "Field too wide in 'u' format in pack");
3490 len = 63;
3491 }
83003860 3492 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3493 from_utf8 = DO_UTF8(fromstr);
3494 if (from_utf8) {
3495 aend = aptr + fromlen;
3496 fromlen = sv_len_utf8(fromstr);
3497 } else aend = NULL; /* Unused, but keep compilers happy */
3498 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3499 while (fromlen > 0) {
fc241834 3500 U8 *end;
a6ec74c1 3501 I32 todo;
fc241834 3502 U8 hunk[1+63/3*4+1];
a6ec74c1 3503
eb160463 3504 if ((I32)fromlen > len)
a6ec74c1
JH
3505 todo = len;
3506 else
3507 todo = fromlen;
fc241834
RGS
3508 if (from_utf8) {
3509 char buffer[64];
3510 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3511 'u' | TYPE_IS_PACK)) {
3512 *cur = '\0';
b162af07 3513 SvCUR_set(cat, cur - start);