This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re: Compress::Zlib, pack "C" and utf-8 [PATCH]
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
acde74e1 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 */
18
166f8a29
DM
19/* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
24 *
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
27 */
28
a6ec74c1
JH
29#include "EXTERN.h"
30#define PERL_IN_PP_PACK_C
31#include "perl.h"
32
f7fe979e
AL
33/* Types used by pack/unpack */
34typedef enum {
35 e_no_len, /* no length */
36 e_number, /* number, [] */
37 e_star /* asterisk */
38} howlen_t;
39
40typedef struct tempsym {
41 const char* patptr; /* current template char */
42 const char* patend; /* one after last char */
43 const char* grpbeg; /* 1st char of ()-group */
44 const char* grpend; /* end of ()-group */
45 I32 code; /* template code (!<>) */
46 I32 length; /* length/repeat count */
47 howlen_t howlen; /* how length is given */
48 int level; /* () nesting level */
49 U32 flags; /* /=4, comma=2, pack=1 */
50 /* and group modifiers */
51 STRLEN strbeg; /* offset of group start */
52 struct tempsym *previous; /* previous group */
53} tempsym_t;
54
55#define TEMPSYM_INIT(symptr, p, e, f) \
56 STMT_START { \
57 (symptr)->patptr = (p); \
58 (symptr)->patend = (e); \
59 (symptr)->grpbeg = NULL; \
60 (symptr)->grpend = NULL; \
61 (symptr)->grpend = NULL; \
62 (symptr)->code = 0; \
63 (symptr)->length = 0; \
10edeb5d 64 (symptr)->howlen = e_no_len; \
f7fe979e
AL
65 (symptr)->level = 0; \
66 (symptr)->flags = (f); \
67 (symptr)->strbeg = 0; \
68 (symptr)->previous = NULL; \
69 } STMT_END
70
7212898e 71#if PERL_VERSION >= 9
f337b084
TH
72# define PERL_PACK_CAN_BYTEORDER
73# define PERL_PACK_CAN_SHRIEKSIGN
74#endif
75
76#ifndef CHAR_BIT
77# define CHAR_BIT 8
7212898e 78#endif
3473cf63
RGS
79/* Maximum number of bytes to which a byte can grow due to upgrade */
80#define UTF8_EXPAND 2
7212898e 81
a6ec74c1 82/*
a6ec74c1
JH
83 * Offset for integer pack/unpack.
84 *
85 * On architectures where I16 and I32 aren't really 16 and 32 bits,
86 * which for now are all Crays, pack and unpack have to play games.
87 */
88
89/*
90 * These values are required for portability of pack() output.
91 * If they're not right on your machine, then pack() and unpack()
92 * wouldn't work right anyway; you'll need to apply the Cray hack.
93 * (I'd like to check them with #if, but you can't use sizeof() in
94 * the preprocessor.) --???
95 */
96/*
97 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
98 defines are now in config.h. --Andy Dougherty April 1998
99 */
100#define SIZE16 2
101#define SIZE32 4
102
103/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
104 --jhi Feb 1999 */
105
1109a392
MHM
106#if U16SIZE > SIZE16 || U32SIZE > SIZE32
107# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
108# define OFF16(p) ((char*)(p))
109# define OFF32(p) ((char*)(p))
a6ec74c1 110# else
1109a392 111# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
112# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
113# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
114# else
08ca2aa3 115 ++++ bad cray byte order
a6ec74c1
JH
116# endif
117# endif
a6ec74c1 118#else
08ca2aa3
TH
119# define OFF16(p) ((char *) (p))
120# define OFF32(p) ((char *) (p))
a6ec74c1
JH
121#endif
122
f337b084
TH
123/* Only to be used inside a loop (see the break) */
124#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
125 if (utf8) { \
126 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
127 } else { \
128 Copy(s, OFF16(p), SIZE16, char); \
129 (s) += SIZE16; \
130 } \
131} STMT_END
132
133/* Only to be used inside a loop (see the break) */
134#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
135 if (utf8) { \
136 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
137 } else { \
138 Copy(s, OFF32(p), SIZE32, char); \
139 (s) += SIZE32; \
140 } \
141} STMT_END
142
143#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
144#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
145
146/* Only to be used inside a loop (see the break) */
f337b084 147#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
08ca2aa3
TH
148STMT_START { \
149 if (utf8) { \
f337b084
TH
150 if (!uni_to_bytes(aTHX_ &s, strend, \
151 (char *) &var, sizeof(var), datumtype)) break;\
08ca2aa3
TH
152 } else { \
153 Copy(s, (char *) &var, sizeof(var), char); \
154 s += sizeof(var); \
155 } \
08ca2aa3
TH
156} STMT_END
157
f337b084 158#define PUSH_VAR(utf8, aptr, var) \
230e1fce 159 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 160
49704364
LW
161/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
162#define MAX_SUB_TEMPLATE_LEVEL 100
163
66c611c5 164/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
165#define FLAG_WAS_UTF8 0x40
166#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 167#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 168#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
LW
169#define FLAG_SLASH 0x04
170#define FLAG_COMMA 0x02
171#define FLAG_PACK 0x01
172
a6ec74c1
JH
173STATIC SV *
174S_mul128(pTHX_ SV *sv, U8 m)
175{
176 STRLEN len;
177 char *s = SvPV(sv, len);
178 char *t;
a6ec74c1
JH
179
180 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 181 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
182
183 sv_catsv(tmpNew, sv);
184 SvREFCNT_dec(sv); /* free old sv */
185 sv = tmpNew;
186 s = SvPV(sv, len);
187 }
188 t = s + len - 1;
189 while (!*t) /* trailing '\0'? */
190 t--;
191 while (t > s) {
f7fe979e 192 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
193 *(t--) = '0' + (char)(i % 10);
194 m = (char)(i / 10);
a6ec74c1
JH
195 }
196 return (sv);
197}
198
199/* Explosives and implosives. */
200
201#if 'I' == 73 && 'J' == 74
202/* On an ASCII/ISO kind of system */
203#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
204#else
205/*
206 Some other sort of character set - use memchr() so we don't match
207 the null byte.
208 */
209#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
210#endif
211
66c611c5 212/* type modifiers */
62f95557 213#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
214#define TYPE_IS_BIG_ENDIAN 0x200
215#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 216#define TYPE_IS_PACK 0x800
1109a392 217#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 218#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
219#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
220
7212898e 221#ifdef PERL_PACK_CAN_SHRIEKSIGN
28be1210 222# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
7212898e 223#else
28be1210 224# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
7212898e
NC
225#endif
226
227#ifndef PERL_PACK_CAN_BYTEORDER
228/* Put "can't" first because it is shorter */
229# define TYPE_ENDIANNESS(t) 0
230# define TYPE_NO_ENDIANNESS(t) (t)
231
232# define ENDIANNESS_ALLOWED_TYPES ""
233
234# define DO_BO_UNPACK(var, type)
235# define DO_BO_PACK(var, type)
07409e01
NC
236# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
237# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
7212898e
NC
238# define DO_BO_UNPACK_N(var, type)
239# define DO_BO_PACK_N(var, type)
240# define DO_BO_UNPACK_P(var)
241# define DO_BO_PACK_P(var)
45f723e1
NC
242# define DO_BO_UNPACK_PC(var)
243# define DO_BO_PACK_PC(var)
66c611c5 244
f337b084 245#else /* PERL_PACK_CAN_BYTEORDER */
7212898e
NC
246
247# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
248# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
249
250# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
251
252# define DO_BO_UNPACK(var, type) \
1109a392 253 STMT_START { \
66c611c5 254 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
255 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
256 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
257 default: break; \
258 } \
259 } STMT_END
260
7212898e 261# define DO_BO_PACK(var, type) \
1109a392 262 STMT_START { \
66c611c5 263 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
264 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
265 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
266 default: break; \
267 } \
268 } STMT_END
269
07409e01 270# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
1109a392 271 STMT_START { \
66c611c5 272 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 273 case TYPE_IS_BIG_ENDIAN: \
07409e01 274 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
1109a392
MHM
275 break; \
276 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 277 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
1109a392
MHM
278 break; \
279 default: \
280 break; \
281 } \
282 } STMT_END
283
07409e01 284# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
1109a392 285 STMT_START { \
66c611c5 286 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 287 case TYPE_IS_BIG_ENDIAN: \
07409e01 288 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
1109a392
MHM
289 break; \
290 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 291 var = (post_cast *) my_htole ## type ((pre_cast) var); \
1109a392
MHM
292 break; \
293 default: \
294 break; \
295 } \
296 } STMT_END
297
7212898e 298# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
299 STMT_START { \
300 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
301 case TYPE_IS_BIG_ENDIAN: \
302 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
303 "platform", #action, #type); \
304 break; \
305 case TYPE_IS_LITTLE_ENDIAN: \
306 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
307 "platform", #action, #type); \
308 break; \
309 default: \
310 break; \
311 } \
312 } STMT_END
313
7212898e 314# if PTRSIZE == INTSIZE
07409e01
NC
315# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
316# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
317# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
318# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
7212898e 319# elif PTRSIZE == LONGSIZE
07409e01
NC
320# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
321# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
322# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
323# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
2b00a750
JD
324# elif PTRSIZE == IVSIZE
325# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
326# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
327# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
328# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
7212898e
NC
329# else
330# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
331# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
45f723e1
NC
332# define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
333# define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
7212898e 334# endif
1109a392 335
7212898e 336# if defined(my_htolen) && defined(my_letohn) && \
1109a392 337 defined(my_htoben) && defined(my_betohn)
7212898e 338# define DO_BO_UNPACK_N(var, type) \
1109a392 339 STMT_START { \
66c611c5 340 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
341 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
342 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
343 default: break; \
344 } \
345 } STMT_END
346
7212898e 347# define DO_BO_PACK_N(var, type) \
1109a392 348 STMT_START { \
66c611c5 349 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
350 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
351 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
352 default: break; \
353 } \
354 } STMT_END
7212898e
NC
355# else
356# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
357# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
358# endif
359
f337b084 360#endif /* PERL_PACK_CAN_BYTEORDER */
62f95557 361
78d46eaa 362#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 363#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
364#define PACK_SIZE_MASK 0x3F
365
78d46eaa
NC
366/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
367 in). You're unlikely ever to need to regenerate them. */
a7a3cfaa
TH
368
369#if TYPE_IS_SHRIEKING != 0x100
370 ++++shriek offset should be 256
371#endif
372
373typedef U8 packprops_t;
78d46eaa
NC
374#if 'J'-'I' == 1
375/* ASCII */
f43a9a31 376STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
377 /* normal */
378 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
379 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
380 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
381 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
382 0, 0, 0,
1651fc44 383 /* C */ sizeof(unsigned char),
80a13697 384#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 385 /* D */ LONG_DOUBLESIZE,
80a13697 386#else
a7a3cfaa 387 0,
80a13697 388#endif
a7a3cfaa
TH
389 0,
390 /* F */ NVSIZE,
391 0, 0,
392 /* I */ sizeof(unsigned int),
393 /* J */ UVSIZE,
394 0,
395 /* L */ SIZE32,
396 0,
397 /* N */ SIZE32,
398 0, 0,
80a13697 399#if defined(HAS_QUAD)
a7a3cfaa 400 /* Q */ sizeof(Uquad_t),
80a13697 401#else
a7a3cfaa 402 0,
80a13697 403#endif
a7a3cfaa
TH
404 0,
405 /* S */ SIZE16,
406 0,
407 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
408 /* V */ SIZE32,
409 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
410 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
411 /* c */ sizeof(char),
412 /* d */ sizeof(double),
413 0,
414 /* f */ sizeof(float),
415 0, 0,
416 /* i */ sizeof(int),
417 /* j */ IVSIZE,
418 0,
419 /* l */ SIZE32,
420 0,
421 /* n */ SIZE16,
422 0,
423 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 424#if defined(HAS_QUAD)
a7a3cfaa 425 /* q */ sizeof(Quad_t),
80a13697 426#else
a7a3cfaa 427 0,
80a13697 428#endif
a7a3cfaa
TH
429 0,
430 /* s */ SIZE16,
431 0, 0,
432 /* v */ SIZE16,
433 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
434 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
435 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0,
443 /* shrieking */
444 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
448 0, 0, 0, 0, 0, 0, 0, 0, 0,
449 /* I */ sizeof(unsigned int),
450 0, 0,
451 /* L */ sizeof(unsigned long),
452 0,
7212898e 453#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 454 /* N */ SIZE32,
7212898e 455#else
a7a3cfaa 456 0,
7212898e 457#endif
a7a3cfaa
TH
458 0, 0, 0, 0,
459 /* S */ sizeof(unsigned short),
460 0, 0,
7212898e 461#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 462 /* V */ SIZE32,
7212898e 463#else
a7a3cfaa 464 0,
7212898e 465#endif
a7a3cfaa
TH
466 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
467 0, 0,
468 /* i */ sizeof(int),
469 0, 0,
470 /* l */ sizeof(long),
471 0,
7212898e 472#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 473 /* n */ SIZE16,
7212898e 474#else
a7a3cfaa 475 0,
7212898e 476#endif
a7a3cfaa
TH
477 0, 0, 0, 0,
478 /* s */ sizeof(short),
479 0, 0,
7212898e 480#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 481 /* v */ SIZE16,
7212898e 482#else
a7a3cfaa 483 0,
7212898e 484#endif
a7a3cfaa
TH
485 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
486 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
487 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
488 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
489 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
490 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
494};
495#else
496/* EBCDIC (or bust) */
f43a9a31 497STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
498 /* normal */
499 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
500 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
506 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
507 0, 0, 0,
508 /* c */ sizeof(char),
509 /* d */ sizeof(double),
510 0,
511 /* f */ sizeof(float),
512 0, 0,
513 /* i */ sizeof(int),
514 0, 0, 0, 0, 0, 0, 0,
515 /* j */ IVSIZE,
516 0,
517 /* l */ SIZE32,
518 0,
519 /* n */ SIZE16,
520 0,
521 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 522#if defined(HAS_QUAD)
a7a3cfaa 523 /* q */ sizeof(Quad_t),
80a13697 524#else
a7a3cfaa 525 0,
80a13697 526#endif
a7a3cfaa
TH
527 0, 0, 0, 0, 0, 0, 0, 0, 0,
528 /* s */ SIZE16,
529 0, 0,
530 /* v */ SIZE16,
531 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
532 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
533 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1651fc44 534 /* C */ sizeof(unsigned char),
80a13697 535#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 536 /* D */ LONG_DOUBLESIZE,
80a13697 537#else
a7a3cfaa 538 0,
80a13697 539#endif
a7a3cfaa
TH
540 0,
541 /* F */ NVSIZE,
542 0, 0,
543 /* I */ sizeof(unsigned int),
544 0, 0, 0, 0, 0, 0, 0,
545 /* J */ UVSIZE,
546 0,
547 /* L */ SIZE32,
548 0,
549 /* N */ SIZE32,
550 0, 0,
80a13697 551#if defined(HAS_QUAD)
a7a3cfaa 552 /* Q */ sizeof(Uquad_t),
80a13697 553#else
a7a3cfaa 554 0,
80a13697 555#endif
a7a3cfaa
TH
556 0, 0, 0, 0, 0, 0, 0, 0, 0,
557 /* S */ SIZE16,
558 0,
559 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
560 /* V */ SIZE32,
561 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
562 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
563 0, 0, 0, 0, 0, 0, 0, 0, 0,
564 /* shrieking */
565 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
572 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
573 0, 0, 0, 0, 0, 0, 0, 0, 0,
574 /* i */ sizeof(int),
575 0, 0, 0, 0, 0, 0, 0, 0, 0,
576 /* l */ sizeof(long),
577 0,
7212898e 578#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 579 /* n */ SIZE16,
7212898e 580#else
a7a3cfaa 581 0,
7212898e 582#endif
a7a3cfaa
TH
583 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
584 /* s */ sizeof(short),
585 0, 0,
7212898e 586#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 587 /* v */ SIZE16,
7212898e 588#else
a7a3cfaa 589 0,
7212898e 590#endif
a7a3cfaa
TH
591 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
592 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
593 0, 0, 0,
594 /* I */ sizeof(unsigned int),
595 0, 0, 0, 0, 0, 0, 0, 0, 0,
596 /* L */ sizeof(unsigned long),
597 0,
7212898e 598#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 599 /* N */ SIZE32,
7212898e 600#else
a7a3cfaa 601 0,
7212898e 602#endif
a7a3cfaa
TH
603 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
604 /* S */ sizeof(unsigned short),
605 0, 0,
7212898e 606#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 607 /* V */ SIZE32,
7212898e 608#else
a7a3cfaa 609 0,
7212898e 610#endif
a7a3cfaa
TH
611 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
612 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
613};
614#endif
615
08ca2aa3 616STATIC U8
f7fe979e 617uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 618{
08ca2aa3 619 STRLEN retlen;
0bcc34c2 620 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 621 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
622 /* We try to process malformed UTF-8 as much as possible (preferrably with
623 warnings), but these two mean we make no progress in the string and
624 might enter an infinite loop */
625 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
626 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
627 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 628 if (val >= 0x100) {
f337b084 629 if (ckWARN(WARN_UNPACK))
08ca2aa3
TH
630 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
631 "Character in '%c' format wrapped in unpack",
f337b084 632 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
633 val &= 0xff;
634 }
635 *s += retlen;
fe2774ed 636 return (U8)val;
08ca2aa3
TH
637}
638
f337b084
TH
639#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
640 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
641 *(U8 *)(s)++)
642
643STATIC bool
f7fe979e 644uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
645{
646 UV val;
647 STRLEN retlen;
f7fe979e 648 const char *from = *s;
08ca2aa3 649 int bad = 0;
f7fe979e 650 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3
TH
651 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
652 for (;buf_len > 0; buf_len--) {
653 if (from >= end) return FALSE;
f337b084 654 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
655 if (retlen == (STRLEN) -1 || retlen == 0) {
656 from += UTF8SKIP(from);
657 bad |= 1;
658 } else from += retlen;
659 if (val >= 0x100) {
660 bad |= 2;
661 val &= 0xff;
662 }
fe2774ed 663 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
664 }
665 /* We have enough characters for the buffer. Did we have problems ? */
666 if (bad) {
667 if (bad & 1) {
668 /* Rewalk the string fragment while warning */
f7fe979e 669 const char *ptr;
9e27e96a 670 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
671 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
672 if (ptr >= end) break;
f337b084 673 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
674 }
675 if (from > end) from = end;
676 }
677 if ((bad & 2) && ckWARN(WARN_UNPACK))
fc241834 678 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084
TH
679 WARN_PACK : WARN_UNPACK),
680 "Character(s) in '%c' format wrapped in %s",
fc241834 681 (int) TYPE_NO_MODIFIERS(datumtype),
f337b084 682 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
683 }
684 *s = from;
685 return TRUE;
686}
687
688STATIC bool
f7fe979e 689next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 690{
97aff369 691 dVAR;
08ca2aa3 692 STRLEN retlen;
0bcc34c2 693 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
694 if (val >= 0x100 || !ISUUCHAR(val) ||
695 retlen == (STRLEN) -1 || retlen == 0) {
696 *out = 0;
697 return FALSE;
698 }
699 *out = PL_uudmap[val] & 077;
f337b084 700 *s += retlen;
08ca2aa3
TH
701 return TRUE;
702}
78d46eaa 703
64844641 704STATIC char *
14333449 705S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
0bcc34c2 706 const U8 * const end = start + len;
64844641 707
f337b084 708 while (start < end) {
48fa4626
ST
709 const UV uv = NATIVE_TO_ASCII(*start);
710 if (UNI_IS_INVARIANT(uv))
711 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
712 else {
713 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
714 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
f337b084
TH
715 }
716 start++;
717 }
64844641 718 return dest;
f337b084
TH
719}
720
230e1fce
NC
721#define PUSH_BYTES(utf8, cur, buf, len) \
722STMT_START { \
64844641
AL
723 if (utf8) \
724 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
230e1fce
NC
725 else { \
726 Copy(buf, cur, len, char); \
727 (cur) += (len); \
728 } \
f337b084
TH
729} STMT_END
730
731#define GROWING(utf8, cat, start, cur, in_len) \
732STMT_START { \
733 STRLEN glen = (in_len); \
3473cf63 734 if (utf8) glen *= UTF8_EXPAND; \
f337b084 735 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 736 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
737 (cur) = (start) + SvCUR(cat); \
738 } \
739} STMT_END
740
741#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
742STMT_START { \
f7fe979e 743 const STRLEN glen = (in_len); \
f337b084 744 STRLEN gl = glen; \
3473cf63 745 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
746 if ((cur) + gl >= (start) + SvLEN(cat)) { \
747 *cur = '\0'; \
b162af07 748 SvCUR_set((cat), (cur) - (start)); \
0bd48802 749 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
750 (cur) = (start) + SvCUR(cat); \
751 } \
752 PUSH_BYTES(utf8, cur, buf, glen); \
753} STMT_END
754
755#define PUSH_BYTE(utf8, s, byte) \
756STMT_START { \
757 if (utf8) { \
f7fe979e 758 const U8 au8 = (byte); \
64844641 759 (s) = bytes_to_uni(&au8, 1, (s)); \
f337b084
TH
760 } else *(U8 *)(s)++ = (byte); \
761} STMT_END
762
763/* Only to be used inside a loop (see the break) */
764#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
765STMT_START { \
766 STRLEN retlen; \
767 if (str >= end) break; \
768 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
769 if (retlen == (STRLEN) -1 || retlen == 0) { \
770 *cur = '\0'; \
771 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
772 } \
773 str += retlen; \
774} STMT_END
775
f7fe979e
AL
776static const char *_action( const tempsym_t* symptr )
777{
10edeb5d 778 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
779}
780
206947d2 781/* Returns the sizeof() struct described by pat */
028d1f6d 782STATIC I32
f337b084 783S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 784{
f337b084 785 I32 total = 0;
206947d2 786
49704364 787 while (next_symbol(symptr)) {
f337b084 788 I32 len;
f7fe979e 789 int size;
f337b084
TH
790
791 switch (symptr->howlen) {
fc241834 792 case e_star:
49704364 793 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 794 _action( symptr ) );
49704364 795 break;
f337b084
TH
796 default:
797 /* e_no_len and e_number */
798 len = symptr->length;
799 break;
49704364
LW
800 }
801
a7a3cfaa 802 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 803 if (!size) {
f7fe979e 804 int star;
80a13697
NC
805 /* endianness doesn't influence the size of a type */
806 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
807 default:
808 Perl_croak(aTHX_ "Invalid type '%c' in %s",
809 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 810 _action( symptr ) );
28be1210
TH
811#ifdef PERL_PACK_CAN_SHRIEKSIGN
812 case '.' | TYPE_IS_SHRIEKING:
813 case '@' | TYPE_IS_SHRIEKING:
814#endif
80a13697 815 case '@':
28be1210 816 case '.':
80a13697
NC
817 case '/':
818 case 'U': /* XXXX Is it correct? */
819 case 'w':
820 case 'u':
821 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 822 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 823 _action( symptr ) );
80a13697
NC
824 case '%':
825 size = 0;
826 break;
827 case '(':
fc241834
RGS
828 {
829 tempsym_t savsym = *symptr;
830 symptr->patptr = savsym.grpbeg;
831 symptr->patend = savsym.grpend;
832 /* XXXX Theoretically, we need to measure many times at
833 different positions, since the subexpression may contain
834 alignment commands, but be not of aligned length.
835 Need to detect this and croak(). */
836 size = measure_struct(symptr);
837 *symptr = savsym;
838 break;
839 }
80a13697
NC
840 case 'X' | TYPE_IS_SHRIEKING:
841 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
842 */
843 if (!len) /* Avoid division by 0 */
844 len = 1;
845 len = total % len; /* Assumed: the start is aligned. */
846 /* FALL THROUGH */
847 case 'X':
848 size = -1;
849 if (total < len)
f7fe979e 850 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
851 break;
852 case 'x' | TYPE_IS_SHRIEKING:
853 if (!len) /* Avoid division by 0 */
854 len = 1;
855 star = total % len; /* Assumed: the start is aligned. */
856 if (star) /* Other portable ways? */
857 len = len - star;
858 else
859 len = 0;
860 /* FALL THROUGH */
861 case 'x':
862 case 'A':
863 case 'Z':
864 case 'a':
80a13697
NC
865 size = 1;
866 break;
867 case 'B':
868 case 'b':
869 len = (len + 7)/8;
870 size = 1;
871 break;
872 case 'H':
873 case 'h':
874 len = (len + 1)/2;
875 size = 1;
876 break;
78d46eaa 877
80a13697
NC
878 case 'P':
879 len = 1;
880 size = sizeof(char*);
78d46eaa
NC
881 break;
882 }
206947d2
IZ
883 }
884 total += len * size;
885 }
886 return total;
887}
888
49704364
LW
889
890/* locate matching closing parenthesis or bracket
891 * returns char pointer to char after match, or NULL
892 */
f7fe979e
AL
893STATIC const char *
894S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
18529408 895{
49704364 896 while (patptr < patend) {
f7fe979e 897 const char c = *patptr++;
49704364
LW
898
899 if (isSPACE(c))
900 continue;
901 else if (c == ender)
902 return patptr-1;
903 else if (c == '#') {
904 while (patptr < patend && *patptr != '\n')
905 patptr++;
906 continue;
907 } else if (c == '(')
908 patptr = group_end(patptr, patend, ')') + 1;
909 else if (c == '[')
910 patptr = group_end(patptr, patend, ']') + 1;
18529408 911 }
49704364
LW
912 Perl_croak(aTHX_ "No group ending character '%c' found in template",
913 ender);
914 return 0;
18529408
IZ
915}
916
49704364
LW
917
918/* Convert unsigned decimal number to binary.
919 * Expects a pointer to the first digit and address of length variable
920 * Advances char pointer to 1st non-digit char and returns number
fc241834 921 */
f7fe979e
AL
922STATIC const char *
923S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
49704364
LW
924{
925 I32 len = *patptr++ - '0';
926 while (isDIGIT(*patptr)) {
927 if (len >= 0x7FFFFFFF/10)
928 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
929 len = (len * 10) + (*patptr++ - '0');
930 }
931 *lenptr = len;
932 return patptr;
933}
934
935/* The marvellous template parsing routine: Using state stored in *symptr,
936 * locates next template code and count
937 */
938STATIC bool
f337b084 939S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 940{
f7fe979e 941 const char* patptr = symptr->patptr;
0bcc34c2 942 const char* const patend = symptr->patend;
49704364
LW
943
944 symptr->flags &= ~FLAG_SLASH;
945
946 while (patptr < patend) {
947 if (isSPACE(*patptr))
948 patptr++;
949 else if (*patptr == '#') {
950 patptr++;
951 while (patptr < patend && *patptr != '\n')
952 patptr++;
953 if (patptr < patend)
954 patptr++;
955 } else {
fc241834 956 /* We should have found a template code */
49704364 957 I32 code = *patptr++ & 0xFF;
66c611c5 958 U32 inherited_modifiers = 0;
49704364
LW
959
960 if (code == ','){ /* grandfather in commas but with a warning */
961 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
962 symptr->flags |= FLAG_COMMA;
963 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 964 "Invalid type ',' in %s", _action( symptr ) );
49704364
LW
965 }
966 continue;
967 }
fc241834 968
49704364 969 /* for '(', skip to ')' */
fc241834 970 if (code == '(') {
49704364
LW
971 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
972 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 973 _action( symptr ) );
49704364
LW
974 symptr->grpbeg = patptr;
975 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
976 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
977 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 978 _action( symptr ) );
49704364
LW
979 }
980
66c611c5
MHM
981 /* look for group modifiers to inherit */
982 if (TYPE_ENDIANNESS(symptr->flags)) {
983 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
984 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
985 }
986
1109a392
MHM
987 /* look for modifiers */
988 while (patptr < patend) {
b7787f18
AL
989 const char *allowed;
990 I32 modifier;
1109a392
MHM
991 switch (*patptr) {
992 case '!':
993 modifier = TYPE_IS_SHRIEKING;
7212898e 994 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 995 break;
7212898e 996#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
997 case '>':
998 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 999 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
1000 break;
1001 case '<':
1002 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 1003 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 1004 break;
f337b084 1005#endif /* PERL_PACK_CAN_BYTEORDER */
1109a392 1006 default:
b7787f18
AL
1007 allowed = "";
1008 modifier = 0;
1109a392
MHM
1009 break;
1010 }
66c611c5 1011
1109a392
MHM
1012 if (modifier == 0)
1013 break;
66c611c5 1014
1109a392
MHM
1015 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1016 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 1017 allowed, _action( symptr ) );
66c611c5
MHM
1018
1019 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 1020 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 1021 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
1022 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1023 TYPE_ENDIANNESS_MASK)
1024 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 1025 *patptr, _action( symptr ) );
66c611c5 1026
041457d9 1027 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1109a392
MHM
1028 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1029 "Duplicate modifier '%c' after '%c' in %s",
1030 *patptr, (int) TYPE_NO_MODIFIERS(code),
f7fe979e 1031 _action( symptr ) );
1109a392 1032 }
66c611c5 1033
1109a392
MHM
1034 code |= modifier;
1035 patptr++;
49704364
LW
1036 }
1037
66c611c5
MHM
1038 /* inherit modifiers */
1039 code |= inherited_modifiers;
1040
fc241834 1041 /* look for count and/or / */
49704364
LW
1042 if (patptr < patend) {
1043 if (isDIGIT(*patptr)) {
1044 patptr = get_num( patptr, &symptr->length );
1045 symptr->howlen = e_number;
1046
1047 } else if (*patptr == '*') {
1048 patptr++;
1049 symptr->howlen = e_star;
1050
1051 } else if (*patptr == '[') {
f7fe979e 1052 const char* lenptr = ++patptr;
49704364
LW
1053 symptr->howlen = e_number;
1054 patptr = group_end( patptr, patend, ']' ) + 1;
1055 /* what kind of [] is it? */
1056 if (isDIGIT(*lenptr)) {
1057 lenptr = get_num( lenptr, &symptr->length );
1058 if( *lenptr != ']' )
1059 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 1060 _action( symptr ) );
49704364
LW
1061 } else {
1062 tempsym_t savsym = *symptr;
1063 symptr->patend = patptr-1;
1064 symptr->patptr = lenptr;
1065 savsym.length = measure_struct(symptr);
1066 *symptr = savsym;
1067 }
1068 } else {
1069 symptr->howlen = e_no_len;
1070 symptr->length = 1;
1071 }
1072
1073 /* try to find / */
1074 while (patptr < patend) {
1075 if (isSPACE(*patptr))
1076 patptr++;
1077 else if (*patptr == '#') {
1078 patptr++;
1079 while (patptr < patend && *patptr != '\n')
1080 patptr++;
1081 if (patptr < patend)
1082 patptr++;
1083 } else {
66c611c5 1084 if (*patptr == '/') {
49704364
LW
1085 symptr->flags |= FLAG_SLASH;
1086 patptr++;
66c611c5
MHM
1087 if (patptr < patend &&
1088 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 1089 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 1090 _action( symptr ) );
49704364
LW
1091 }
1092 break;
1093 }
18529408 1094 }
49704364
LW
1095 } else {
1096 /* at end - no count, no / */
1097 symptr->howlen = e_no_len;
1098 symptr->length = 1;
1099 }
1100
1101 symptr->code = code;
fc241834 1102 symptr->patptr = patptr;
49704364 1103 return TRUE;
18529408 1104 }
49704364 1105 }
fc241834 1106 symptr->patptr = patptr;
49704364 1107 return FALSE;
18529408
IZ
1108}
1109
18529408 1110/*
fc241834 1111 There is no way to cleanly handle the case where we should process the
08ca2aa3 1112 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
1113 (e.g. estimates like strend-s as an upper bound for the number of
1114 characters left wouldn't work). So if we foresee the need of this
1115 (pattern starts with U or contains U0), we want to work on the encoded
1116 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
1117 themselves if they need to do a lot of unpacks like this on it
1118*/
fc241834 1119STATIC bool
08ca2aa3
TH
1120need_utf8(const char *pat, const char *patend)
1121{
1122 bool first = TRUE;
1123 while (pat < patend) {
1124 if (pat[0] == '#') {
1125 pat++;
f7fe979e 1126 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1127 if (!pat) return FALSE;
1128 } else if (pat[0] == 'U') {
1129 if (first || pat[1] == '0') return TRUE;
1130 } else first = FALSE;
1131 pat++;
1132 }
1133 return FALSE;
1134}
1135
1136STATIC char
1137first_symbol(const char *pat, const char *patend) {
1138 while (pat < patend) {
1139 if (pat[0] != '#') return pat[0];
1140 pat++;
f7fe979e 1141 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1142 if (!pat) return 0;
1143 pat++;
1144 }
1145 return 0;
1146}
1147
1148/*
7accc089
JH
1149=for apidoc unpackstring
1150
608d3aed
LW
1151The engine implementing unpack() Perl function. C<unpackstring> puts the
1152extracted list items on the stack and returns the number of elements.
1153Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
7accc089
JH
1154
1155=cut */
1156
1157I32
f7fe979e 1158Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 1159{
f7fe979e 1160 tempsym_t sym;
08ca2aa3 1161
f337b084 1162 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1163 else if (need_utf8(pat, patend)) {
1164 /* We probably should try to avoid this in case a scalar context call
1165 wouldn't get to the "U0" */
1166 STRLEN len = strend - s;
230e1fce 1167 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1168 SAVEFREEPV(s);
1169 strend = s + len;
f337b084 1170 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1171 }
1172
f337b084
TH
1173 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1174 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1175
f7fe979e 1176 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
1177
1178 return unpack_rec(&sym, s, s, strend, NULL );
1179}
1180
4136a0f7 1181STATIC I32
f7fe979e 1182S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 1183{
27da23d5 1184 dVAR; dSP;
18529408 1185 SV *sv;
f7fe979e 1186 const I32 start_sp_offset = SP - PL_stack_base;
49704364 1187 howlen_t howlen;
a6ec74c1 1188
a6ec74c1 1189 I32 checksum = 0;
92d41999 1190 UV cuv = 0;
a6ec74c1 1191 NV cdouble = 0.0;
f337b084 1192 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 1193 bool beyond = FALSE;
21c16052 1194 bool explicit_length;
9e27e96a 1195 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 1196 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
28be1210 1197 symptr->strbeg = s - strbeg;
49704364 1198
49704364 1199 while (next_symbol(symptr)) {
a7a3cfaa 1200 packprops_t props;
9e27e96a 1201 I32 len;
f337b084 1202 I32 datumtype = symptr->code;
206947d2 1203 /* do first one only unless in list context
08ca2aa3 1204 / is implemented by unpacking the count, then popping it from the
206947d2 1205 stack, so must check that we're not in the middle of a / */
49704364 1206 if ( unpack_only_one
206947d2 1207 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1208 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1209 break;
49704364 1210
f337b084 1211 switch (howlen = symptr->howlen) {
fc241834
RGS
1212 case e_star:
1213 len = strend - strbeg; /* long enough */
49704364 1214 break;
f337b084
TH
1215 default:
1216 /* e_no_len and e_number */
1217 len = symptr->length;
1218 break;
49704364 1219 }
18529408 1220
21c16052 1221 explicit_length = TRUE;
a6ec74c1 1222 redo_switch:
49704364 1223 beyond = s >= strend;
a7a3cfaa
TH
1224
1225 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1226 if (props) {
1227 /* props nonzero means we can process this letter. */
9e27e96a
AL
1228 const long size = props & PACK_SIZE_MASK;
1229 const long howmany = (strend - s) / size;
a7a3cfaa
TH
1230 if (len > howmany)
1231 len = howmany;
1232
1233 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1234 if (len && unpack_only_one) len = 1;
1235 EXTEND(SP, len);
1236 EXTEND_MORTAL(len);
78d46eaa
NC
1237 }
1238 }
a7a3cfaa 1239
1109a392 1240 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1241 default:
1109a392 1242 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1243
a6ec74c1 1244 case '%':
49704364 1245 if (howlen == e_no_len)
18529408 1246 len = 16; /* len is not specified */
a6ec74c1 1247 checksum = len;
92d41999 1248 cuv = 0;
a6ec74c1 1249 cdouble = 0;
18529408 1250 continue;
a6ec74c1 1251 break;
18529408
IZ
1252 case '(':
1253 {
49704364 1254 tempsym_t savsym = *symptr;
9e27e96a 1255 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 1256 symptr->flags |= group_modifiers;
49704364 1257 symptr->patend = savsym.grpend;
28be1210 1258 symptr->previous = &savsym;
49704364 1259 symptr->level++;
18529408
IZ
1260 PUTBACK;
1261 while (len--) {
49704364 1262 symptr->patptr = savsym.grpbeg;
f337b084
TH
1263 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1264 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1265 unpack_rec(symptr, s, strbeg, strend, &s);
1266 if (s == strend && savsym.howlen == e_star)
49704364 1267 break; /* No way to continue */
18529408
IZ
1268 }
1269 SPAGAIN;
28be1210 1270 savsym.flags = symptr->flags & ~group_modifiers;
49704364 1271 *symptr = savsym;
18529408
IZ
1272 break;
1273 }
28be1210
TH
1274#ifdef PERL_PACK_CAN_SHRIEKSIGN
1275 case '.' | TYPE_IS_SHRIEKING:
1276#endif
1277 case '.': {
9e27e96a 1278 const char *from;
28be1210
TH
1279 SV *sv;
1280#ifdef PERL_PACK_CAN_SHRIEKSIGN
9e27e96a 1281 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210 1282#else /* PERL_PACK_CAN_SHRIEKSIGN */
9e27e96a 1283 const bool u8 = utf8;
28be1210
TH
1284#endif
1285 if (howlen == e_star) from = strbeg;
1286 else if (len <= 0) from = s;
1287 else {
1288 tempsym_t *group = symptr;
1289
1290 while (--len && group) group = group->previous;
1291 from = group ? strbeg + group->strbeg : strbeg;
1292 }
1293 sv = from <= s ?
00646304
CB
1294 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1295 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
28be1210
TH
1296 XPUSHs(sv_2mortal(sv));
1297 break;
1298 }
1299#ifdef PERL_PACK_CAN_SHRIEKSIGN
1300 case '@' | TYPE_IS_SHRIEKING:
1301#endif
a6ec74c1 1302 case '@':
28be1210
TH
1303 s = strbeg + symptr->strbeg;
1304#ifdef PERL_PACK_CAN_SHRIEKSIGN
1305 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1306#else /* PERL_PACK_CAN_SHRIEKSIGN */
1307 if (utf8)
1308#endif
1309 {
08ca2aa3
TH
1310 while (len > 0) {
1311 if (s >= strend)
1312 Perl_croak(aTHX_ "'@' outside of string in unpack");
1313 s += UTF8SKIP(s);
1314 len--;
1315 }
1316 if (s > strend)
1317 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1318 } else {
28be1210 1319 if (strend-s < len)
fc241834 1320 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1321 s += len;
08ca2aa3 1322 }
a6ec74c1 1323 break;
62f95557
IZ
1324 case 'X' | TYPE_IS_SHRIEKING:
1325 if (!len) /* Avoid division by 0 */
1326 len = 1;
08ca2aa3 1327 if (utf8) {
f7fe979e 1328 const char *hop, *last;
f337b084
TH
1329 I32 l = len;
1330 hop = last = strbeg;
1331 while (hop < s) {
1332 hop += UTF8SKIP(hop);
1333 if (--l == 0) {
08ca2aa3 1334 last = hop;
f337b084
TH
1335 l = len;
1336 }
fc241834 1337 }
f337b084
TH
1338 if (last > s)
1339 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1340 s = last;
1341 break;
f337b084
TH
1342 }
1343 len = (s - strbeg) % len;
62f95557 1344 /* FALL THROUGH */
a6ec74c1 1345 case 'X':
08ca2aa3
TH
1346 if (utf8) {
1347 while (len > 0) {
1348 if (s <= strbeg)
1349 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1350 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1351 if (s <= strbeg)
1352 Perl_croak(aTHX_ "'X' outside of string in unpack");
1353 }
1354 len--;
1355 }
1356 } else {
fc241834
RGS
1357 if (len > s - strbeg)
1358 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1359 s -= len;
08ca2aa3 1360 }
a6ec74c1 1361 break;
9e27e96a
AL
1362 case 'x' | TYPE_IS_SHRIEKING: {
1363 I32 ai32;
62f95557
IZ
1364 if (!len) /* Avoid division by 0 */
1365 len = 1;
230e1fce
NC
1366 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1367 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1368 if (ai32 == 0) break;
1369 len -= ai32;
9e27e96a 1370 }
62f95557 1371 /* FALL THROUGH */
a6ec74c1 1372 case 'x':
08ca2aa3
TH
1373 if (utf8) {
1374 while (len>0) {
1375 if (s >= strend)
1376 Perl_croak(aTHX_ "'x' outside of string in unpack");
1377 s += UTF8SKIP(s);
1378 len--;
1379 }
1380 } else {
fc241834
RGS
1381 if (len > strend - s)
1382 Perl_croak(aTHX_ "'x' outside of string in unpack");
1383 s += len;
f337b084 1384 }
a6ec74c1
JH
1385 break;
1386 case '/':
49704364
LW
1387 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1388 break;
a6ec74c1
JH
1389 case 'A':
1390 case 'Z':
1391 case 'a':
08ca2aa3
TH
1392 if (checksum) {
1393 /* Preliminary length estimate is assumed done in 'W' */
1394 if (len > strend - s) len = strend - s;
1395 goto W_checksum;
1396 }
1397 if (utf8) {
1398 I32 l;
f7fe979e 1399 const char *hop;
08ca2aa3
TH
1400 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1401 if (hop >= strend) {
1402 if (hop > strend)
1403 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1404 break;
fc241834 1405 }
a6ec74c1 1406 }
08ca2aa3
TH
1407 if (hop > strend)
1408 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1409 len = hop - s;
1410 } else if (len > strend - s)
1411 len = strend - s;
1412
1413 if (datumtype == 'Z') {
1414 /* 'Z' strips stuff after first null */
f7fe979e 1415 const char *ptr, *end;
f337b084
TH
1416 end = s + len;
1417 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1418 sv = newSVpvn(s, ptr-s);
1419 if (howlen == e_star) /* exact for 'Z*' */
1420 len = ptr-s + (ptr != strend ? 1 : 0);
1421 } else if (datumtype == 'A') {
1422 /* 'A' strips both nulls and spaces */
f7fe979e 1423 const char *ptr;
18bdf90a
TH
1424 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1425 for (ptr = s+len-1; ptr >= s; ptr--)
1426 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
230e1fce 1427 !is_utf8_space((U8 *) ptr)) break;
18bdf90a
TH
1428 if (ptr >= s) ptr += UTF8SKIP(ptr);
1429 else ptr++;
28be1210 1430 if (ptr > s+len)
18bdf90a
TH
1431 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1432 } else {
1433 for (ptr = s+len-1; ptr >= s; ptr--)
1434 if (*ptr != 0 && !isSPACE(*ptr)) break;
1435 ptr++;
1436 }
08ca2aa3
TH
1437 sv = newSVpvn(s, ptr-s);
1438 } else sv = newSVpvn(s, len);
1439
1440 if (utf8) {
1441 SvUTF8_on(sv);
1442 /* Undo any upgrade done due to need_utf8() */
f337b084 1443 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1444 sv_utf8_downgrade(sv, 0);
a6ec74c1
JH
1445 }
1446 XPUSHs(sv_2mortal(sv));
08ca2aa3 1447 s += len;
a6ec74c1
JH
1448 break;
1449 case 'B':
08ca2aa3
TH
1450 case 'b': {
1451 char *str;
49704364 1452 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1453 len = (strend - s) * 8;
1454 if (checksum) {
1455 if (!PL_bitcount) {
08ca2aa3 1456 int bits;
a02a5408 1457 Newxz(PL_bitcount, 256, char);
a6ec74c1
JH
1458 for (bits = 1; bits < 256; bits++) {
1459 if (bits & 1) PL_bitcount[bits]++;
1460 if (bits & 2) PL_bitcount[bits]++;
1461 if (bits & 4) PL_bitcount[bits]++;
1462 if (bits & 8) PL_bitcount[bits]++;
1463 if (bits & 16) PL_bitcount[bits]++;
1464 if (bits & 32) PL_bitcount[bits]++;
1465 if (bits & 64) PL_bitcount[bits]++;
1466 if (bits & 128) PL_bitcount[bits]++;
1467 }
1468 }
f337b084 1469 if (utf8)
08ca2aa3 1470 while (len >= 8 && s < strend) {
f337b084 1471 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1472 len -= 8;
1473 }
f337b084 1474 else
fc241834 1475 while (len >= 8) {
08ca2aa3 1476 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1477 len -= 8;
1478 }
08ca2aa3
TH
1479 if (len && s < strend) {
1480 U8 bits;
f337b084
TH
1481 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1482 if (datumtype == 'b')
a6ec74c1 1483 while (len-- > 0) {
92d41999 1484 if (bits & 1) cuv++;
a6ec74c1
JH
1485 bits >>= 1;
1486 }
f337b084 1487 else
a6ec74c1 1488 while (len-- > 0) {
08ca2aa3 1489 if (bits & 0x80) cuv++;
a6ec74c1
JH
1490 bits <<= 1;
1491 }
fc241834 1492 }
a6ec74c1
JH
1493 break;
1494 }
08ca2aa3 1495
561b68a9 1496 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1497 SvPOK_on(sv);
1498 str = SvPVX(sv);
1499 if (datumtype == 'b') {
f337b084 1500 U8 bits = 0;
f7fe979e 1501 const I32 ai32 = len;
08ca2aa3
TH
1502 for (len = 0; len < ai32; len++) {
1503 if (len & 7) bits >>= 1;
1504 else if (utf8) {
1505 if (s >= strend) break;
f337b084 1506 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1507 } else bits = *(U8 *) s++;
1508 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1509 }
08ca2aa3 1510 } else {
f337b084 1511 U8 bits = 0;
f7fe979e 1512 const I32 ai32 = len;
08ca2aa3
TH
1513 for (len = 0; len < ai32; len++) {
1514 if (len & 7) bits <<= 1;
1515 else if (utf8) {
1516 if (s >= strend) break;
f337b084 1517 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1518 } else bits = *(U8 *) s++;
1519 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1520 }
1521 }
1522 *str = '\0';
aa07b2f6 1523 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1524 XPUSHs(sv);
a6ec74c1 1525 break;
08ca2aa3 1526 }
a6ec74c1 1527 case 'H':
08ca2aa3
TH
1528 case 'h': {
1529 char *str;
fc241834 1530 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1531 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1532 len = (strend - s) * 2;
561b68a9 1533 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1534 SvPOK_on(sv);
1535 str = SvPVX(sv);
1536 if (datumtype == 'h') {
f337b084 1537 U8 bits = 0;
9e27e96a 1538 I32 ai32 = len;
fc241834
RGS
1539 for (len = 0; len < ai32; len++) {
1540 if (len & 1) bits >>= 4;
1541 else if (utf8) {
1542 if (s >= strend) break;
f337b084 1543 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1544 } else bits = * (U8 *) s++;
a6ec74c1
JH
1545 *str++ = PL_hexdigit[bits & 15];
1546 }
08ca2aa3 1547 } else {
f337b084 1548 U8 bits = 0;
f7fe979e 1549 const I32 ai32 = len;
08ca2aa3
TH
1550 for (len = 0; len < ai32; len++) {
1551 if (len & 1) bits <<= 4;
1552 else if (utf8) {
1553 if (s >= strend) break;
f337b084 1554 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1555 } else bits = *(U8 *) s++;
a6ec74c1
JH
1556 *str++ = PL_hexdigit[(bits >> 4) & 15];
1557 }
1558 }
1559 *str = '\0';
aa07b2f6 1560 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1561 XPUSHs(sv);
a6ec74c1 1562 break;
08ca2aa3 1563 }
1651fc44
ML
1564 case 'C':
1565 if (len == 0) {
1566 if (explicit_length)
1567 /* Switch to "character" mode */
1568 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1569 break;
1570 }
1571 /* FALL THROUGH */
a6ec74c1 1572 case 'c':
1651fc44
ML
1573 while (len-- > 0 && s < strend) {
1574 int aint;
1575 if (utf8)
1576 {
1577 STRLEN retlen;
1578 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1579 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1580 if (retlen == (STRLEN) -1 || retlen == 0)
1581 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1582 s += retlen;
1583 }
1584 else
1585 aint = *(U8 *)(s)++;
1586 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1587 aint -= 256;
08ca2aa3 1588 if (!checksum)
ac7f3b1b 1589 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1590 else if (checksum > bits_in_uv)
1591 cdouble += (NV)aint;
1592 else
1593 cuv += aint;
a6ec74c1
JH
1594 }
1595 break;
08ca2aa3
TH
1596 case 'W':
1597 W_checksum:
1651fc44 1598 if (utf8) {
08ca2aa3 1599 while (len-- > 0 && s < strend) {
08ca2aa3 1600 STRLEN retlen;
f7fe979e 1601 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1602 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1603 if (retlen == (STRLEN) -1 || retlen == 0)
1604 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1605 s += retlen;
1606 if (!checksum)
1607 PUSHs(sv_2mortal(newSVuv((UV) val)));
1608 else if (checksum > bits_in_uv)
1609 cdouble += (NV) val;
d6d3e8bd 1610 else
08ca2aa3 1611 cuv += val;
fc241834 1612 }
08ca2aa3 1613 } else if (!checksum)
a6ec74c1 1614 while (len-- > 0) {
f7fe979e 1615 const U8 ch = *(U8 *) s++;
08ca2aa3 1616 PUSHs(sv_2mortal(newSVuv((UV) ch)));
a6ec74c1 1617 }
08ca2aa3
TH
1618 else if (checksum > bits_in_uv)
1619 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1620 else
1621 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1622 break;
1623 case 'U':
35bcd338 1624 if (len == 0) {
08ca2aa3
TH
1625 if (explicit_length) {
1626 /* Switch to "bytes in UTF-8" mode */
f337b084 1627 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1628 else
1629 /* Should be impossible due to the need_utf8() test */
1630 Perl_croak(aTHX_ "U0 mode on a byte string");
1631 }
35bcd338
JH
1632 break;
1633 }
08ca2aa3 1634 if (len > strend - s) len = strend - s;
fc241834 1635 if (!checksum) {
08ca2aa3
TH
1636 if (len && unpack_only_one) len = 1;
1637 EXTEND(SP, len);
1638 EXTEND_MORTAL(len);
fc241834 1639 }
08ca2aa3
TH
1640 while (len-- > 0 && s < strend) {
1641 STRLEN retlen;
1642 UV auv;
1643 if (utf8) {
1644 U8 result[UTF8_MAXLEN];
f7fe979e 1645 const char *ptr = s;
08ca2aa3 1646 STRLEN len;
08ca2aa3
TH
1647 /* Bug: warns about bad utf8 even if we are short on bytes
1648 and will break out of the loop */
230e1fce
NC
1649 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1650 'U'))
08ca2aa3
TH
1651 break;
1652 len = UTF8SKIP(result);
fc241834 1653 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1654 (char *) &result[1], len-1, 'U')) break;
08ca2aa3
TH
1655 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1656 s = ptr;
1657 } else {
1658 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1659 if (retlen == (STRLEN) -1 || retlen == 0)
1660 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1661 s += retlen;
1662 }
1663 if (!checksum)
1664 PUSHs(sv_2mortal(newSVuv((UV) auv)));
73cb7263 1665 else if (checksum > bits_in_uv)
08ca2aa3 1666 cdouble += (NV) auv;
73cb7263 1667 else
08ca2aa3 1668 cuv += auv;
a6ec74c1
JH
1669 }
1670 break;
49704364
LW
1671 case 's' | TYPE_IS_SHRIEKING:
1672#if SHORTSIZE != SIZE16
73cb7263 1673 while (len-- > 0) {
08ca2aa3 1674 short ashort;
f337b084
TH
1675 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1676 DO_BO_UNPACK(ashort, s);
08ca2aa3 1677 if (!checksum)
ac7f3b1b 1678 PUSHs(sv_2mortal(newSViv((IV)ashort)));
73cb7263
NC
1679 else if (checksum > bits_in_uv)
1680 cdouble += (NV)ashort;
1681 else
1682 cuv += ashort;
49704364
LW
1683 }
1684 break;
1685#else
1686 /* Fallthrough! */
a6ec74c1 1687#endif
49704364 1688 case 's':
73cb7263 1689 while (len-- > 0) {
08ca2aa3
TH
1690 I16 ai16;
1691
1692#if U16SIZE > SIZE16
1693 ai16 = 0;
1694#endif
f337b084 1695 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1696 DO_BO_UNPACK(ai16, 16);
1109a392 1697#if U16SIZE > SIZE16
73cb7263
NC
1698 if (ai16 > 32767)
1699 ai16 -= 65536;
a6ec74c1 1700#endif
08ca2aa3 1701 if (!checksum)
ac7f3b1b 1702 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263
NC
1703 else if (checksum > bits_in_uv)
1704 cdouble += (NV)ai16;
1705 else
1706 cuv += ai16;
a6ec74c1
JH
1707 }
1708 break;
49704364
LW
1709 case 'S' | TYPE_IS_SHRIEKING:
1710#if SHORTSIZE != SIZE16
73cb7263 1711 while (len-- > 0) {
08ca2aa3 1712 unsigned short aushort;
f337b084
TH
1713 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1714 DO_BO_UNPACK(aushort, s);
08ca2aa3
TH
1715 if (!checksum)
1716 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
73cb7263
NC
1717 else if (checksum > bits_in_uv)
1718 cdouble += (NV)aushort;
1719 else
1720 cuv += aushort;
49704364
LW
1721 }
1722 break;
1723#else
1724 /* Fallhrough! */
1725#endif
a6ec74c1
JH
1726 case 'v':
1727 case 'n':
1728 case 'S':
73cb7263 1729 while (len-- > 0) {
08ca2aa3
TH
1730 U16 au16;
1731#if U16SIZE > SIZE16
1732 au16 = 0;
1733#endif
f337b084 1734 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1735 DO_BO_UNPACK(au16, 16);
a6ec74c1 1736#ifdef HAS_NTOHS
73cb7263
NC
1737 if (datumtype == 'n')
1738 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1739#endif
1740#ifdef HAS_VTOHS
73cb7263
NC
1741 if (datumtype == 'v')
1742 au16 = vtohs(au16);
a6ec74c1 1743#endif
08ca2aa3
TH
1744 if (!checksum)
1745 PUSHs(sv_2mortal(newSVuv((UV)au16)));
73cb7263 1746 else if (checksum > bits_in_uv)
f337b084 1747 cdouble += (NV) au16;
73cb7263
NC
1748 else
1749 cuv += au16;
a6ec74c1
JH
1750 }
1751 break;
7212898e 1752#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1753 case 'v' | TYPE_IS_SHRIEKING:
1754 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1755 while (len-- > 0) {
08ca2aa3
TH
1756 I16 ai16;
1757# if U16SIZE > SIZE16
1758 ai16 = 0;
1759# endif
f337b084 1760 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1761# ifdef HAS_NTOHS
73cb7263 1762 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1763 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1764# endif /* HAS_NTOHS */
1765# ifdef HAS_VTOHS
73cb7263 1766 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1767 ai16 = (I16) vtohs((U16) ai16);
1768# endif /* HAS_VTOHS */
1769 if (!checksum)
ac7f3b1b 1770 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263 1771 else if (checksum > bits_in_uv)
08ca2aa3 1772 cdouble += (NV) ai16;
73cb7263
NC
1773 else
1774 cuv += ai16;
068bd2e7
MHM
1775 }
1776 break;
08ca2aa3 1777#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1778 case 'i':
49704364 1779 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1780 while (len-- > 0) {
08ca2aa3 1781 int aint;
f337b084
TH
1782 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1783 DO_BO_UNPACK(aint, i);
08ca2aa3 1784 if (!checksum)
ac7f3b1b 1785 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1786 else if (checksum > bits_in_uv)
1787 cdouble += (NV)aint;
1788 else
1789 cuv += aint;
a6ec74c1
JH
1790 }
1791 break;
1792 case 'I':
49704364 1793 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1794 while (len-- > 0) {
08ca2aa3 1795 unsigned int auint;
f337b084
TH
1796 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1797 DO_BO_UNPACK(auint, i);
08ca2aa3 1798 if (!checksum)
ac7f3b1b 1799 PUSHs(sv_2mortal(newSVuv((UV)auint)));
73cb7263
NC
1800 else if (checksum > bits_in_uv)
1801 cdouble += (NV)auint;
1802 else
1803 cuv += auint;
a6ec74c1
JH
1804 }
1805 break;
92d41999 1806 case 'j':
73cb7263 1807 while (len-- > 0) {
08ca2aa3 1808 IV aiv;
f337b084 1809 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1810#if IVSIZE == INTSIZE
f337b084 1811 DO_BO_UNPACK(aiv, i);
1109a392 1812#elif IVSIZE == LONGSIZE
f337b084 1813 DO_BO_UNPACK(aiv, l);
1109a392 1814#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1815 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1816#else
1817 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1818#endif
08ca2aa3 1819 if (!checksum)
ac7f3b1b 1820 PUSHs(sv_2mortal(newSViv(aiv)));
73cb7263
NC
1821 else if (checksum > bits_in_uv)
1822 cdouble += (NV)aiv;
1823 else
1824 cuv += aiv;
92d41999
JH
1825 }
1826 break;
1827 case 'J':
73cb7263 1828 while (len-- > 0) {
08ca2aa3 1829 UV auv;
f337b084 1830 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1831#if IVSIZE == INTSIZE
f337b084 1832 DO_BO_UNPACK(auv, i);
08ca2aa3 1833#elif IVSIZE == LONGSIZE
f337b084 1834 DO_BO_UNPACK(auv, l);
08ca2aa3 1835#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1836 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1837#else
1838 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1839#endif
08ca2aa3 1840 if (!checksum)
ac7f3b1b 1841 PUSHs(sv_2mortal(newSVuv(auv)));
73cb7263
NC
1842 else if (checksum > bits_in_uv)
1843 cdouble += (NV)auv;
1844 else
1845 cuv += auv;
92d41999
JH
1846 }
1847 break;
49704364
LW
1848 case 'l' | TYPE_IS_SHRIEKING:
1849#if LONGSIZE != SIZE32
73cb7263 1850 while (len-- > 0) {
08ca2aa3 1851 long along;
f337b084
TH
1852 SHIFT_VAR(utf8, s, strend, along, datumtype);
1853 DO_BO_UNPACK(along, l);
08ca2aa3 1854 if (!checksum)
ac7f3b1b 1855 PUSHs(sv_2mortal(newSViv((IV)along)));
73cb7263
NC
1856 else if (checksum > bits_in_uv)
1857 cdouble += (NV)along;
1858 else
1859 cuv += along;
49704364
LW
1860 }
1861 break;
1862#else
1863 /* Fallthrough! */
a6ec74c1 1864#endif
49704364 1865 case 'l':
73cb7263 1866 while (len-- > 0) {
08ca2aa3
TH
1867 I32 ai32;
1868#if U32SIZE > SIZE32
1869 ai32 = 0;
1870#endif
f337b084 1871 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1872 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1873#if U32SIZE > SIZE32
08ca2aa3 1874 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1875#endif
08ca2aa3 1876 if (!checksum)
ac7f3b1b 1877 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1878 else if (checksum > bits_in_uv)
1879 cdouble += (NV)ai32;
1880 else
1881 cuv += ai32;
a6ec74c1
JH
1882 }
1883 break;
49704364
LW
1884 case 'L' | TYPE_IS_SHRIEKING:
1885#if LONGSIZE != SIZE32
73cb7263 1886 while (len-- > 0) {
08ca2aa3 1887 unsigned long aulong;
f337b084
TH
1888 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1889 DO_BO_UNPACK(aulong, l);
08ca2aa3 1890 if (!checksum)
ac7f3b1b 1891 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
73cb7263
NC
1892 else if (checksum > bits_in_uv)
1893 cdouble += (NV)aulong;
1894 else
1895 cuv += aulong;
49704364
LW
1896 }
1897 break;
1898#else
1899 /* Fall through! */
1900#endif
a6ec74c1
JH
1901 case 'V':
1902 case 'N':
1903 case 'L':
73cb7263 1904 while (len-- > 0) {
08ca2aa3
TH
1905 U32 au32;
1906#if U32SIZE > SIZE32
1907 au32 = 0;
1908#endif
f337b084 1909 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1910 DO_BO_UNPACK(au32, 32);
a6ec74c1 1911#ifdef HAS_NTOHL
73cb7263
NC
1912 if (datumtype == 'N')
1913 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1914#endif
1915#ifdef HAS_VTOHL
73cb7263
NC
1916 if (datumtype == 'V')
1917 au32 = vtohl(au32);
a6ec74c1 1918#endif
08ca2aa3 1919 if (!checksum)
fc241834
RGS
1920 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1921 else if (checksum > bits_in_uv)
1922 cdouble += (NV)au32;
1923 else
1924 cuv += au32;
a6ec74c1
JH
1925 }
1926 break;
7212898e 1927#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1928 case 'V' | TYPE_IS_SHRIEKING:
1929 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1930 while (len-- > 0) {
08ca2aa3
TH
1931 I32 ai32;
1932# if U32SIZE > SIZE32
1933 ai32 = 0;
1934# endif
f337b084 1935 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1936# ifdef HAS_NTOHL
73cb7263
NC
1937 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1938 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1939# endif
1940# ifdef HAS_VTOHL
73cb7263
NC
1941 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1942 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1943# endif
1944 if (!checksum)
ac7f3b1b 1945 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1946 else if (checksum > bits_in_uv)
1947 cdouble += (NV)ai32;
1948 else
1949 cuv += ai32;
068bd2e7
MHM
1950 }
1951 break;
08ca2aa3 1952#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1953 case 'p':
a6ec74c1 1954 while (len-- > 0) {
f7fe979e 1955 const char *aptr;
f337b084 1956 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1957 DO_BO_UNPACK_PC(aptr);
c4c5f44a
NC
1958 /* newSVpv generates undef if aptr is NULL */
1959 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
a6ec74c1
JH
1960 }
1961 break;
1962 case 'w':
a6ec74c1
JH
1963 {
1964 UV auv = 0;
1965 U32 bytes = 0;
fc241834 1966
08ca2aa3
TH
1967 while (len > 0 && s < strend) {
1968 U8 ch;
f337b084 1969 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1970 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1971 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 1972 if (ch < 0x80) {
a6ec74c1 1973 bytes = 0;
ac7f3b1b 1974 PUSHs(sv_2mortal(newSVuv(auv)));
a6ec74c1
JH
1975 len--;
1976 auv = 0;
08ca2aa3 1977 continue;
a6ec74c1 1978 }
08ca2aa3 1979 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 1980 const char *t;
a6ec74c1 1981
f5992bc4 1982 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 1983 while (s < strend) {
f337b084 1984 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
1985 sv = mul128(sv, (U8)(ch & 0x7f));
1986 if (!(ch & 0x80)) {
a6ec74c1
JH
1987 bytes = 0;
1988 break;
1989 }
1990 }
10516c54 1991 t = SvPV_nolen_const(sv);
a6ec74c1
JH
1992 while (*t == '0')
1993 t++;
1994 sv_chop(sv, t);
1995 PUSHs(sv_2mortal(sv));
1996 len--;
1997 auv = 0;
1998 }
1999 }
2000 if ((s >= strend) && bytes)
49704364 2001 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
2002 }
2003 break;
2004 case 'P':
49704364
LW
2005 if (symptr->howlen == e_star)
2006 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 2007 EXTEND(SP, 1);
2d3e0934 2008 if (s + sizeof(char*) <= strend) {
08ca2aa3 2009 char *aptr;
f337b084 2010 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 2011 DO_BO_UNPACK_PC(aptr);
fc241834
RGS
2012 /* newSVpvn generates undef if aptr is NULL */
2013 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
08ca2aa3 2014 }
a6ec74c1
JH
2015 break;
2016#ifdef HAS_QUAD
2017 case 'q':
73cb7263 2018 while (len-- > 0) {
08ca2aa3 2019 Quad_t aquad;
f337b084
TH
2020 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2021 DO_BO_UNPACK(aquad, 64);
08ca2aa3
TH
2022 if (!checksum)
2023 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
ac7f3b1b 2024 newSViv((IV)aquad) : newSVnv((NV)aquad)));
73cb7263
NC
2025 else if (checksum > bits_in_uv)
2026 cdouble += (NV)aquad;
2027 else
2028 cuv += aquad;
2029 }
a6ec74c1
JH
2030 break;
2031 case 'Q':
73cb7263 2032 while (len-- > 0) {
08ca2aa3 2033 Uquad_t auquad;
f337b084
TH
2034 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2035 DO_BO_UNPACK(auquad, 64);
08ca2aa3
TH
2036 if (!checksum)
2037 PUSHs(sv_2mortal(auquad <= UV_MAX ?
2038 newSVuv((UV)auquad):newSVnv((NV)auquad)));
73cb7263
NC
2039 else if (checksum > bits_in_uv)
2040 cdouble += (NV)auquad;
2041 else
2042 cuv += auquad;
a6ec74c1
JH
2043 }
2044 break;
08ca2aa3 2045#endif /* HAS_QUAD */
a6ec74c1
JH
2046 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2047 case 'f':
73cb7263 2048 while (len-- > 0) {
08ca2aa3 2049 float afloat;
f337b084 2050 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
2051 DO_BO_UNPACK_N(afloat, float);
2052 if (!checksum)
2053 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2054 else
73cb7263 2055 cdouble += afloat;
fc241834 2056 }
a6ec74c1
JH
2057 break;
2058 case 'd':
73cb7263 2059 while (len-- > 0) {
08ca2aa3 2060 double adouble;
f337b084 2061 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
2062 DO_BO_UNPACK_N(adouble, double);
2063 if (!checksum)
2064 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2065 else
73cb7263 2066 cdouble += adouble;
fc241834 2067 }
a6ec74c1 2068 break;
92d41999 2069 case 'F':
73cb7263 2070 while (len-- > 0) {
08ca2aa3 2071 NV anv;
f337b084 2072 SHIFT_VAR(utf8, s, strend, anv, datumtype);
08ca2aa3
TH
2073 DO_BO_UNPACK_N(anv, NV);
2074 if (!checksum)
2075 PUSHs(sv_2mortal(newSVnv(anv)));
2076 else
73cb7263 2077 cdouble += anv;
fc241834 2078 }
92d41999
JH
2079 break;
2080#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2081 case 'D':
73cb7263 2082 while (len-- > 0) {
08ca2aa3 2083 long double aldouble;
f337b084 2084 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
08ca2aa3
TH
2085 DO_BO_UNPACK_N(aldouble, long double);
2086 if (!checksum)
2087 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2088 else
2089 cdouble += aldouble;
92d41999
JH
2090 }
2091 break;
2092#endif
a6ec74c1 2093 case 'u':
08ca2aa3 2094 {
f7fe979e 2095 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 2096 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
2097 if (l) SvPOK_on(sv);
2098 }
2099 if (utf8) {
2100 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2101 I32 a, b, c, d;
db187877 2102 char hunk[3];
08ca2aa3 2103
08ca2aa3
TH
2104 while (len > 0) {
2105 next_uni_uu(aTHX_ &s, strend, &a);
2106 next_uni_uu(aTHX_ &s, strend, &b);
2107 next_uni_uu(aTHX_ &s, strend, &c);
2108 next_uni_uu(aTHX_ &s, strend, &d);
2109 hunk[0] = (char)((a << 2) | (b >> 4));
2110 hunk[1] = (char)((b << 4) | (c >> 2));
2111 hunk[2] = (char)((c << 6) | d);
2112 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2113 len -= 3;
2114 }
2115 if (s < strend) {
f7fe979e
AL
2116 if (*s == '\n') {
2117 s++;
2118 }
08ca2aa3
TH
2119 else {
2120 /* possible checksum byte */
f7fe979e
AL
2121 const char *skip = s+UTF8SKIP(s);
2122 if (skip < strend && *skip == '\n')
2123 s = skip+1;
08ca2aa3
TH
2124 }
2125 }
2126 }
2127 } else {
fc241834
RGS
2128 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2129 I32 a, b, c, d;
db187877 2130 char hunk[3];
a6ec74c1 2131
fc241834
RGS
2132 len = PL_uudmap[*(U8*)s++] & 077;
2133 while (len > 0) {
2134 if (s < strend && ISUUCHAR(*s))
2135 a = PL_uudmap[*(U8*)s++] & 077;
2136 else
2137 a = 0;
2138 if (s < strend && ISUUCHAR(*s))
2139 b = PL_uudmap[*(U8*)s++] & 077;
2140 else
2141 b = 0;
2142 if (s < strend && ISUUCHAR(*s))
2143 c = PL_uudmap[*(U8*)s++] & 077;
2144 else
2145 c = 0;
2146 if (s < strend && ISUUCHAR(*s))
2147 d = PL_uudmap[*(U8*)s++] & 077;
2148 else
2149 d = 0;
2150 hunk[0] = (char)((a << 2) | (b >> 4));
2151 hunk[1] = (char)((b << 4) | (c >> 2));
2152 hunk[2] = (char)((c << 6) | d);
2153 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2154 len -= 3;
2155 }
2156 if (*s == '\n')
2157 s++;
2158 else /* possible checksum byte */
2159 if (s + 1 < strend && s[1] == '\n')
2160 s += 2;
a6ec74c1 2161 }
08ca2aa3
TH
2162 }
2163 XPUSHs(sv);
a6ec74c1
JH
2164 break;
2165 }
49704364 2166
a6ec74c1 2167 if (checksum) {
1109a392 2168 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2169 (checksum > bits_in_uv &&
08ca2aa3
TH
2170 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2171 NV trouble, anv;
a6ec74c1 2172
08ca2aa3 2173 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2174 while (checksum >= 16) {
2175 checksum -= 16;
08ca2aa3 2176 anv *= 65536.0;
a6ec74c1 2177 }
a6ec74c1 2178 while (cdouble < 0.0)
08ca2aa3
TH
2179 cdouble += anv;
2180 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 2181 sv = newSVnv(cdouble);
a6ec74c1
JH
2182 }
2183 else {
fa8ec7c1
NC
2184 if (checksum < bits_in_uv) {
2185 UV mask = ((UV)1 << checksum) - 1;
92d41999 2186 cuv &= mask;
a6ec74c1 2187 }
c4c5f44a 2188 sv = newSVuv(cuv);
a6ec74c1
JH
2189 }
2190 XPUSHs(sv_2mortal(sv));
2191 checksum = 0;
2192 }
fc241834 2193
49704364
LW
2194 if (symptr->flags & FLAG_SLASH){
2195 if (SP - PL_stack_base - start_sp_offset <= 0)
2196 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2197 if( next_symbol(symptr) ){
2198 if( symptr->howlen == e_number )
2199 Perl_croak(aTHX_ "Count after length/code in unpack" );
2200 if( beyond ){
2201 /* ...end of char buffer then no decent length available */
2202 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2203 } else {
2204 /* take top of stack (hope it's numeric) */
2205 len = POPi;
2206 if( len < 0 )
2207 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2208 }
2209 } else {
2210 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2211 }
2212 datumtype = symptr->code;
21c16052 2213 explicit_length = FALSE;
49704364
LW
2214 goto redo_switch;
2215 }
a6ec74c1 2216 }
49704364 2217
18529408
IZ
2218 if (new_s)
2219 *new_s = s;
2220 PUTBACK;
2221 return SP - PL_stack_base - start_sp_offset;
2222}
2223
2224PP(pp_unpack)
2225{
97aff369 2226 dVAR;
18529408 2227 dSP;
bab9c0ac 2228 dPOPPOPssrl;
18529408
IZ
2229 I32 gimme = GIMME_V;
2230 STRLEN llen;
2231 STRLEN rlen;
5c144d81
NC
2232 const char *pat = SvPV_const(left, llen);
2233 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
2234 const char *strend = s + rlen;
2235 const char *patend = pat + llen;
08ca2aa3 2236 I32 cnt;
18529408
IZ
2237
2238 PUTBACK;
7accc089 2239 cnt = unpackstring(pat, patend, s, strend,
49704364 2240 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2241 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2242
18529408
IZ
2243 SPAGAIN;
2244 if ( !cnt && gimme == G_SCALAR )
2245 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2246 RETURN;
2247}
2248
f337b084 2249STATIC U8 *
f7fe979e 2250doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 2251{
f337b084 2252 *h++ = PL_uuemap[len];
a6ec74c1 2253 while (len > 2) {
f337b084
TH
2254 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2255 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2256 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2257 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2258 s += 3;
2259 len -= 3;
2260 }
2261 if (len > 0) {
f7fe979e 2262 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2263 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2264 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2265 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2266 *h++ = PL_uuemap[0];
a6ec74c1 2267 }
f337b084
TH
2268 *h++ = '\n';
2269 return h;
a6ec74c1
JH
2270}
2271
2272STATIC SV *
f7fe979e 2273S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 2274{
8b6e33c7
AL
2275 SV *result = newSVpvn(s, l);
2276 char *const result_c = SvPV_nolen(result); /* convenience */
2277 char *out = result_c;
2278 bool skip = 1;
2279 bool ignore = 0;
a6ec74c1
JH
2280
2281 while (*s) {
2282 switch (*s) {
2283 case ' ':
2284 break;
2285 case '+':
2286 if (!skip) {
2287 SvREFCNT_dec(result);
2288 return (NULL);
2289 }
2290 break;
2291 case '0':
2292 case '1':
2293 case '2':
2294 case '3':
2295 case '4':
2296 case '5':
2297 case '6':
2298 case '7':
2299 case '8':
2300 case '9':
2301 skip = 0;
2302 if (!ignore) {
2303 *(out++) = *s;
2304 }
2305 break;
2306 case '.':
2307 ignore = 1;
2308 break;
2309 default:
2310 SvREFCNT_dec(result);
2311 return (NULL);
2312 }
2313 s++;
2314 }
2315 *(out++) = '\0';
2316 SvCUR_set(result, out - result_c);
2317 return (result);
2318}
2319
2320/* pnum must be '\0' terminated */
2321STATIC int
2322S_div128(pTHX_ SV *pnum, bool *done)
2323{
8b6e33c7
AL
2324 STRLEN len;
2325 char * const s = SvPV(pnum, len);
2326 char *t = s;
2327 int m = 0;
2328
2329 *done = 1;
2330 while (*t) {
2331 const int i = m * 10 + (*t - '0');
2332 const int r = (i >> 7); /* r < 10 */
2333 m = i & 0x7F;
2334 if (r) {
2335 *done = 0;
2336 }
2337 *(t++) = '0' + r;
a6ec74c1 2338 }
8b6e33c7
AL
2339 *(t++) = '\0';
2340 SvCUR_set(pnum, (STRLEN) (t - s));
2341 return (m);
a6ec74c1
JH
2342}
2343
18529408 2344/*
7accc089
JH
2345=for apidoc packlist
2346
2347The engine implementing pack() Perl function.
2348
bfce84ec
AL
2349=cut
2350*/
7accc089
JH
2351
2352void
f7fe979e 2353Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
7accc089 2354{
97aff369 2355 dVAR;
aadb217d
JH
2356 tempsym_t sym;
2357
f7fe979e 2358 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 2359
f337b084
TH
2360 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2361 Also make sure any UTF8 flag is loaded */
56eb0262 2362 SvPV_force_nolen(cat);
bfce84ec
AL
2363 if (DO_UTF8(cat))
2364 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 2365
49704364
LW
2366 (void)pack_rec( cat, &sym, beglist, endlist );
2367}
2368
f337b084
TH
2369/* like sv_utf8_upgrade, but also repoint the group start markers */
2370STATIC void
2371marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2372 STRLEN len;
2373 tempsym_t *group;
f7fe979e
AL
2374 const char *from_ptr, *from_start, *from_end, **marks, **m;
2375 char *to_start, *to_ptr;
f337b084
TH
2376
2377 if (SvUTF8(sv)) return;
2378
aa07b2f6 2379 from_start = SvPVX_const(sv);
f337b084
TH
2380 from_end = from_start + SvCUR(sv);
2381 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2382 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2383 if (from_ptr == from_end) {
2384 /* Simple case: no character needs to be changed */
2385 SvUTF8_on(sv);
2386 return;
2387 }
2388
3473cf63 2389 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2390 Newx(to_start, len, char);
f337b084
TH
2391 Copy(from_start, to_start, from_ptr-from_start, char);
2392 to_ptr = to_start + (from_ptr-from_start);
2393
a02a5408 2394 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2395 for (group=sym_ptr; group; group = group->previous)
2396 marks[group->level] = from_start + group->strbeg;
2397 marks[sym_ptr->level+1] = from_end+1;
2398 for (m = marks; *m < from_ptr; m++)
2399 *m = to_start + (*m-from_start);
2400
2401 for (;from_ptr < from_end; from_ptr++) {
2402 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2403 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2404 }
2405 *to_ptr = 0;
2406
2407 while (*m == from_ptr) *m++ = to_ptr;
2408 if (m != marks + sym_ptr->level+1) {
2409 Safefree(marks);
2410 Safefree(to_start);
2411 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2412 }
2413 for (group=sym_ptr; group; group = group->previous)
2414 group->strbeg = marks[group->level] - to_start;
2415 Safefree(marks);
2416
2417 if (SvOOK(sv)) {
2418 if (SvIVX(sv)) {
b162af07 2419 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2420 from_start -= SvIVX(sv);
2421 SvIV_set(sv, 0);
2422 }
2423 SvFLAGS(sv) &= ~SVf_OOK;
2424 }
2425 if (SvLEN(sv) != 0)
2426 Safefree(from_start);
f880fe2f 2427 SvPV_set(sv, to_start);
b162af07
SP
2428 SvCUR_set(sv, to_ptr - to_start);
2429 SvLEN_set(sv, len);
f337b084
TH
2430 SvUTF8_on(sv);
2431}
2432
2433/* Exponential string grower. Makes string extension effectively O(n)
2434 needed says how many extra bytes we need (not counting the final '\0')
2435 Only grows the string if there is an actual lack of space
2436*/
2437STATIC char *
0bd48802 2438S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2439 const STRLEN cur = SvCUR(sv);
2440 const STRLEN len = SvLEN(sv);
f337b084
TH
2441 STRLEN extend;
2442 if (len - cur > needed) return SvPVX(sv);
2443 extend = needed > len ? needed : len;
2444 return SvGROW(sv, len+extend+1);
2445}
49704364
LW
2446
2447STATIC
2448SV **
f337b084 2449S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2450{
97aff369 2451 dVAR;
49704364 2452 tempsym_t lookahead;
f337b084
TH
2453 I32 items = endlist - beglist;
2454 bool found = next_symbol(symptr);
2455 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2456 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084
TH
2457
2458 if (symptr->level == 0 && found && symptr->code == 'U') {
2459 marked_upgrade(aTHX_ cat, symptr);
2460 symptr->flags |= FLAG_DO_UTF8;
2461 utf8 = 0;
49704364 2462 }
f337b084 2463 symptr->strbeg = SvCUR(cat);
49704364
LW
2464
2465 while (found) {
f337b084
TH
2466 SV *fromstr;
2467 STRLEN fromlen;
2468 I32 len;
a0714e2c 2469 SV *lengthcode = NULL;
49704364 2470 I32 datumtype = symptr->code;
f337b084
TH
2471 howlen_t howlen = symptr->howlen;
2472 char *start = SvPVX(cat);
2473 char *cur = start + SvCUR(cat);
49704364 2474
f337b084
TH
2475#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2476
2477 switch (howlen) {
fc241834 2478 case e_star:
f337b084
TH
2479 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2480 0 : items;
2481 break;
2482 default:
2483 /* e_no_len and e_number */
2484 len = symptr->length;
49704364
LW
2485 break;
2486 }
2487
f337b084 2488 if (len) {
a7a3cfaa 2489 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2490
a7a3cfaa
TH
2491 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2492 /* We can process this letter. */
2493 STRLEN size = props & PACK_SIZE_MASK;
2494 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2495 }
f337b084
TH
2496 }
2497
49704364
LW
2498 /* Look ahead for next symbol. Do we have code/code? */
2499 lookahead = *symptr;
2500 found = next_symbol(&lookahead);
246f24af
TH
2501 if (symptr->flags & FLAG_SLASH) {
2502 IV count;
f337b084 2503 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2504 if (strchr("aAZ", lookahead.code)) {
2505 if (lookahead.howlen == e_number) count = lookahead.length;
2506 else {
ce399ba6
NC
2507 if (items > 0) {
2508 if (SvGAMAGIC(*beglist)) {
2509 /* Avoid reading the active data more than once
2510 by copying it to a temporary. */
2511 STRLEN len;
2512 const char *const pv = SvPV_const(*beglist, len);
2513 SV *const temp = sv_2mortal(newSVpvn(pv, len));
2514 if (SvUTF8(*beglist))
2515 SvUTF8_on(temp);
2516 *beglist = temp;
2517 }
246f24af
TH
2518 count = DO_UTF8(*beglist) ?
2519 sv_len_utf8(*beglist) : sv_len(*beglist);
ce399ba6 2520 }
246f24af
TH
2521 else count = 0;
2522 if (lookahead.code == 'Z') count++;
2523 }
2524 } else {
2525 if (lookahead.howlen == e_number && lookahead.length < items)
2526 count = lookahead.length;
2527 else count = items;
2528 }
2529 lookahead.howlen = e_number;
2530 lookahead.length = count;
2531 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2532 }
49704364 2533
fc241834
RGS
2534 /* Code inside the switch must take care to properly update
2535 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2536 doesn't simply leave using break */
1109a392 2537 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2538 default:
f337b084
TH
2539 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2540 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2541 case '%':
49704364 2542 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2543 {
2544 char *from;
2545#ifdef PERL_PACK_CAN_SHRIEKSIGN
2546 case '.' | TYPE_IS_SHRIEKING:
2547#endif
2548 case '.':
2549 if (howlen == e_star) from = start;
2550 else if (len == 0) from = cur;
2551 else {
2552 tempsym_t *group = symptr;
2553
2554 while (--len && group) group = group->previous;
2555 from = group ? start + group->strbeg : start;
2556 }
2557 fromstr = NEXTFROM;
2558 len = SvIV(fromstr);
2559 goto resize;
2560#ifdef PERL_PACK_CAN_SHRIEKSIGN
2561 case '@' | TYPE_IS_SHRIEKING:
2562#endif
a6ec74c1 2563 case '@':
28be1210
TH
2564 from = start + symptr->strbeg;
2565 resize:
2566#ifdef PERL_PACK_CAN_SHRIEKSIGN
2567 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2568#else /* PERL_PACK_CAN_SHRIEKSIGN */
2569 if (utf8)
2570#endif
2571 if (len >= 0) {
2572 while (len && from < cur) {
2573 from += UTF8SKIP(from);
2574 len--;
2575 }
2576 if (from > cur)
2577 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2578 if (len) {
2579 /* Here we know from == cur */
2580 grow:
2581 GROWING(0, cat, start, cur, len);
2582 Zero(cur, len, char);
2583 cur += len;
2584 } else if (from < cur) {
2585 len = cur - from;
2586 goto shrink;
2587 } else goto no_change;
2588 } else {
2589 cur = from;
2590 len = -len;
2591 goto utf8_shrink;
f337b084 2592 }
28be1210
TH
2593 else {
2594 len -= cur - from;
f337b084 2595 if (len > 0) goto grow;
28be1210 2596 if (len == 0) goto no_change;
fc241834 2597 len = -len;
28be1210 2598 goto shrink;
f337b084 2599 }
a6ec74c1 2600 break;
28be1210 2601 }
fc241834 2602 case '(': {
49704364 2603 tempsym_t savsym = *symptr;
66c611c5
MHM
2604 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2605 symptr->flags |= group_modifiers;
49704364
LW
2606 symptr->patend = savsym.grpend;
2607 symptr->level++;
f337b084 2608 symptr->previous = &lookahead;
18529408 2609 while (len--) {
f337b084
TH
2610 U32 was_utf8;
2611 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2612 else symptr->flags &= ~FLAG_PARSE_UTF8;
2613 was_utf8 = SvUTF8(cat);
49704364 2614 symptr->patptr = savsym.grpbeg;
f337b084
TH
2615 beglist = pack_rec(cat, symptr, beglist, endlist);
2616 if (SvUTF8(cat) != was_utf8)
2617 /* This had better be an upgrade while in utf8==0 mode */
2618 utf8 = 1;
2619
49704364 2620 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2621 break; /* No way to continue */
2622 }
ee790063 2623 items = endlist - beglist;
f337b084
TH
2624 lookahead.flags = symptr->flags & ~group_modifiers;
2625 goto no_change;
18529408 2626 }
62f95557
IZ
2627 case 'X' | TYPE_IS_SHRIEKING:
2628 if (!len) /* Avoid division by 0 */
2629 len = 1;
f337b084
TH
2630 if (utf8) {
2631 char *hop, *last;
2632 I32 l = len;
2633 hop = last = start;
2634 while (hop < cur) {
2635 hop += UTF8SKIP(hop);
2636 if (--l == 0) {
2637 last = hop;
2638 l = len;
2639 }
2640 }
2641 if (last > cur)
2642 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2643 cur = last;
2644 break;
2645 }
2646 len = (cur-start) % len;
62f95557 2647 /* FALL THROUGH */
a6ec74c1 2648 case 'X':
f337b084
TH
2649 if (utf8) {
2650 if (len < 1) goto no_change;
28be1210 2651 utf8_shrink:
f337b084
TH
2652 while (len > 0) {
2653 if (cur <= start)
28be1210
TH
2654 Perl_croak(aTHX_ "'%c' outside of string in pack",
2655 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2656 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2657 if (cur <= start)
28be1210
TH
2658 Perl_croak(aTHX_ "'%c' outside of string in pack",
2659 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2660 }
2661 len--;
2662 }
2663 } else {
fc241834 2664 shrink:
f337b084 2665 if (cur - start < len)
28be1210
TH
2666 Perl_croak(aTHX_ "'%c' outside of string in pack",
2667 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2668 cur -= len;
2669 }
2670 if (cur < start+symptr->strbeg) {
2671 /* Make sure group starts don't point into the void */
2672 tempsym_t *group;
9e27e96a 2673 const STRLEN length = cur-start;
f337b084
TH
2674 for (group = symptr;
2675 group && length < group->strbeg;
2676 group = group->previous) group->strbeg = length;
2677 lookahead.strbeg = length;
2678 }
a6ec74c1 2679 break;
fc241834
RGS
2680 case 'x' | TYPE_IS_SHRIEKING: {
2681 I32 ai32;
62f95557
IZ
2682 if (!len) /* Avoid division by 0 */
2683 len = 1;
230e1fce 2684 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2685 else ai32 = (cur - start) % len;
2686 if (ai32 == 0) goto no_change;
2687 len -= ai32;
2688 }
2689 /* FALL THROUGH */
a6ec74c1 2690 case 'x':
f337b084 2691 goto grow;
a6ec74c1
JH
2692 case 'A':
2693 case 'Z':
f337b084 2694 case 'a': {
f7fe979e 2695 const char *aptr;
f337b084 2696
a6ec74c1 2697 fromstr = NEXTFROM;
e62f0680 2698 aptr = SvPV_const(fromstr, fromlen);
f337b084 2699 if (DO_UTF8(fromstr)) {
f7fe979e 2700 const char *end, *s;
f337b084
TH
2701
2702 if (!utf8 && !SvUTF8(cat)) {
2703 marked_upgrade(aTHX_ cat, symptr);
2704 lookahead.flags |= FLAG_DO_UTF8;
2705 lookahead.strbeg = symptr->strbeg;
2706 utf8 = 1;
2707 start = SvPVX(cat);
2708 cur = start + SvCUR(cat);
2709 }
fc241834 2710 if (howlen == e_star) {
f337b084
TH
2711 if (utf8) goto string_copy;
2712 len = fromlen+1;
2713 }
2714 s = aptr;
2715 end = aptr + fromlen;
2716 fromlen = datumtype == 'Z' ? len-1 : len;
2717 while ((I32) fromlen > 0 && s < end) {
2718 s += UTF8SKIP(s);
2719 fromlen--;
2720 }
2721 if (s > end)
2722 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2723 if (utf8) {
fc241834 2724 len = fromlen;
f337b084
TH
2725 if (datumtype == 'Z') len++;
2726 fromlen = s-aptr;
2727 len += fromlen;
fc241834 2728
f337b084 2729 goto string_copy;
fc241834 2730 }
f337b084
TH
2731 fromlen = len - fromlen;
2732 if (datumtype == 'Z') fromlen--;
2733 if (howlen == e_star) {
2734 len = fromlen;
2735 if (datumtype == 'Z') len++;
fc241834 2736 }
f337b084 2737 GROWING(0, cat, start, cur, len);
fc241834 2738 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084
TH
2739 datumtype | TYPE_IS_PACK))
2740 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2741 cur += fromlen;
a6ec74c1 2742 len -= fromlen;
f337b084
TH
2743 } else if (utf8) {
2744 if (howlen == e_star) {
2745 len = fromlen;
2746 if (datumtype == 'Z') len++;
a6ec74c1 2747 }
f337b084
TH
2748 if (len <= (I32) fromlen) {
2749 fromlen = len;
2750 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2751 }
fc241834 2752 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2753 upgrade, so:
2754 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2755 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2756 len -= fromlen;
2757 while (fromlen > 0) {
230e1fce 2758 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2759 aptr++;
2760 fromlen--;
fc241834 2761 }
f337b084
TH
2762 } else {
2763 string_copy:
2764 if (howlen == e_star) {
2765 len = fromlen;
2766 if (datumtype == 'Z') len++;
2767 }
2768 if (len <= (I32) fromlen) {
2769 fromlen = len;
2770 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2771 }
f337b084
TH
2772 GROWING(0, cat, start, cur, len);
2773 Copy(aptr, cur, fromlen, char);
2774 cur += fromlen;
2775 len -= fromlen;
a6ec74c1 2776 }
f337b084
TH
2777 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2778 cur += len;
a6ec74c1 2779 break;
f337b084 2780 }
a6ec74c1 2781 case 'B':
f337b084 2782 case 'b': {
b83604b4 2783 const char *str, *end;
f337b084
TH
2784 I32 l, field_len;
2785 U8 bits;
2786 bool utf8_source;
2787 U32 utf8_flags;
a6ec74c1 2788
fc241834 2789 fromstr = NEXTFROM;
b83604b4 2790 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2791 end = str + fromlen;
2792 if (DO_UTF8(fromstr)) {
2793 utf8_source = TRUE;
041457d9 2794 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2795 } else {
2796 utf8_source = FALSE;
2797 utf8_flags = 0; /* Unused, but keep compilers happy */
2798 }
2799 if (howlen == e_star) len = fromlen;
2800 field_len = (len+7)/8;
2801 GROWING(utf8, cat, start, cur, field_len);
2802 if (len > (I32)fromlen) len = fromlen;
2803 bits = 0;
2804 l = 0;
2805 if (datumtype == 'B')
2806 while (l++ < len) {
2807 if (utf8_source) {
95b63a38 2808 UV val = 0;
f337b084
TH
2809 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2810 bits |= val & 1;
2811 } else bits |= *str++ & 1;
2812 if (l & 7) bits <<= 1;
fc241834 2813 else {
f337b084
TH
2814 PUSH_BYTE(utf8, cur, bits);
2815 bits = 0;
a6ec74c1
JH
2816 }
2817 }
f337b084
TH
2818 else
2819 /* datumtype == 'b' */
2820 while (l++ < len) {
2821 if (utf8_source) {
95b63a38 2822 UV val = 0;
f337b084
TH
2823 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2824 if (val & 1) bits |= 0x80;
2825 } else if (*str++ & 1)
2826 bits |= 0x80;
2827 if (l & 7) bits >>= 1;
fc241834 2828 else {
f337b084
TH
2829 PUSH_BYTE(utf8, cur, bits);
2830 bits = 0;
a6ec74c1
JH
2831 }
2832 }
f337b084
TH
2833 l--;
2834 if (l & 7) {
fc241834 2835 if (datumtype == 'B')
f337b084 2836 bits <<= 7 - (l & 7);
fc241834 2837 else
f337b084
TH
2838 bits >>= 7 - (l & 7);
2839 PUSH_BYTE(utf8, cur, bits);
2840 l += 7;
a6ec74c1 2841 }
f337b084
TH
2842 /* Determine how many chars are left in the requested field */
2843 l /= 8;
2844 if (howlen == e_star) field_len = 0;
2845 else field_len -= l;
2846 Zero(cur, field_len, char);
2847 cur += field_len;
a6ec74c1 2848 break;
f337b084 2849 }
a6ec74c1 2850 case 'H':
f337b084 2851 case 'h': {
10516c54 2852 const char *str, *end;
f337b084
TH
2853 I32 l, field_len;
2854 U8 bits;
2855 bool utf8_source;
2856 U32 utf8_flags;
a6ec74c1 2857
fc241834 2858 fromstr = NEXTFROM;
10516c54 2859 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2860 end = str + fromlen;
2861 if (DO_UTF8(fromstr)) {
2862 utf8_source = TRUE;
041457d9 2863 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2864 } else {
2865 utf8_source = FALSE;
2866 utf8_flags = 0; /* Unused, but keep compilers happy */
2867 }
2868 if (howlen == e_star) len = fromlen;
2869 field_len = (len+1)/2;
2870 GROWING(utf8, cat, start, cur, field_len);
2871 if (!utf8 && len > (I32)fromlen) len = fromlen;
2872 bits = 0;
2873 l = 0;
2874 if (datumtype == 'H')
2875 while (l++ < len) {
2876 if (utf8_source) {
95b63a38 2877 UV val = 0;
f337b084
TH
2878 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2879 if (val < 256 && isALPHA(val))
2880 bits |= (val + 9) & 0xf;
a6ec74c1 2881 else
f337b084
TH
2882 bits |= val & 0xf;
2883 } else if (isALPHA(*str))
2884 bits |= (*str++ + 9) & 0xf;
2885 else
2886 bits |= *str++ & 0xf;
2887 if (l & 1) bits <<= 4;
fc241834 2888 else {
f337b084
TH
2889 PUSH_BYTE(utf8, cur, bits);
2890 bits = 0;
a6ec74c1
JH
2891 }
2892 }
f337b084
TH
2893 else
2894 while (l++ < len) {
2895 if (utf8_source) {
95b63a38 2896 UV val = 0;
f337b084
TH
2897 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2898 if (val < 256 && isALPHA(val))
2899 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2900 else
f337b084
TH
2901 bits |= (val & 0xf) << 4;
2902 } else if (isALPHA(*str))
2903 bits |= ((*str++ + 9) & 0xf) << 4;
2904 else
2905 bits |= (*str++ & 0xf) << 4;
2906 if (l & 1) bits >>= 4;
fc241834 2907 else {
f337b084
TH
2908 PUSH_BYTE(utf8, cur, bits);
2909 bits = 0;
a6ec74c1 2910 }
fc241834 2911 }
f337b084
TH
2912 l--;
2913 if (l & 1) {
2914 PUSH_BYTE(utf8, cur, bits);
2915 l++;
2916 }
2917 /* Determine how many chars are left in the requested field */
2918 l /= 2;
2919 if (howlen == e_star) field_len = 0;
2920 else field_len -= l;
2921 Zero(cur, field_len, char);
2922 cur += field_len;
2923 break;
fc241834
RGS
2924 }
2925 case 'c':
f337b084
TH
2926 while (len-- > 0) {
2927 IV aiv;
2928 fromstr = NEXTFROM;
2929 aiv = SvIV(fromstr);
2930 if ((-128 > aiv || aiv > 127) &&
2931 ckWARN(WARN_PACK))
2932 Perl_warner(aTHX_ packWARN(WARN_PACK),
2933 "Character in 'c' format wrapped in pack");
585ec06d 2934 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2935 }
2936 break;
2937 case 'C':
f337b084
TH
2938 if (len == 0) {
2939 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2940 break;
2941 }
a6ec74c1 2942 while (len-- > 0) {
f337b084 2943 IV aiv;
a6ec74c1 2944 fromstr = NEXTFROM;
f337b084
TH
2945 aiv = SvIV(fromstr);
2946 if ((0 > aiv || aiv > 0xff) &&
fc241834
RGS
2947 ckWARN(WARN_PACK))
2948 Perl_warner(aTHX_ packWARN(WARN_PACK),
2949 "Character in 'C' format wrapped in pack");
1651fc44 2950 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2951 }
fc241834
RGS
2952 break;
2953 case 'W': {
2954 char *end;
670f1322 2955 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
2956
2957 end = start+SvLEN(cat)-1;
2958 if (utf8) end -= UTF8_MAXLEN-1;
2959 while (len-- > 0) {
2960 UV auv;
2961 fromstr = NEXTFROM;
2962 auv = SvUV(fromstr);
2963 if (in_bytes) auv = auv % 0x100;
2964 if (utf8) {
2965 W_utf8:
2966 if (cur > end) {
2967 *cur = '\0';
b162af07 2968 SvCUR_set(cat, cur - start);
fc241834
RGS
2969
2970 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2971 end = start+SvLEN(cat)-UTF8_MAXLEN;
2972 }
230e1fce
NC
2973 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2974 NATIVE_TO_UNI(auv),
041457d9 2975 warn_utf8 ?
230e1fce 2976 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
2977 } else {
2978 if (auv >= 0x100) {
2979 if (!SvUTF8(cat)) {
2980 *cur = '\0';
b162af07 2981 SvCUR_set(cat, cur - start);
fc241834
RGS
2982 marked_upgrade(aTHX_ cat, symptr);
2983 lookahead.flags |= FLAG_DO_UTF8;
2984 lookahead.strbeg = symptr->strbeg;
2985 utf8 = 1;
2986 start = SvPVX(cat);
2987 cur = start + SvCUR(cat);
2988 end = start+SvLEN(cat)-UTF8_MAXLEN;
2989 goto W_utf8;
2990 }
2991 if (ckWARN(WARN_PACK))
2992 Perl_warner(aTHX_ packWARN(WARN_PACK),
2993 "Character in 'W' format wrapped in pack");
2994 auv &= 0xff;
2995 }
2996 if (cur >= end) {
2997 *cur = '\0';
b162af07 2998 SvCUR_set(cat, cur - start);
fc241834
RGS
2999 GROWING(0, cat, start, cur, len+1);
3000 end = start+SvLEN(cat)-1;
3001 }
fe2774ed 3002 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
3003 }
3004 }
3005 break;
fc241834
RGS
3006 }
3007 case 'U': {
3008 char *end;
3009
3010 if (len == 0) {
3011 if (!(symptr->flags & FLAG_DO_UTF8)) {
3012 marked_upgrade(aTHX_ cat, symptr);
3013 lookahead.flags |= FLAG_DO_UTF8;
3014 lookahead.strbeg = symptr->strbeg;
3015 }
3016 utf8 = 0;
3017 goto no_change;
3018 }
3019
3020 end = start+SvLEN(cat);
3021 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 3022 while (len-- > 0) {
fc241834 3023 UV auv;
a6ec74c1 3024 fromstr = NEXTFROM;
fc241834
RGS
3025 auv = SvUV(fromstr);
3026 if (utf8) {
230e1fce 3027 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 3028 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 3029 warn_utf8 ?
fc241834
RGS
3030 0 : UNICODE_ALLOW_ANY);
3031 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3032 *cur = '\0';
b162af07 3033 SvCUR_set(cat, cur - start);
fc241834
RGS
3034 GROWING(0, cat, start, cur,
3035 len+(endb-buffer)*UTF8_EXPAND);
3036 end = start+SvLEN(cat);
3037 }
64844641 3038 cur = bytes_to_uni(buffer, endb-buffer, cur);
fc241834
RGS
3039 } else {
3040 if (cur >= end) {
3041 *cur = '\0';
b162af07 3042 SvCUR_set(cat, cur - start);
fc241834
RGS
3043 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3044 end = start+SvLEN(cat)-UTF8_MAXLEN;
3045 }
230e1fce 3046 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 3047 warn_utf8 ?
230e1fce 3048 0 : UNICODE_ALLOW_ANY);
fc241834 3049 }
a6ec74c1 3050 }
a6ec74c1 3051 break;
fc241834 3052 }
a6ec74c1
JH
3053 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3054 case 'f':
a6ec74c1 3055 while (len-- > 0) {
f337b084
TH
3056 float afloat;
3057 NV anv;
a6ec74c1 3058 fromstr = NEXTFROM;
f337b084 3059 anv = SvNV(fromstr);
5cdb9e01 3060#ifdef __VOS__
f337b084 3061 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3062 during conversion from double to float into infinity, so we
3063 do it by hand. This code should either be generalized for
3064 any OS that needs it, or removed if and when VOS implements
3065 posix-976 (suggestion to support mapping to infinity).
3066 Paul.Green@stratus.com 02-04-02. */
f337b084 3067 if (anv > FLT_MAX)
fc241834 3068 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3069 else if (anv < -FLT_MAX)
fc241834 3070 afloat = _float_constants[0]; /* single prec. inf. */
f337b084
TH
3071 else afloat = (float) anv;
3072#else /* __VOS__ */
baf3cf9c 3073# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3074 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3075 * on Alpha; fake it if we don't have them.
3076 */
f337b084 3077 if (anv > FLT_MAX)
fc241834 3078 afloat = FLT_MAX;
f337b084 3079 else if (anv < -FLT_MAX)
fc241834 3080 afloat = -FLT_MAX;
f337b084 3081 else afloat = (float)anv;
baf3cf9c 3082# else
f337b084 3083 afloat = (float)anv;
baf3cf9c 3084# endif
f337b084 3085#endif /* __VOS__ */
1109a392 3086 DO_BO_PACK_N(afloat, float);
f337b084 3087 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
3088 }
3089 break;
3090 case 'd':
a6ec74c1 3091 while (len-- > 0) {
f337b084
TH
3092 double adouble;
3093 NV anv;
a6ec74c1 3094 fromstr = NEXTFROM;
f337b084 3095 anv = SvNV(fromstr);
5cdb9e01 3096#ifdef __VOS__
f337b084 3097 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3098 during conversion from long double to double into infinity,
3099 so we do it by hand. This code should either be generalized
3100 for any OS that needs it, or removed if and when VOS
3101 implements posix-976 (suggestion to support mapping to
3102 infinity). Paul.Green@stratus.com 02-04-02. */
f337b084 3103 if (anv > DBL_MAX)
fc241834 3104 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3105 else if (anv < -DBL_MAX)
fc241834 3106 adouble = _double_constants[0]; /* double prec. inf. */
f337b084
TH
3107 else adouble = (double) anv;
3108#else /* __VOS__ */
baf3cf9c 3109# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3110 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3111 * on Alpha; fake it if we don't have them.
3112 */
f337b084 3113 if (anv > DBL_MAX)
fc241834 3114 adouble = DBL_MAX;
f337b084 3115 else if (anv < -DBL_MAX)
fc241834 3116 adouble = -DBL_MAX;
f337b084 3117 else adouble = (double)anv;
baf3cf9c 3118# else
f337b084 3119 adouble = (double)anv;
baf3cf9c 3120# endif
f337b084 3121#endif /* __VOS__ */
1109a392 3122 DO_BO_PACK_N(adouble, double);
f337b084 3123 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3124 }
3125 break;
fc241834
RGS
3126 case 'F': {
3127 NV anv;
1109a392 3128 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3129 while (len-- > 0) {
3130 fromstr = NEXTFROM;
3131 anv = SvNV(fromstr);
1109a392 3132 DO_BO_PACK_N(anv, NV);
fc241834 3133 PUSH_VAR(utf8, cur, anv);
92d41999
JH
3134 }
3135 break;
fc241834 3136 }
92d41999 3137#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834
RGS
3138 case 'D': {
3139 long double aldouble;
1109a392
MHM
3140 /* long doubles can have unused bits, which may be nonzero */
3141 Zero(&aldouble, 1, long double);
92d41999
JH
3142 while (len-- > 0) {
3143 fromstr = NEXTFROM;
3144 aldouble = (long double)SvNV(fromstr);
1109a392 3145 DO_BO_PACK_N(aldouble, long double);
fc241834 3146 PUSH_VAR(utf8, cur, aldouble);
92d41999
JH
3147 }
3148 break;
fc241834 3149 }
92d41999 3150#endif
7212898e 3151#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3152 case 'n' | TYPE_IS_SHRIEKING:
7212898e 3153#endif
a6ec74c1
JH
3154 case 'n':
3155 while (len-- > 0) {
f337b084 3156 I16 ai16;
a6ec74c1 3157 fromstr = NEXTFROM;
ef108786 3158 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3159#ifdef HAS_HTONS
ef108786 3160 ai16 = PerlSock_htons(ai16);
a6ec74c1 3161#endif
f337b084 3162 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3163 }
3164 break;
7212898e 3165#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3166 case 'v' | TYPE_IS_SHRIEKING:
7212898e 3167#endif
a6ec74c1
JH
3168 case 'v':
3169 while (len-- > 0) {
f337b084 3170 I16 ai16;
a6ec74c1 3171 fromstr = NEXTFROM;
ef108786 3172 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3173#ifdef HAS_HTOVS
ef108786 3174 ai16 = htovs(ai16);
a6ec74c1 3175#endif
f337b084 3176 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3177 }
3178 break;
49704364 3179 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3180#if SHORTSIZE != SIZE16
fc241834 3181 while (len-- > 0) {
f337b084 3182 unsigned short aushort;
fc241834
RGS
3183 fromstr = NEXTFROM;
3184 aushort = SvUV(fromstr);
3185 DO_BO_PACK(aushort, s);
f337b084 3186 PUSH_VAR(utf8, cur, aushort);
fc241834 3187 }
49704364
LW
3188 break;
3189#else
3190 /* Fall through! */
a6ec74c1 3191#endif
49704364 3192 case 'S':
fc241834 3193 while (len-- > 0) {
f337b084 3194 U16 au16;
fc241834
RGS
3195 fromstr = NEXTFROM;
3196 au16 = (U16)SvUV(fromstr);
3197 DO_BO_PACK(au16, 16);
f337b084 3198 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3199 }
3200 break;
49704364 3201 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3202#if SHORTSIZE != SIZE16
fc241834 3203 while (len-- > 0) {
f337b084 3204 short ashort;
fc241834
RGS
3205 fromstr = NEXTFROM;
3206 ashort = SvIV(fromstr);
3207 DO_BO_PACK(ashort, s);
f337b084 3208 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3209 }
49704364
LW
3210 break;
3211#else
3212 /* Fall through! */
a6ec74c1 3213#endif
49704364
LW
3214 case 's':
3215 while (len-- > 0) {
f337b084 3216 I16 ai16;
49704364 3217 fromstr = NEXTFROM;
ef108786
MHM
3218 ai16 = (I16)SvIV(fromstr);
3219 DO_BO_PACK(ai16, 16);
f337b084 3220 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3221 }
3222 break;
3223 case 'I':
49704364 3224 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3225 while (len-- > 0) {
f337b084 3226 unsigned int auint;
a6ec74c1
JH
3227 fromstr = NEXTFROM;
3228 auint = SvUV(fromstr);
1109a392 3229 DO_BO_PACK(auint, i);
f337b084 3230 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3231 }
3232 break;
92d41999
JH
3233 case 'j':
3234 while (len-- > 0) {
f337b084 3235 IV aiv;
92d41999
JH
3236 fromstr = NEXTFROM;
3237 aiv = SvIV(fromstr);
1109a392
MHM
3238#if IVSIZE == INTSIZE
3239 DO_BO_PACK(aiv, i);
3240#elif IVSIZE == LONGSIZE
3241 DO_BO_PACK(aiv, l);
3242#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3243 DO_BO_PACK(aiv, 64);
f337b084
TH
3244#else
3245 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3246#endif
f337b084 3247 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3248 }
3249 break;
3250 case 'J':
3251 while (len-- > 0) {
f337b084 3252 UV auv;
92d41999
JH
3253 fromstr = NEXTFROM;
3254 auv = SvUV(fromstr);
1109a392
MHM
3255#if UVSIZE == INTSIZE
3256 DO_BO_PACK(auv, i);
3257#elif UVSIZE == LONGSIZE
3258 DO_BO_PACK(auv, l);
3259#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3260 DO_BO_PACK(auv, 64);
f337b084
TH
3261#else
3262 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3263#endif
f337b084 3264 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3265 }
3266 break;
a6ec74c1
JH
3267 case 'w':
3268 while (len-- > 0) {
f337b084 3269 NV anv;
a6ec74c1 3270 fromstr = NEXTFROM;
15e9f109 3271 anv = SvNV(fromstr);
a6ec74c1 3272
f337b084
TH
3273 if (anv < 0) {
3274 *cur = '\0';
b162af07 3275 SvCUR_set(cat, cur - start);
49704364 3276 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3277 }
a6ec74c1 3278
196b62db
NC
3279 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3280 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3281 any negative IVs will have already been got by the croak()
3282 above. IOK is untrue for fractions, so we test them
3283 against UV_MAX_P1. */
f337b084
TH
3284 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3285 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3286 char *in = buf + sizeof(buf);
196b62db 3287 UV auv = SvUV(fromstr);
a6ec74c1
JH
3288
3289 do {
eb160463 3290 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3291 auv >>= 7;
3292 } while (auv);
3293 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3294 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3295 in, (buf + sizeof(buf)) - in);
3296 } else if (SvPOKp(fromstr))
3297 goto w_string;
a6ec74c1 3298 else if (SvNOKp(fromstr)) {
0258719b
NC
3299 /* 10**NV_MAX_10_EXP is the largest power of 10
3300 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3301 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3302 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3303 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3304 Some C compilers are strict about integral constant
3305 expressions so we conservatively divide by a slightly
3306 smaller integer instead of multiplying by the exact
3307 floating-point value.
0258719b
NC
3308 */
3309#ifdef NV_MAX_10_EXP
f337b084 3310 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3311 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3312#else
f337b084 3313 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3314 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3315#endif
a6ec74c1
JH
3316 char *in = buf + sizeof(buf);
3317
8b6e33c7 3318 anv = Perl_floor(anv);
a6ec74c1 3319 do {
8b6e33c7 3320 const NV next = Perl_floor(anv / 128);
a6ec74c1 3321 if (in <= buf) /* this cannot happen ;-) */
49704364 3322 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3323 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3324 anv = next;
3325 } while (anv > 0);
a6ec74c1 3326 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3327 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3328 in, (buf + sizeof(buf)) - in);
3329 } else {
8b6e33c7
AL
3330 const char *from;
3331 char *result, *in;
735b914b
JH
3332 SV *norm;
3333 STRLEN len;
3334 bool done;
3335
f337b084 3336 w_string:
735b914b 3337 /* Copy string and check for compliance */
349d4f2f 3338 from = SvPV_const(fromstr, len);
735b914b 3339 if ((norm = is_an_int(from, len)) == NULL)
49704364 3340 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 3341
a02a5408 3342 Newx(result, len, char);
735b914b
JH
3343 in = result + len;
3344 done = FALSE;
f337b084 3345 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3346 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3347 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3348 in, (result + len) - in);
735b914b
JH
3349 Safefree(result);
3350 SvREFCNT_dec(norm); /* free norm */
fc241834 3351 }
a6ec74c1
JH
3352 }
3353 break;
3354 case 'i':
49704364 3355 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3356 while (len-- > 0) {
f337b084 3357 int aint;
a6ec74c1
JH
3358 fromstr = NEXTFROM;
3359 aint = SvIV(fromstr);
1109a392 3360 DO_BO_PACK(aint, i);
f337b084 3361 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3362 }
3363 break;
7212898e 3364#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3365 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3366#endif
a6ec74c1
JH
3367 case 'N':
3368 while (len-- > 0) {
f337b084 3369 U32 au32;
a6ec74c1 3370 fromstr = NEXTFROM;
ef108786 3371 au32 = SvUV(fromstr);
a6ec74c1 3372#ifdef HAS_HTONL
ef108786 3373 au32 = PerlSock_htonl(au32);
a6ec74c1 3374#endif
f337b084 3375 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3376 }
3377 break;
7212898e 3378#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3379 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3380#endif
a6ec74c1
JH
3381 case 'V':
3382 while (len-- > 0) {
f337b084 3383 U32 au32;
a6ec74c1 3384 fromstr = NEXTFROM;
ef108786 3385 au32 = SvUV(fromstr);
a6ec74c1 3386#ifdef HAS_HTOVL
ef108786 3387 au32 = htovl(au32);
a6ec74c1 3388#endif
f337b084 3389 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3390 }
3391 break;
49704364 3392 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3393#if LONGSIZE != SIZE32
fc241834 3394 while (len-- > 0) {
f337b084 3395 unsigned long aulong;
fc241834
RGS
3396 fromstr = NEXTFROM;
3397 aulong = SvUV(fromstr);
3398 DO_BO_PACK(aulong, l);
f337b084 3399 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3400 }
49704364
LW
3401 break;
3402#else
3403 /* Fall though! */
a6ec74c1 3404#endif
49704364 3405 case 'L':
fc241834 3406 while (len-- > 0) {
f337b084 3407 U32 au32;
fc241834
RGS
3408 fromstr = NEXTFROM;
3409 au32 = SvUV(fromstr);
3410 DO_BO_PACK(au32, 32);
f337b084 3411 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3412 }
3413 break;
49704364 3414 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3415#if LONGSIZE != SIZE32
fc241834 3416 while (len-- > 0) {
f337b084 3417 long along;
fc241834
RGS
3418 fromstr = NEXTFROM;
3419 along = SvIV(fromstr);
3420 DO_BO_PACK(along, l);
f337b084 3421 PUSH_VAR(utf8, cur, along);
a6ec74c1 3422 }
49704364
LW
3423 break;
3424#else
3425 /* Fall though! */
a6ec74c1 3426#endif
49704364
LW
3427 case 'l':
3428 while (len-- > 0) {
f337b084 3429 I32 ai32;
49704364 3430 fromstr = NEXTFROM;
ef108786
MHM
3431 ai32 = SvIV(fromstr);
3432 DO_BO_PACK(ai32, 32);
f337b084 3433 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3434 }
3435 break;
3436#ifdef HAS_QUAD
3437 case 'Q':
3438 while (len-- > 0) {
f337b084 3439 Uquad_t auquad;
a6ec74c1 3440 fromstr = NEXTFROM;
f337b084 3441 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3442 DO_BO_PACK(auquad, 64);
f337b084 3443 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3444 }
3445 break;
3446 case 'q':
3447 while (len-- > 0) {
f337b084 3448 Quad_t aquad;
a6ec74c1
JH
3449 fromstr = NEXTFROM;
3450 aquad = (Quad_t)SvIV(fromstr);
1109a392 3451 DO_BO_PACK(aquad, 64);
f337b084 3452 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3453 }
3454 break;
f337b084 3455#endif /* HAS_QUAD */
a6ec74c1
JH
3456 case 'P':
3457 len = 1; /* assume SV is correct length */
f337b084 3458 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3459 /* Fall through! */
a6ec74c1
JH
3460 case 'p':
3461 while (len-- > 0) {
83003860 3462 const char *aptr;
f337b084 3463
a6ec74c1 3464 fromstr = NEXTFROM;
28a4f200 3465 SvGETMAGIC(fromstr);
3466 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3467 else {
a6ec74c1
JH
3468 /* XXX better yet, could spirit away the string to
3469 * a safe spot and hang on to it until the result
3470 * of pack() (and all copies of the result) are
3471 * gone.
3472 */
041457d9
DM
3473 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3474 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
9014280d 3475 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834 3476 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3477 }
3478 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3479 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3480 else
2596d9fe 3481 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3482 }
07409e01 3483 DO_BO_PACK_PC(aptr);
f337b084 3484 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3485 }
3486 break;
fc241834 3487 case 'u': {
f7fe979e 3488 const char *aptr, *aend;
fc241834 3489 bool from_utf8;
f337b084 3490
a6ec74c1 3491 fromstr = NEXTFROM;
fc241834
RGS
3492 if (len <= 2) len = 45;
3493 else len = len / 3 * 3;
3494 if (len >= 64) {
0fdc08b6
RGS
3495 if (ckWARN(WARN_PACK))
3496 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834
RGS
3497 "Field too wide in 'u' format in pack");
3498 len = 63;
3499 }
83003860 3500 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3501 from_utf8 = DO_UTF8(fromstr);
3502 if (from_utf8) {
3503 aend = aptr + fromlen;
3504 fromlen = sv_len_utf8(fromstr);
3505 } else aend = NULL; /* Unused, but keep compilers happy */
3506 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3507 while (fromlen > 0) {
fc241834 3508 U8 *end;
a6ec74c1 3509 I32 todo;
fc241834 3510 U8 hunk[1+63/3*4+1];
a6ec74c1 3511
eb160463 3512 if ((I32)fromlen > len)
a6ec74c1
JH
3513 todo = len;
3514 else
3515 todo = fromlen;
fc241834
RGS
3516 if (from_utf8) {
3517 char buffer[64];
3518 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3519 'u' | TYPE_IS_PACK)) {