This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Zero-ing the new HV array is pointless, as we write to every element.
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
73cb7263 4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 */
18
166f8a29
DM
19/* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
24 *
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
27 */
28
29
a6ec74c1
JH
30#include "EXTERN.h"
31#define PERL_IN_PP_PACK_C
32#include "perl.h"
33
f7fe979e
AL
34/* Types used by pack/unpack */
35typedef enum {
36 e_no_len, /* no length */
37 e_number, /* number, [] */
38 e_star /* asterisk */
39} howlen_t;
40
41typedef struct tempsym {
42 const char* patptr; /* current template char */
43 const char* patend; /* one after last char */
44 const char* grpbeg; /* 1st char of ()-group */
45 const char* grpend; /* end of ()-group */
46 I32 code; /* template code (!<>) */
47 I32 length; /* length/repeat count */
48 howlen_t howlen; /* how length is given */
49 int level; /* () nesting level */
50 U32 flags; /* /=4, comma=2, pack=1 */
51 /* and group modifiers */
52 STRLEN strbeg; /* offset of group start */
53 struct tempsym *previous; /* previous group */
54} tempsym_t;
55
56#define TEMPSYM_INIT(symptr, p, e, f) \
57 STMT_START { \
58 (symptr)->patptr = (p); \
59 (symptr)->patend = (e); \
60 (symptr)->grpbeg = NULL; \
61 (symptr)->grpend = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->code = 0; \
64 (symptr)->length = 0; \
65 (symptr)->howlen = 0; \
66 (symptr)->level = 0; \
67 (symptr)->flags = (f); \
68 (symptr)->strbeg = 0; \
69 (symptr)->previous = NULL; \
70 } STMT_END
71
7212898e 72#if PERL_VERSION >= 9
f337b084
TH
73# define PERL_PACK_CAN_BYTEORDER
74# define PERL_PACK_CAN_SHRIEKSIGN
75#endif
76
77#ifndef CHAR_BIT
78# define CHAR_BIT 8
7212898e 79#endif
3473cf63
RGS
80/* Maximum number of bytes to which a byte can grow due to upgrade */
81#define UTF8_EXPAND 2
7212898e 82
a6ec74c1 83/*
a6ec74c1
JH
84 * Offset for integer pack/unpack.
85 *
86 * On architectures where I16 and I32 aren't really 16 and 32 bits,
87 * which for now are all Crays, pack and unpack have to play games.
88 */
89
90/*
91 * These values are required for portability of pack() output.
92 * If they're not right on your machine, then pack() and unpack()
93 * wouldn't work right anyway; you'll need to apply the Cray hack.
94 * (I'd like to check them with #if, but you can't use sizeof() in
95 * the preprocessor.) --???
96 */
97/*
98 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
99 defines are now in config.h. --Andy Dougherty April 1998
100 */
101#define SIZE16 2
102#define SIZE32 4
103
104/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
105 --jhi Feb 1999 */
106
1109a392
MHM
107#if U16SIZE > SIZE16 || U32SIZE > SIZE32
108# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
109# define OFF16(p) ((char*)(p))
110# define OFF32(p) ((char*)(p))
a6ec74c1 111# else
1109a392 112# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
113# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
114# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
115# else
08ca2aa3 116 ++++ bad cray byte order
a6ec74c1
JH
117# endif
118# endif
a6ec74c1 119#else
08ca2aa3
TH
120# define OFF16(p) ((char *) (p))
121# define OFF32(p) ((char *) (p))
a6ec74c1
JH
122#endif
123
f337b084
TH
124/* Only to be used inside a loop (see the break) */
125#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
126 if (utf8) { \
127 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
128 } else { \
129 Copy(s, OFF16(p), SIZE16, char); \
130 (s) += SIZE16; \
131 } \
132} STMT_END
133
134/* Only to be used inside a loop (see the break) */
135#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
136 if (utf8) { \
137 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
138 } else { \
139 Copy(s, OFF32(p), SIZE32, char); \
140 (s) += SIZE32; \
141 } \
142} STMT_END
143
144#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
145#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
146
147/* Only to be used inside a loop (see the break) */
f337b084 148#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
08ca2aa3
TH
149STMT_START { \
150 if (utf8) { \
f337b084
TH
151 if (!uni_to_bytes(aTHX_ &s, strend, \
152 (char *) &var, sizeof(var), datumtype)) break;\
08ca2aa3
TH
153 } else { \
154 Copy(s, (char *) &var, sizeof(var), char); \
155 s += sizeof(var); \
156 } \
08ca2aa3
TH
157} STMT_END
158
f337b084 159#define PUSH_VAR(utf8, aptr, var) \
230e1fce 160 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 161
49704364
WL
162/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
163#define MAX_SUB_TEMPLATE_LEVEL 100
164
66c611c5 165/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
166#define FLAG_WAS_UTF8 0x40
167#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 168#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 169#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
170#define FLAG_SLASH 0x04
171#define FLAG_COMMA 0x02
172#define FLAG_PACK 0x01
173
a6ec74c1
JH
174STATIC SV *
175S_mul128(pTHX_ SV *sv, U8 m)
176{
177 STRLEN len;
178 char *s = SvPV(sv, len);
179 char *t;
a6ec74c1
JH
180
181 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
182 SV *tmpNew = newSVpvn("0000000000", 10);
183
184 sv_catsv(tmpNew, sv);
185 SvREFCNT_dec(sv); /* free old sv */
186 sv = tmpNew;
187 s = SvPV(sv, len);
188 }
189 t = s + len - 1;
190 while (!*t) /* trailing '\0'? */
191 t--;
192 while (t > s) {
f7fe979e 193 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
194 *(t--) = '0' + (char)(i % 10);
195 m = (char)(i / 10);
a6ec74c1
JH
196 }
197 return (sv);
198}
199
200/* Explosives and implosives. */
201
202#if 'I' == 73 && 'J' == 74
203/* On an ASCII/ISO kind of system */
204#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
205#else
206/*
207 Some other sort of character set - use memchr() so we don't match
208 the null byte.
209 */
210#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
211#endif
212
66c611c5 213/* type modifiers */
62f95557 214#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
215#define TYPE_IS_BIG_ENDIAN 0x200
216#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 217#define TYPE_IS_PACK 0x800
1109a392 218#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 219#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
220#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
221
7212898e 222#ifdef PERL_PACK_CAN_SHRIEKSIGN
28be1210 223# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
7212898e 224#else
28be1210 225# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
7212898e
NC
226#endif
227
228#ifndef PERL_PACK_CAN_BYTEORDER
229/* Put "can't" first because it is shorter */
230# define TYPE_ENDIANNESS(t) 0
231# define TYPE_NO_ENDIANNESS(t) (t)
232
233# define ENDIANNESS_ALLOWED_TYPES ""
234
235# define DO_BO_UNPACK(var, type)
236# define DO_BO_PACK(var, type)
07409e01
NC
237# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
238# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
7212898e
NC
239# define DO_BO_UNPACK_N(var, type)
240# define DO_BO_PACK_N(var, type)
241# define DO_BO_UNPACK_P(var)
242# define DO_BO_PACK_P(var)
66c611c5 243
f337b084 244#else /* PERL_PACK_CAN_BYTEORDER */
7212898e
NC
245
246# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
247# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
248
249# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
250
251# define DO_BO_UNPACK(var, type) \
1109a392 252 STMT_START { \
66c611c5 253 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
254 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
255 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
256 default: break; \
257 } \
258 } STMT_END
259
7212898e 260# define DO_BO_PACK(var, type) \
1109a392 261 STMT_START { \
66c611c5 262 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
263 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
264 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
265 default: break; \
266 } \
267 } STMT_END
268
07409e01 269# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
1109a392 270 STMT_START { \
66c611c5 271 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 272 case TYPE_IS_BIG_ENDIAN: \
07409e01 273 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
1109a392
MHM
274 break; \
275 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 276 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
1109a392
MHM
277 break; \
278 default: \
279 break; \
280 } \
281 } STMT_END
282
07409e01 283# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
1109a392 284 STMT_START { \
66c611c5 285 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 286 case TYPE_IS_BIG_ENDIAN: \
07409e01 287 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
1109a392
MHM
288 break; \
289 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 290 var = (post_cast *) my_htole ## type ((pre_cast) var); \
1109a392
MHM
291 break; \
292 default: \
293 break; \
294 } \
295 } STMT_END
296
7212898e 297# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
298 STMT_START { \
299 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
300 case TYPE_IS_BIG_ENDIAN: \
301 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
302 "platform", #action, #type); \
303 break; \
304 case TYPE_IS_LITTLE_ENDIAN: \
305 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
306 "platform", #action, #type); \
307 break; \
308 default: \
309 break; \
310 } \
311 } STMT_END
312
7212898e 313# if PTRSIZE == INTSIZE
07409e01
NC
314# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
315# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
316# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
317# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
7212898e 318# elif PTRSIZE == LONGSIZE
07409e01
NC
319# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
320# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
321# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
322# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
7212898e
NC
323# else
324# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
325# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
326# endif
1109a392 327
7212898e 328# if defined(my_htolen) && defined(my_letohn) && \
1109a392 329 defined(my_htoben) && defined(my_betohn)
7212898e 330# define DO_BO_UNPACK_N(var, type) \
1109a392 331 STMT_START { \
66c611c5 332 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
333 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
334 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
335 default: break; \
336 } \
337 } STMT_END
338
7212898e 339# define DO_BO_PACK_N(var, type) \
1109a392 340 STMT_START { \
66c611c5 341 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
342 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
343 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
344 default: break; \
345 } \
346 } STMT_END
7212898e
NC
347# else
348# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
349# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
350# endif
351
f337b084 352#endif /* PERL_PACK_CAN_BYTEORDER */
62f95557 353
78d46eaa 354#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 355#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
356#define PACK_SIZE_MASK 0x3F
357
78d46eaa
NC
358/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
359 in). You're unlikely ever to need to regenerate them. */
a7a3cfaa
TH
360
361#if TYPE_IS_SHRIEKING != 0x100
362 ++++shriek offset should be 256
363#endif
364
365typedef U8 packprops_t;
78d46eaa
NC
366#if 'J'-'I' == 1
367/* ASCII */
a7a3cfaa
TH
368const packprops_t packprops[512] = {
369 /* normal */
370 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
371 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
372 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
373 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
374 0, 0, 0,
375 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 376#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 377 /* D */ LONG_DOUBLESIZE,
80a13697 378#else
a7a3cfaa 379 0,
80a13697 380#endif
a7a3cfaa
TH
381 0,
382 /* F */ NVSIZE,
383 0, 0,
384 /* I */ sizeof(unsigned int),
385 /* J */ UVSIZE,
386 0,
387 /* L */ SIZE32,
388 0,
389 /* N */ SIZE32,
390 0, 0,
80a13697 391#if defined(HAS_QUAD)
a7a3cfaa 392 /* Q */ sizeof(Uquad_t),
80a13697 393#else
a7a3cfaa 394 0,
80a13697 395#endif
a7a3cfaa
TH
396 0,
397 /* S */ SIZE16,
398 0,
399 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
400 /* V */ SIZE32,
401 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
402 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
403 /* c */ sizeof(char),
404 /* d */ sizeof(double),
405 0,
406 /* f */ sizeof(float),
407 0, 0,
408 /* i */ sizeof(int),
409 /* j */ IVSIZE,
410 0,
411 /* l */ SIZE32,
412 0,
413 /* n */ SIZE16,
414 0,
415 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 416#if defined(HAS_QUAD)
a7a3cfaa 417 /* q */ sizeof(Quad_t),
80a13697 418#else
a7a3cfaa 419 0,
80a13697 420#endif
a7a3cfaa
TH
421 0,
422 /* s */ SIZE16,
423 0, 0,
424 /* v */ SIZE16,
425 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
426 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
427 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
428 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
429 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
430 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
431 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
432 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
433 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
434 0, 0, 0, 0, 0, 0, 0, 0,
435 /* shrieking */
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 /* I */ sizeof(unsigned int),
442 0, 0,
443 /* L */ sizeof(unsigned long),
444 0,
7212898e 445#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 446 /* N */ SIZE32,
7212898e 447#else
a7a3cfaa 448 0,
7212898e 449#endif
a7a3cfaa
TH
450 0, 0, 0, 0,
451 /* S */ sizeof(unsigned short),
452 0, 0,
7212898e 453#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 454 /* V */ SIZE32,
7212898e 455#else
a7a3cfaa 456 0,
7212898e 457#endif
a7a3cfaa
TH
458 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
459 0, 0,
460 /* i */ sizeof(int),
461 0, 0,
462 /* l */ sizeof(long),
463 0,
7212898e 464#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 465 /* n */ SIZE16,
7212898e 466#else
a7a3cfaa 467 0,
7212898e 468#endif
a7a3cfaa
TH
469 0, 0, 0, 0,
470 /* s */ sizeof(short),
471 0, 0,
7212898e 472#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 473 /* v */ SIZE16,
7212898e 474#else
a7a3cfaa 475 0,
7212898e 476#endif
a7a3cfaa
TH
477 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
478 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
479 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
480 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
481 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
482 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
483 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
484 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
485 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
486};
487#else
488/* EBCDIC (or bust) */
a7a3cfaa
TH
489const packprops_t packprops[512] = {
490 /* normal */
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
495 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
496 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
497 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
498 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
499 0, 0, 0,
500 /* c */ sizeof(char),
501 /* d */ sizeof(double),
502 0,
503 /* f */ sizeof(float),
504 0, 0,
505 /* i */ sizeof(int),
506 0, 0, 0, 0, 0, 0, 0,
507 /* j */ IVSIZE,
508 0,
509 /* l */ SIZE32,
510 0,
511 /* n */ SIZE16,
512 0,
513 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 514#if defined(HAS_QUAD)
a7a3cfaa 515 /* q */ sizeof(Quad_t),
80a13697 516#else
a7a3cfaa 517 0,
80a13697 518#endif
a7a3cfaa
TH
519 0, 0, 0, 0, 0, 0, 0, 0, 0,
520 /* s */ SIZE16,
521 0, 0,
522 /* v */ SIZE16,
523 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
524 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
525 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
526 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 527#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 528 /* D */ LONG_DOUBLESIZE,
80a13697 529#else
a7a3cfaa 530 0,
80a13697 531#endif
a7a3cfaa
TH
532 0,
533 /* F */ NVSIZE,
534 0, 0,
535 /* I */ sizeof(unsigned int),
536 0, 0, 0, 0, 0, 0, 0,
537 /* J */ UVSIZE,
538 0,
539 /* L */ SIZE32,
540 0,
541 /* N */ SIZE32,
542 0, 0,
80a13697 543#if defined(HAS_QUAD)
a7a3cfaa 544 /* Q */ sizeof(Uquad_t),
80a13697 545#else
a7a3cfaa 546 0,
80a13697 547#endif
a7a3cfaa
TH
548 0, 0, 0, 0, 0, 0, 0, 0, 0,
549 /* S */ SIZE16,
550 0,
551 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
552 /* V */ SIZE32,
553 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
554 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
555 0, 0, 0, 0, 0, 0, 0, 0, 0,
556 /* shrieking */
557 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
558 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
559 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
560 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
561 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
562 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
564 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
565 0, 0, 0, 0, 0, 0, 0, 0, 0,
566 /* i */ sizeof(int),
567 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 /* l */ sizeof(long),
569 0,
7212898e 570#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 571 /* n */ SIZE16,
7212898e 572#else
a7a3cfaa 573 0,
7212898e 574#endif
a7a3cfaa
TH
575 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
576 /* s */ sizeof(short),
577 0, 0,
7212898e 578#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 579 /* v */ SIZE16,
7212898e 580#else
a7a3cfaa 581 0,
7212898e 582#endif
a7a3cfaa
TH
583 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
584 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
585 0, 0, 0,
586 /* I */ sizeof(unsigned int),
587 0, 0, 0, 0, 0, 0, 0, 0, 0,
588 /* L */ sizeof(unsigned long),
589 0,
7212898e 590#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 591 /* N */ SIZE32,
7212898e 592#else
a7a3cfaa 593 0,
7212898e 594#endif
a7a3cfaa
TH
595 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
596 /* S */ sizeof(unsigned short),
597 0, 0,
7212898e 598#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 599 /* V */ SIZE32,
7212898e 600#else
a7a3cfaa 601 0,
7212898e 602#endif
a7a3cfaa
TH
603 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
604 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
605};
606#endif
607
08ca2aa3 608STATIC U8
f7fe979e 609uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3
TH
610{
611 UV val;
612 STRLEN retlen;
f337b084
TH
613 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
614 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
615 /* We try to process malformed UTF-8 as much as possible (preferrably with
616 warnings), but these two mean we make no progress in the string and
617 might enter an infinite loop */
618 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
619 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
620 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 621 if (val >= 0x100) {
f337b084 622 if (ckWARN(WARN_UNPACK))
08ca2aa3
TH
623 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
624 "Character in '%c' format wrapped in unpack",
f337b084 625 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
626 val &= 0xff;
627 }
628 *s += retlen;
fe2774ed 629 return (U8)val;
08ca2aa3
TH
630}
631
f337b084
TH
632#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
633 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
634 *(U8 *)(s)++)
635
636STATIC bool
f7fe979e 637uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
638{
639 UV val;
640 STRLEN retlen;
f7fe979e 641 const char *from = *s;
08ca2aa3 642 int bad = 0;
f7fe979e 643 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3
TH
644 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
645 for (;buf_len > 0; buf_len--) {
646 if (from >= end) return FALSE;
f337b084 647 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
648 if (retlen == (STRLEN) -1 || retlen == 0) {
649 from += UTF8SKIP(from);
650 bad |= 1;
651 } else from += retlen;
652 if (val >= 0x100) {
653 bad |= 2;
654 val &= 0xff;
655 }
fe2774ed 656 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
657 }
658 /* We have enough characters for the buffer. Did we have problems ? */
659 if (bad) {
660 if (bad & 1) {
661 /* Rewalk the string fragment while warning */
f7fe979e 662 const char *ptr;
9e27e96a 663 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
664 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
665 if (ptr >= end) break;
f337b084 666 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
667 }
668 if (from > end) from = end;
669 }
670 if ((bad & 2) && ckWARN(WARN_UNPACK))
fc241834 671 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084
TH
672 WARN_PACK : WARN_UNPACK),
673 "Character(s) in '%c' format wrapped in %s",
fc241834 674 (int) TYPE_NO_MODIFIERS(datumtype),
f337b084 675 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
676 }
677 *s = from;
678 return TRUE;
679}
680
681STATIC bool
f7fe979e 682next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3
TH
683{
684 UV val;
685 STRLEN retlen;
f337b084 686 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
687 if (val >= 0x100 || !ISUUCHAR(val) ||
688 retlen == (STRLEN) -1 || retlen == 0) {
689 *out = 0;
690 return FALSE;
691 }
692 *out = PL_uudmap[val] & 077;
f337b084 693 *s += retlen;
08ca2aa3
TH
694 return TRUE;
695}
78d46eaa 696
f337b084 697STATIC void
f7fe979e 698bytes_to_uni(pTHX_ const U8 *start, STRLEN len, char **dest) {
f337b084 699 U8 buffer[UTF8_MAXLEN];
f7fe979e 700 const U8 *end = start + len;
f337b084
TH
701 char *d = *dest;
702 while (start < end) {
f7fe979e 703 const int length =
f337b084
TH
704 uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
705 switch(length) {
706 case 1:
707 *d++ = buffer[0];
708 break;
709 case 2:
710 *d++ = buffer[0];
711 *d++ = buffer[1];
712 break;
713 default:
714 Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
715 *start, length);
716 }
717 start++;
718 }
719 *dest = d;
720}
721
230e1fce
NC
722#define PUSH_BYTES(utf8, cur, buf, len) \
723STMT_START { \
724 if (utf8) bytes_to_uni(aTHX_ (U8 *) buf, len, &(cur)); \
725 else { \
726 Copy(buf, cur, len, char); \
727 (cur) += (len); \
728 } \
f337b084
TH
729} STMT_END
730
731#define GROWING(utf8, cat, start, cur, in_len) \
732STMT_START { \
733 STRLEN glen = (in_len); \
3473cf63 734 if (utf8) glen *= UTF8_EXPAND; \
f337b084
TH
735 if ((cur) + glen >= (start) + SvLEN(cat)) { \
736 (start) = sv_exp_grow(aTHX_ cat, glen); \
737 (cur) = (start) + SvCUR(cat); \
738 } \
739} STMT_END
740
741#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
742STMT_START { \
f7fe979e 743 const STRLEN glen = (in_len); \
f337b084 744 STRLEN gl = glen; \
3473cf63 745 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
746 if ((cur) + gl >= (start) + SvLEN(cat)) { \
747 *cur = '\0'; \
b162af07 748 SvCUR_set((cat), (cur) - (start)); \
f337b084
TH
749 (start) = sv_exp_grow(aTHX_ cat, gl); \
750 (cur) = (start) + SvCUR(cat); \
751 } \
752 PUSH_BYTES(utf8, cur, buf, glen); \
753} STMT_END
754
755#define PUSH_BYTE(utf8, s, byte) \
756STMT_START { \
757 if (utf8) { \
f7fe979e 758 const U8 au8 = (byte); \
f337b084
TH
759 bytes_to_uni(aTHX_ &au8, 1, &(s)); \
760 } else *(U8 *)(s)++ = (byte); \
761} STMT_END
762
763/* Only to be used inside a loop (see the break) */
764#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
765STMT_START { \
766 STRLEN retlen; \
767 if (str >= end) break; \
768 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
769 if (retlen == (STRLEN) -1 || retlen == 0) { \
770 *cur = '\0'; \
771 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
772 } \
773 str += retlen; \
774} STMT_END
775
f7fe979e
AL
776static const char *_action( const tempsym_t* symptr )
777{
778 return ( symptr->flags & FLAG_PACK ) ? "pack" : "unpack";
779}
780
206947d2 781/* Returns the sizeof() struct described by pat */
028d1f6d 782STATIC I32
f337b084 783S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 784{
f337b084 785 I32 total = 0;
206947d2 786
49704364 787 while (next_symbol(symptr)) {
f337b084 788 I32 len;
f7fe979e 789 int size;
f337b084
TH
790
791 switch (symptr->howlen) {
fc241834 792 case e_star:
49704364 793 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 794 _action( symptr ) );
49704364 795 break;
f337b084
TH
796 default:
797 /* e_no_len and e_number */
798 len = symptr->length;
799 break;
49704364
WL
800 }
801
a7a3cfaa 802 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 803 if (!size) {
f7fe979e 804 int star;
80a13697
NC
805 /* endianness doesn't influence the size of a type */
806 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
807 default:
808 Perl_croak(aTHX_ "Invalid type '%c' in %s",
809 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 810 _action( symptr ) );
28be1210
TH
811#ifdef PERL_PACK_CAN_SHRIEKSIGN
812 case '.' | TYPE_IS_SHRIEKING:
813 case '@' | TYPE_IS_SHRIEKING:
814#endif
80a13697 815 case '@':
28be1210 816 case '.':
80a13697
NC
817 case '/':
818 case 'U': /* XXXX Is it correct? */
819 case 'w':
820 case 'u':
821 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 822 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 823 _action( symptr ) );
80a13697
NC
824 case '%':
825 size = 0;
826 break;
827 case '(':
fc241834
RGS
828 {
829 tempsym_t savsym = *symptr;
830 symptr->patptr = savsym.grpbeg;
831 symptr->patend = savsym.grpend;
832 /* XXXX Theoretically, we need to measure many times at
833 different positions, since the subexpression may contain
834 alignment commands, but be not of aligned length.
835 Need to detect this and croak(). */
836 size = measure_struct(symptr);
837 *symptr = savsym;
838 break;
839 }
80a13697
NC
840 case 'X' | TYPE_IS_SHRIEKING:
841 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
842 */
843 if (!len) /* Avoid division by 0 */
844 len = 1;
845 len = total % len; /* Assumed: the start is aligned. */
846 /* FALL THROUGH */
847 case 'X':
848 size = -1;
849 if (total < len)
f7fe979e 850 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
851 break;
852 case 'x' | TYPE_IS_SHRIEKING:
853 if (!len) /* Avoid division by 0 */
854 len = 1;
855 star = total % len; /* Assumed: the start is aligned. */
856 if (star) /* Other portable ways? */
857 len = len - star;
858 else
859 len = 0;
860 /* FALL THROUGH */
861 case 'x':
862 case 'A':
863 case 'Z':
864 case 'a':
80a13697
NC
865 size = 1;
866 break;
867 case 'B':
868 case 'b':
869 len = (len + 7)/8;
870 size = 1;
871 break;
872 case 'H':
873 case 'h':
874 len = (len + 1)/2;
875 size = 1;
876 break;
78d46eaa 877
80a13697
NC
878 case 'P':
879 len = 1;
880 size = sizeof(char*);
78d46eaa
NC
881 break;
882 }
206947d2
IZ
883 }
884 total += len * size;
885 }
886 return total;
887}
888
49704364
WL
889
890/* locate matching closing parenthesis or bracket
891 * returns char pointer to char after match, or NULL
892 */
f7fe979e
AL
893STATIC const char *
894S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
18529408 895{
49704364 896 while (patptr < patend) {
f7fe979e 897 const char c = *patptr++;
49704364
WL
898
899 if (isSPACE(c))
900 continue;
901 else if (c == ender)
902 return patptr-1;
903 else if (c == '#') {
904 while (patptr < patend && *patptr != '\n')
905 patptr++;
906 continue;
907 } else if (c == '(')
908 patptr = group_end(patptr, patend, ')') + 1;
909 else if (c == '[')
910 patptr = group_end(patptr, patend, ']') + 1;
18529408 911 }
49704364
WL
912 Perl_croak(aTHX_ "No group ending character '%c' found in template",
913 ender);
914 return 0;
18529408
IZ
915}
916
49704364
WL
917
918/* Convert unsigned decimal number to binary.
919 * Expects a pointer to the first digit and address of length variable
920 * Advances char pointer to 1st non-digit char and returns number
fc241834 921 */
f7fe979e
AL
922STATIC const char *
923S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
49704364
WL
924{
925 I32 len = *patptr++ - '0';
926 while (isDIGIT(*patptr)) {
927 if (len >= 0x7FFFFFFF/10)
928 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
929 len = (len * 10) + (*patptr++ - '0');
930 }
931 *lenptr = len;
932 return patptr;
933}
934
935/* The marvellous template parsing routine: Using state stored in *symptr,
936 * locates next template code and count
937 */
938STATIC bool
f337b084 939S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 940{
f7fe979e
AL
941 const char* patptr = symptr->patptr;
942 const char* patend = symptr->patend;
49704364
WL
943
944 symptr->flags &= ~FLAG_SLASH;
945
946 while (patptr < patend) {
947 if (isSPACE(*patptr))
948 patptr++;
949 else if (*patptr == '#') {
950 patptr++;
951 while (patptr < patend && *patptr != '\n')
952 patptr++;
953 if (patptr < patend)
954 patptr++;
955 } else {
fc241834 956 /* We should have found a template code */
49704364 957 I32 code = *patptr++ & 0xFF;
66c611c5 958 U32 inherited_modifiers = 0;
49704364
WL
959
960 if (code == ','){ /* grandfather in commas but with a warning */
961 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
962 symptr->flags |= FLAG_COMMA;
963 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 964 "Invalid type ',' in %s", _action( symptr ) );
49704364
WL
965 }
966 continue;
967 }
fc241834 968
49704364 969 /* for '(', skip to ')' */
fc241834 970 if (code == '(') {
49704364
WL
971 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
972 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 973 _action( symptr ) );
49704364
WL
974 symptr->grpbeg = patptr;
975 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
976 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
977 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 978 _action( symptr ) );
49704364
WL
979 }
980
66c611c5
MHM
981 /* look for group modifiers to inherit */
982 if (TYPE_ENDIANNESS(symptr->flags)) {
983 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
984 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
985 }
986
1109a392
MHM
987 /* look for modifiers */
988 while (patptr < patend) {
b7787f18
AL
989 const char *allowed;
990 I32 modifier;
1109a392
MHM
991 switch (*patptr) {
992 case '!':
993 modifier = TYPE_IS_SHRIEKING;
7212898e 994 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 995 break;
7212898e 996#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
997 case '>':
998 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 999 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
1000 break;
1001 case '<':
1002 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 1003 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 1004 break;
f337b084 1005#endif /* PERL_PACK_CAN_BYTEORDER */
1109a392 1006 default:
b7787f18
AL
1007 allowed = "";
1008 modifier = 0;
1109a392
MHM
1009 break;
1010 }
66c611c5 1011
1109a392
MHM
1012 if (modifier == 0)
1013 break;
66c611c5 1014
1109a392
MHM
1015 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1016 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 1017 allowed, _action( symptr ) );
66c611c5
MHM
1018
1019 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 1020 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 1021 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
1022 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1023 TYPE_ENDIANNESS_MASK)
1024 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 1025 *patptr, _action( symptr ) );
66c611c5 1026
1109a392
MHM
1027 if (ckWARN(WARN_UNPACK)) {
1028 if (code & modifier)
1029 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1030 "Duplicate modifier '%c' after '%c' in %s",
1031 *patptr, (int) TYPE_NO_MODIFIERS(code),
f7fe979e 1032 _action( symptr ) );
1109a392 1033 }
66c611c5 1034
1109a392
MHM
1035 code |= modifier;
1036 patptr++;
49704364
WL
1037 }
1038
66c611c5
MHM
1039 /* inherit modifiers */
1040 code |= inherited_modifiers;
1041
fc241834 1042 /* look for count and/or / */
49704364
WL
1043 if (patptr < patend) {
1044 if (isDIGIT(*patptr)) {
1045 patptr = get_num( patptr, &symptr->length );
1046 symptr->howlen = e_number;
1047
1048 } else if (*patptr == '*') {
1049 patptr++;
1050 symptr->howlen = e_star;
1051
1052 } else if (*patptr == '[') {
f7fe979e 1053 const char* lenptr = ++patptr;
49704364
WL
1054 symptr->howlen = e_number;
1055 patptr = group_end( patptr, patend, ']' ) + 1;
1056 /* what kind of [] is it? */
1057 if (isDIGIT(*lenptr)) {
1058 lenptr = get_num( lenptr, &symptr->length );
1059 if( *lenptr != ']' )
1060 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 1061 _action( symptr ) );
49704364
WL
1062 } else {
1063 tempsym_t savsym = *symptr;
1064 symptr->patend = patptr-1;
1065 symptr->patptr = lenptr;
1066 savsym.length = measure_struct(symptr);
1067 *symptr = savsym;
1068 }
1069 } else {
1070 symptr->howlen = e_no_len;
1071 symptr->length = 1;
1072 }
1073
1074 /* try to find / */
1075 while (patptr < patend) {
1076 if (isSPACE(*patptr))
1077 patptr++;
1078 else if (*patptr == '#') {
1079 patptr++;
1080 while (patptr < patend && *patptr != '\n')
1081 patptr++;
1082 if (patptr < patend)
1083 patptr++;
1084 } else {
66c611c5 1085 if (*patptr == '/') {
49704364
WL
1086 symptr->flags |= FLAG_SLASH;
1087 patptr++;
66c611c5
MHM
1088 if (patptr < patend &&
1089 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 1090 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 1091 _action( symptr ) );
49704364
WL
1092 }
1093 break;
1094 }
18529408 1095 }
49704364
WL
1096 } else {
1097 /* at end - no count, no / */
1098 symptr->howlen = e_no_len;
1099 symptr->length = 1;
1100 }
1101
1102 symptr->code = code;
fc241834 1103 symptr->patptr = patptr;
49704364 1104 return TRUE;
18529408 1105 }
49704364 1106 }
fc241834 1107 symptr->patptr = patptr;
49704364 1108 return FALSE;
18529408
IZ
1109}
1110
18529408 1111/*
fc241834 1112 There is no way to cleanly handle the case where we should process the
08ca2aa3 1113 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
1114 (e.g. estimates like strend-s as an upper bound for the number of
1115 characters left wouldn't work). So if we foresee the need of this
1116 (pattern starts with U or contains U0), we want to work on the encoded
1117 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
1118 themselves if they need to do a lot of unpacks like this on it
1119*/
f7fe979e 1120/* XXX These can be const */
fc241834 1121STATIC bool
08ca2aa3
TH
1122need_utf8(const char *pat, const char *patend)
1123{
1124 bool first = TRUE;
1125 while (pat < patend) {
1126 if (pat[0] == '#') {
1127 pat++;
f7fe979e 1128 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1129 if (!pat) return FALSE;
1130 } else if (pat[0] == 'U') {
1131 if (first || pat[1] == '0') return TRUE;
1132 } else first = FALSE;
1133 pat++;
1134 }
1135 return FALSE;
1136}
1137
1138STATIC char
1139first_symbol(const char *pat, const char *patend) {
1140 while (pat < patend) {
1141 if (pat[0] != '#') return pat[0];
1142 pat++;
f7fe979e 1143 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1144 if (!pat) return 0;
1145 pat++;
1146 }
1147 return 0;
1148}
1149
1150/*
18529408
IZ
1151=for apidoc unpack_str
1152
7accc089
JH
1153The engine implementing unpack() Perl function. Note: parameters strbeg, new_s
1154and ocnt are not used. This call should not be used, use unpackstring instead.
18529408
IZ
1155
1156=cut */
1157
1158I32
f7fe979e 1159Perl_unpack_str(pTHX_ const char *pat, const char *patend, const char *s, const char *strbeg, const char *strend, char **new_s, I32 ocnt, U32 flags)
a6ec74c1 1160{
f7fe979e 1161 tempsym_t sym;
9e27e96a
AL
1162 (void)strbeg;
1163 (void)new_s;
1164 (void)ocnt;
08ca2aa3 1165
f337b084 1166 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1167 else if (need_utf8(pat, patend)) {
1168 /* We probably should try to avoid this in case a scalar context call
1169 wouldn't get to the "U0" */
1170 STRLEN len = strend - s;
230e1fce 1171 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1172 SAVEFREEPV(s);
1173 strend = s + len;
f337b084 1174 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1175 }
1176
f337b084
TH
1177 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1178 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1179
f7fe979e 1180 TEMPSYM_INIT(&sym, pat, patend, flags);
49704364
WL
1181
1182 return unpack_rec(&sym, s, s, strend, NULL );
1183}
1184
7accc089
JH
1185/*
1186=for apidoc unpackstring
1187
608d3aed
WL
1188The engine implementing unpack() Perl function. C<unpackstring> puts the
1189extracted list items on the stack and returns the number of elements.
1190Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
7accc089
JH
1191
1192=cut */
1193
1194I32
f7fe979e 1195Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 1196{
f7fe979e 1197 tempsym_t sym;
08ca2aa3 1198
f337b084 1199 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1200 else if (need_utf8(pat, patend)) {
1201 /* We probably should try to avoid this in case a scalar context call
1202 wouldn't get to the "U0" */
1203 STRLEN len = strend - s;
230e1fce 1204 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1205 SAVEFREEPV(s);
1206 strend = s + len;
f337b084 1207 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1208 }
1209
f337b084
TH
1210 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1211 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1212
f7fe979e 1213 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
1214
1215 return unpack_rec(&sym, s, s, strend, NULL );
1216}
1217
49704364
WL
1218STATIC
1219I32
f7fe979e 1220S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 1221{
27da23d5 1222 dVAR; dSP;
18529408 1223 SV *sv;
f7fe979e 1224 const I32 start_sp_offset = SP - PL_stack_base;
49704364 1225 howlen_t howlen;
a6ec74c1 1226
a6ec74c1 1227 I32 checksum = 0;
92d41999 1228 UV cuv = 0;
a6ec74c1 1229 NV cdouble = 0.0;
f337b084 1230 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 1231 bool beyond = FALSE;
21c16052 1232 bool explicit_length;
9e27e96a 1233 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 1234 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
28be1210 1235 symptr->strbeg = s - strbeg;
49704364 1236
49704364 1237 while (next_symbol(symptr)) {
a7a3cfaa 1238 packprops_t props;
9e27e96a 1239 I32 len;
f337b084 1240 I32 datumtype = symptr->code;
206947d2 1241 /* do first one only unless in list context
08ca2aa3 1242 / is implemented by unpacking the count, then popping it from the
206947d2 1243 stack, so must check that we're not in the middle of a / */
49704364 1244 if ( unpack_only_one
206947d2 1245 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1246 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1247 break;
49704364 1248
f337b084 1249 switch (howlen = symptr->howlen) {
fc241834
RGS
1250 case e_star:
1251 len = strend - strbeg; /* long enough */
49704364 1252 break;
f337b084
TH
1253 default:
1254 /* e_no_len and e_number */
1255 len = symptr->length;
1256 break;
49704364 1257 }
18529408 1258
21c16052 1259 explicit_length = TRUE;
a6ec74c1 1260 redo_switch:
49704364 1261 beyond = s >= strend;
a7a3cfaa
TH
1262
1263 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1264 if (props) {
1265 /* props nonzero means we can process this letter. */
9e27e96a
AL
1266 const long size = props & PACK_SIZE_MASK;
1267 const long howmany = (strend - s) / size;
a7a3cfaa
TH
1268 if (len > howmany)
1269 len = howmany;
1270
1271 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1272 if (len && unpack_only_one) len = 1;
1273 EXTEND(SP, len);
1274 EXTEND_MORTAL(len);
78d46eaa
NC
1275 }
1276 }
a7a3cfaa 1277
1109a392 1278 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1279 default:
1109a392 1280 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1281
a6ec74c1 1282 case '%':
49704364 1283 if (howlen == e_no_len)
18529408 1284 len = 16; /* len is not specified */
a6ec74c1 1285 checksum = len;
92d41999 1286 cuv = 0;
a6ec74c1 1287 cdouble = 0;
18529408 1288 continue;
a6ec74c1 1289 break;
18529408
IZ
1290 case '(':
1291 {
49704364 1292 tempsym_t savsym = *symptr;
9e27e96a 1293 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 1294 symptr->flags |= group_modifiers;
49704364 1295 symptr->patend = savsym.grpend;
28be1210 1296 symptr->previous = &savsym;
49704364 1297 symptr->level++;
18529408
IZ
1298 PUTBACK;
1299 while (len--) {
49704364 1300 symptr->patptr = savsym.grpbeg;
f337b084
TH
1301 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1302 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1303 unpack_rec(symptr, s, strbeg, strend, &s);
1304 if (s == strend && savsym.howlen == e_star)
49704364 1305 break; /* No way to continue */
18529408
IZ
1306 }
1307 SPAGAIN;
28be1210 1308 savsym.flags = symptr->flags & ~group_modifiers;
49704364 1309 *symptr = savsym;
18529408
IZ
1310 break;
1311 }
28be1210
TH
1312#ifdef PERL_PACK_CAN_SHRIEKSIGN
1313 case '.' | TYPE_IS_SHRIEKING:
1314#endif
1315 case '.': {
9e27e96a 1316 const char *from;
28be1210
TH
1317 SV *sv;
1318#ifdef PERL_PACK_CAN_SHRIEKSIGN
9e27e96a 1319 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210 1320#else /* PERL_PACK_CAN_SHRIEKSIGN */
9e27e96a 1321 const bool u8 = utf8;
28be1210
TH
1322#endif
1323 if (howlen == e_star) from = strbeg;
1324 else if (len <= 0) from = s;
1325 else {
1326 tempsym_t *group = symptr;
1327
1328 while (--len && group) group = group->previous;
1329 from = group ? strbeg + group->strbeg : strbeg;
1330 }
1331 sv = from <= s ?
00646304
CB
1332 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1333 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
28be1210
TH
1334 XPUSHs(sv_2mortal(sv));
1335 break;
1336 }
1337#ifdef PERL_PACK_CAN_SHRIEKSIGN
1338 case '@' | TYPE_IS_SHRIEKING:
1339#endif
a6ec74c1 1340 case '@':
28be1210
TH
1341 s = strbeg + symptr->strbeg;
1342#ifdef PERL_PACK_CAN_SHRIEKSIGN
1343 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1344#else /* PERL_PACK_CAN_SHRIEKSIGN */
1345 if (utf8)
1346#endif
1347 {
08ca2aa3
TH
1348 while (len > 0) {
1349 if (s >= strend)
1350 Perl_croak(aTHX_ "'@' outside of string in unpack");
1351 s += UTF8SKIP(s);
1352 len--;
1353 }
1354 if (s > strend)
1355 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1356 } else {
28be1210 1357 if (strend-s < len)
fc241834 1358 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1359 s += len;
08ca2aa3 1360 }
a6ec74c1 1361 break;
62f95557
IZ
1362 case 'X' | TYPE_IS_SHRIEKING:
1363 if (!len) /* Avoid division by 0 */
1364 len = 1;
08ca2aa3 1365 if (utf8) {
f7fe979e 1366 const char *hop, *last;
f337b084
TH
1367 I32 l = len;
1368 hop = last = strbeg;
1369 while (hop < s) {
1370 hop += UTF8SKIP(hop);
1371 if (--l == 0) {
08ca2aa3 1372 last = hop;
f337b084
TH
1373 l = len;
1374 }
fc241834 1375 }
f337b084
TH
1376 if (last > s)
1377 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1378 s = last;
1379 break;
f337b084
TH
1380 }
1381 len = (s - strbeg) % len;
62f95557 1382 /* FALL THROUGH */
a6ec74c1 1383 case 'X':
08ca2aa3
TH
1384 if (utf8) {
1385 while (len > 0) {
1386 if (s <= strbeg)
1387 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1388 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1389 if (s <= strbeg)
1390 Perl_croak(aTHX_ "'X' outside of string in unpack");
1391 }
1392 len--;
1393 }
1394 } else {
fc241834
RGS
1395 if (len > s - strbeg)
1396 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1397 s -= len;
08ca2aa3 1398 }
a6ec74c1 1399 break;
9e27e96a
AL
1400 case 'x' | TYPE_IS_SHRIEKING: {
1401 I32 ai32;
62f95557
IZ
1402 if (!len) /* Avoid division by 0 */
1403 len = 1;
230e1fce
NC
1404 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1405 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1406 if (ai32 == 0) break;
1407 len -= ai32;
9e27e96a 1408 }
62f95557 1409 /* FALL THROUGH */
a6ec74c1 1410 case 'x':
08ca2aa3
TH
1411 if (utf8) {
1412 while (len>0) {
1413 if (s >= strend)
1414 Perl_croak(aTHX_ "'x' outside of string in unpack");
1415 s += UTF8SKIP(s);
1416 len--;
1417 }
1418 } else {
fc241834
RGS
1419 if (len > strend - s)
1420 Perl_croak(aTHX_ "'x' outside of string in unpack");
1421 s += len;
f337b084 1422 }
a6ec74c1
JH
1423 break;
1424 case '/':
49704364
WL
1425 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1426 break;
a6ec74c1
JH
1427 case 'A':
1428 case 'Z':
1429 case 'a':
08ca2aa3
TH
1430 if (checksum) {
1431 /* Preliminary length estimate is assumed done in 'W' */
1432 if (len > strend - s) len = strend - s;
1433 goto W_checksum;
1434 }
1435 if (utf8) {
1436 I32 l;
f7fe979e 1437 const char *hop;
08ca2aa3
TH
1438 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1439 if (hop >= strend) {
1440 if (hop > strend)
1441 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1442 break;
fc241834 1443 }
a6ec74c1 1444 }
08ca2aa3
TH
1445 if (hop > strend)
1446 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1447 len = hop - s;
1448 } else if (len > strend - s)
1449 len = strend - s;
1450
1451 if (datumtype == 'Z') {
1452 /* 'Z' strips stuff after first null */
f7fe979e 1453 const char *ptr, *end;
f337b084
TH
1454 end = s + len;
1455 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1456 sv = newSVpvn(s, ptr-s);
1457 if (howlen == e_star) /* exact for 'Z*' */
1458 len = ptr-s + (ptr != strend ? 1 : 0);
1459 } else if (datumtype == 'A') {
1460 /* 'A' strips both nulls and spaces */
f7fe979e 1461 const char *ptr;
18bdf90a
TH
1462 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1463 for (ptr = s+len-1; ptr >= s; ptr--)
1464 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
230e1fce 1465 !is_utf8_space((U8 *) ptr)) break;
18bdf90a
TH
1466 if (ptr >= s) ptr += UTF8SKIP(ptr);
1467 else ptr++;
28be1210 1468 if (ptr > s+len)
18bdf90a
TH
1469 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1470 } else {
1471 for (ptr = s+len-1; ptr >= s; ptr--)
1472 if (*ptr != 0 && !isSPACE(*ptr)) break;
1473 ptr++;
1474 }
08ca2aa3
TH
1475 sv = newSVpvn(s, ptr-s);
1476 } else sv = newSVpvn(s, len);
1477
1478 if (utf8) {
1479 SvUTF8_on(sv);
1480 /* Undo any upgrade done due to need_utf8() */
f337b084 1481 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1482 sv_utf8_downgrade(sv, 0);
a6ec74c1
JH
1483 }
1484 XPUSHs(sv_2mortal(sv));
08ca2aa3 1485 s += len;
a6ec74c1
JH
1486 break;
1487 case 'B':
08ca2aa3
TH
1488 case 'b': {
1489 char *str;
49704364 1490 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1491 len = (strend - s) * 8;
1492 if (checksum) {
1493 if (!PL_bitcount) {
08ca2aa3 1494 int bits;
a6ec74c1
JH
1495 Newz(601, PL_bitcount, 256, char);
1496 for (bits = 1; bits < 256; bits++) {
1497 if (bits & 1) PL_bitcount[bits]++;
1498 if (bits & 2) PL_bitcount[bits]++;
1499 if (bits & 4) PL_bitcount[bits]++;
1500 if (bits & 8) PL_bitcount[bits]++;
1501 if (bits & 16) PL_bitcount[bits]++;
1502 if (bits & 32) PL_bitcount[bits]++;
1503 if (bits & 64) PL_bitcount[bits]++;
1504 if (bits & 128) PL_bitcount[bits]++;
1505 }
1506 }
f337b084 1507 if (utf8)
08ca2aa3 1508 while (len >= 8 && s < strend) {
f337b084 1509 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1510 len -= 8;
1511 }
f337b084 1512 else
fc241834 1513 while (len >= 8) {
08ca2aa3 1514 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1515 len -= 8;
1516 }
08ca2aa3
TH
1517 if (len && s < strend) {
1518 U8 bits;
f337b084
TH
1519 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1520 if (datumtype == 'b')
a6ec74c1 1521 while (len-- > 0) {
92d41999 1522 if (bits & 1) cuv++;
a6ec74c1
JH
1523 bits >>= 1;
1524 }
f337b084 1525 else
a6ec74c1 1526 while (len-- > 0) {
08ca2aa3 1527 if (bits & 0x80) cuv++;
a6ec74c1
JH
1528 bits <<= 1;
1529 }
fc241834 1530 }
a6ec74c1
JH
1531 break;
1532 }
08ca2aa3
TH
1533
1534 sv = sv_2mortal(NEWSV(35, len ? len : 1));
a6ec74c1
JH
1535 SvPOK_on(sv);
1536 str = SvPVX(sv);
1537 if (datumtype == 'b') {
f337b084 1538 U8 bits = 0;
f7fe979e 1539 const I32 ai32 = len;
08ca2aa3
TH
1540 for (len = 0; len < ai32; len++) {
1541 if (len & 7) bits >>= 1;
1542 else if (utf8) {
1543 if (s >= strend) break;
f337b084 1544 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1545 } else bits = *(U8 *) s++;
1546 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1547 }
08ca2aa3 1548 } else {
f337b084 1549 U8 bits = 0;
f7fe979e 1550 const I32 ai32 = len;
08ca2aa3
TH
1551 for (len = 0; len < ai32; len++) {
1552 if (len & 7) bits <<= 1;
1553 else if (utf8) {
1554 if (s >= strend) break;
f337b084 1555 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1556 } else bits = *(U8 *) s++;
1557 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1558 }
1559 }
1560 *str = '\0';
08ca2aa3
TH
1561 SvCUR_set(sv, str - SvPVX(sv));
1562 XPUSHs(sv);
a6ec74c1 1563 break;
08ca2aa3 1564 }
a6ec74c1 1565 case 'H':
08ca2aa3
TH
1566 case 'h': {
1567 char *str;
fc241834 1568 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1569 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1570 len = (strend - s) * 2;
fc241834 1571 sv = sv_2mortal(NEWSV(35, len ? len : 1));
a6ec74c1
JH
1572 SvPOK_on(sv);
1573 str = SvPVX(sv);
1574 if (datumtype == 'h') {
f337b084 1575 U8 bits = 0;
9e27e96a 1576 I32 ai32 = len;
fc241834
RGS
1577 for (len = 0; len < ai32; len++) {
1578 if (len & 1) bits >>= 4;
1579 else if (utf8) {
1580 if (s >= strend) break;
f337b084 1581 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1582 } else bits = * (U8 *) s++;
a6ec74c1
JH
1583 *str++ = PL_hexdigit[bits & 15];
1584 }
08ca2aa3 1585 } else {
f337b084 1586 U8 bits = 0;
f7fe979e 1587 const I32 ai32 = len;
08ca2aa3
TH
1588 for (len = 0; len < ai32; len++) {
1589 if (len & 1) bits <<= 4;
1590 else if (utf8) {
1591 if (s >= strend) break;
f337b084 1592 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1593 } else bits = *(U8 *) s++;
a6ec74c1
JH
1594 *str++ = PL_hexdigit[(bits >> 4) & 15];
1595 }
1596 }
1597 *str = '\0';
08ca2aa3
TH
1598 SvCUR_set(sv, str - SvPVX(sv));
1599 XPUSHs(sv);
a6ec74c1 1600 break;
08ca2aa3 1601 }
a6ec74c1 1602 case 'c':
73cb7263 1603 while (len-- > 0) {
f337b084 1604 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
73cb7263
NC
1605 if (aint >= 128) /* fake up signed chars */
1606 aint -= 256;
08ca2aa3 1607 if (!checksum)
ac7f3b1b 1608 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1609 else if (checksum > bits_in_uv)
1610 cdouble += (NV)aint;
1611 else
1612 cuv += aint;
a6ec74c1
JH
1613 }
1614 break;
1615 case 'C':
08ca2aa3
TH
1616 case 'W':
1617 W_checksum:
35bcd338 1618 if (len == 0) {
fc241834 1619 if (explicit_length && datumtype == 'C')
08ca2aa3 1620 /* Switch to "character" mode */
f337b084 1621 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
35bcd338
JH
1622 break;
1623 }
fc241834 1624 if (datumtype == 'C' ?
f337b084
TH
1625 (symptr->flags & FLAG_DO_UTF8) &&
1626 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
08ca2aa3 1627 while (len-- > 0 && s < strend) {
08ca2aa3 1628 STRLEN retlen;
f7fe979e 1629 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1630 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1631 if (retlen == (STRLEN) -1 || retlen == 0)
1632 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1633 s += retlen;
1634 if (!checksum)
1635 PUSHs(sv_2mortal(newSVuv((UV) val)));
1636 else if (checksum > bits_in_uv)
1637 cdouble += (NV) val;
d6d3e8bd 1638 else
08ca2aa3 1639 cuv += val;
fc241834 1640 }
08ca2aa3 1641 } else if (!checksum)
a6ec74c1 1642 while (len-- > 0) {
f7fe979e 1643 const U8 ch = *(U8 *) s++;
08ca2aa3 1644 PUSHs(sv_2mortal(newSVuv((UV) ch)));
a6ec74c1 1645 }
08ca2aa3
TH
1646 else if (checksum > bits_in_uv)
1647 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1648 else
1649 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1650 break;
1651 case 'U':
35bcd338 1652 if (len == 0) {
08ca2aa3
TH
1653 if (explicit_length) {
1654 /* Switch to "bytes in UTF-8" mode */
f337b084 1655 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1656 else
1657 /* Should be impossible due to the need_utf8() test */
1658 Perl_croak(aTHX_ "U0 mode on a byte string");
1659 }
35bcd338
JH
1660 break;
1661 }
08ca2aa3 1662 if (len > strend - s) len = strend - s;
fc241834 1663 if (!checksum) {
08ca2aa3
TH
1664 if (len && unpack_only_one) len = 1;
1665 EXTEND(SP, len);
1666 EXTEND_MORTAL(len);
fc241834 1667 }
08ca2aa3
TH
1668 while (len-- > 0 && s < strend) {
1669 STRLEN retlen;
1670 UV auv;
1671 if (utf8) {
1672 U8 result[UTF8_MAXLEN];
f7fe979e 1673 const char *ptr = s;
08ca2aa3 1674 STRLEN len;
08ca2aa3
TH
1675 /* Bug: warns about bad utf8 even if we are short on bytes
1676 and will break out of the loop */
230e1fce
NC
1677 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1678 'U'))
08ca2aa3
TH
1679 break;
1680 len = UTF8SKIP(result);
fc241834 1681 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1682 (char *) &result[1], len-1, 'U')) break;
08ca2aa3
TH
1683 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1684 s = ptr;
1685 } else {
1686 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1687 if (retlen == (STRLEN) -1 || retlen == 0)
1688 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1689 s += retlen;
1690 }
1691 if (!checksum)
1692 PUSHs(sv_2mortal(newSVuv((UV) auv)));
73cb7263 1693 else if (checksum > bits_in_uv)
08ca2aa3 1694 cdouble += (NV) auv;
73cb7263 1695 else
08ca2aa3 1696 cuv += auv;
a6ec74c1
JH
1697 }
1698 break;
49704364
WL
1699 case 's' | TYPE_IS_SHRIEKING:
1700#if SHORTSIZE != SIZE16
73cb7263 1701 while (len-- > 0) {
08ca2aa3 1702 short ashort;
f337b084
TH
1703 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1704 DO_BO_UNPACK(ashort, s);
08ca2aa3 1705 if (!checksum)
ac7f3b1b 1706 PUSHs(sv_2mortal(newSViv((IV)ashort)));
73cb7263
NC
1707 else if (checksum > bits_in_uv)
1708 cdouble += (NV)ashort;
1709 else
1710 cuv += ashort;
49704364
WL
1711 }
1712 break;
1713#else
1714 /* Fallthrough! */
a6ec74c1 1715#endif
49704364 1716 case 's':
73cb7263 1717 while (len-- > 0) {
08ca2aa3
TH
1718 I16 ai16;
1719
1720#if U16SIZE > SIZE16
1721 ai16 = 0;
1722#endif
f337b084 1723 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1724 DO_BO_UNPACK(ai16, 16);
1109a392 1725#if U16SIZE > SIZE16
73cb7263
NC
1726 if (ai16 > 32767)
1727 ai16 -= 65536;
a6ec74c1 1728#endif
08ca2aa3 1729 if (!checksum)
ac7f3b1b 1730 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263
NC
1731 else if (checksum > bits_in_uv)
1732 cdouble += (NV)ai16;
1733 else
1734 cuv += ai16;
a6ec74c1
JH
1735 }
1736 break;
49704364
WL
1737 case 'S' | TYPE_IS_SHRIEKING:
1738#if SHORTSIZE != SIZE16
73cb7263 1739 while (len-- > 0) {
08ca2aa3 1740 unsigned short aushort;
f337b084
TH
1741 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1742 DO_BO_UNPACK(aushort, s);
08ca2aa3
TH
1743 if (!checksum)
1744 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
73cb7263
NC
1745 else if (checksum > bits_in_uv)
1746 cdouble += (NV)aushort;
1747 else
1748 cuv += aushort;
49704364
WL
1749 }
1750 break;
1751#else
1752 /* Fallhrough! */
1753#endif
a6ec74c1
JH
1754 case 'v':
1755 case 'n':
1756 case 'S':
73cb7263 1757 while (len-- > 0) {
08ca2aa3
TH
1758 U16 au16;
1759#if U16SIZE > SIZE16
1760 au16 = 0;
1761#endif
f337b084 1762 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1763 DO_BO_UNPACK(au16, 16);
a6ec74c1 1764#ifdef HAS_NTOHS
73cb7263
NC
1765 if (datumtype == 'n')
1766 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1767#endif
1768#ifdef HAS_VTOHS
73cb7263
NC
1769 if (datumtype == 'v')
1770 au16 = vtohs(au16);
a6ec74c1 1771#endif
08ca2aa3
TH
1772 if (!checksum)
1773 PUSHs(sv_2mortal(newSVuv((UV)au16)));
73cb7263 1774 else if (checksum > bits_in_uv)
f337b084 1775 cdouble += (NV) au16;
73cb7263
NC
1776 else
1777 cuv += au16;
a6ec74c1
JH
1778 }
1779 break;
7212898e 1780#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1781 case 'v' | TYPE_IS_SHRIEKING:
1782 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1783 while (len-- > 0) {
08ca2aa3
TH
1784 I16 ai16;
1785# if U16SIZE > SIZE16
1786 ai16 = 0;
1787# endif
f337b084 1788 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1789# ifdef HAS_NTOHS
73cb7263 1790 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1791 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1792# endif /* HAS_NTOHS */
1793# ifdef HAS_VTOHS
73cb7263 1794 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1795 ai16 = (I16) vtohs((U16) ai16);
1796# endif /* HAS_VTOHS */
1797 if (!checksum)
ac7f3b1b 1798 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263 1799 else if (checksum > bits_in_uv)
08ca2aa3 1800 cdouble += (NV) ai16;
73cb7263
NC
1801 else
1802 cuv += ai16;
068bd2e7
MHM
1803 }
1804 break;
08ca2aa3 1805#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1806 case 'i':
49704364 1807 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1808 while (len-- > 0) {
08ca2aa3 1809 int aint;
f337b084
TH
1810 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1811 DO_BO_UNPACK(aint, i);
08ca2aa3 1812 if (!checksum)
ac7f3b1b 1813 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1814 else if (checksum > bits_in_uv)
1815 cdouble += (NV)aint;
1816 else
1817 cuv += aint;
a6ec74c1
JH
1818 }
1819 break;
1820 case 'I':
49704364 1821 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1822 while (len-- > 0) {
08ca2aa3 1823 unsigned int auint;
f337b084
TH
1824 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1825 DO_BO_UNPACK(auint, i);
08ca2aa3 1826 if (!checksum)
ac7f3b1b 1827 PUSHs(sv_2mortal(newSVuv((UV)auint)));
73cb7263
NC
1828 else if (checksum > bits_in_uv)
1829 cdouble += (NV)auint;
1830 else
1831 cuv += auint;
a6ec74c1
JH
1832 }
1833 break;
92d41999 1834 case 'j':
73cb7263 1835 while (len-- > 0) {
08ca2aa3 1836 IV aiv;
f337b084 1837 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1838#if IVSIZE == INTSIZE
f337b084 1839 DO_BO_UNPACK(aiv, i);
1109a392 1840#elif IVSIZE == LONGSIZE
f337b084 1841 DO_BO_UNPACK(aiv, l);
1109a392 1842#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1843 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1844#else
1845 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1846#endif
08ca2aa3 1847 if (!checksum)
ac7f3b1b 1848 PUSHs(sv_2mortal(newSViv(aiv)));
73cb7263
NC
1849 else if (checksum > bits_in_uv)
1850 cdouble += (NV)aiv;
1851 else
1852 cuv += aiv;
92d41999
JH
1853 }
1854 break;
1855 case 'J':
73cb7263 1856 while (len-- > 0) {
08ca2aa3 1857 UV auv;
f337b084 1858 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1859#if IVSIZE == INTSIZE
f337b084 1860 DO_BO_UNPACK(auv, i);
08ca2aa3 1861#elif IVSIZE == LONGSIZE
f337b084 1862 DO_BO_UNPACK(auv, l);
08ca2aa3 1863#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1864 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1865#else
1866 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1867#endif
08ca2aa3 1868 if (!checksum)
ac7f3b1b 1869 PUSHs(sv_2mortal(newSVuv(auv)));
73cb7263
NC
1870 else if (checksum > bits_in_uv)
1871 cdouble += (NV)auv;
1872 else
1873 cuv += auv;
92d41999
JH
1874 }
1875 break;
49704364
WL
1876 case 'l' | TYPE_IS_SHRIEKING:
1877#if LONGSIZE != SIZE32
73cb7263 1878 while (len-- > 0) {
08ca2aa3 1879 long along;
f337b084
TH
1880 SHIFT_VAR(utf8, s, strend, along, datumtype);
1881 DO_BO_UNPACK(along, l);
08ca2aa3 1882 if (!checksum)
ac7f3b1b 1883 PUSHs(sv_2mortal(newSViv((IV)along)));
73cb7263
NC
1884 else if (checksum > bits_in_uv)
1885 cdouble += (NV)along;
1886 else
1887 cuv += along;
49704364
WL
1888 }
1889 break;
1890#else
1891 /* Fallthrough! */
a6ec74c1 1892#endif
49704364 1893 case 'l':
73cb7263 1894 while (len-- > 0) {
08ca2aa3
TH
1895 I32 ai32;
1896#if U32SIZE > SIZE32
1897 ai32 = 0;
1898#endif
f337b084 1899 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1900 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1901#if U32SIZE > SIZE32
08ca2aa3 1902 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1903#endif
08ca2aa3 1904 if (!checksum)
ac7f3b1b 1905 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1906 else if (checksum > bits_in_uv)
1907 cdouble += (NV)ai32;
1908 else
1909 cuv += ai32;
a6ec74c1
JH
1910 }
1911 break;
49704364
WL
1912 case 'L' | TYPE_IS_SHRIEKING:
1913#if LONGSIZE != SIZE32
73cb7263 1914 while (len-- > 0) {
08ca2aa3 1915 unsigned long aulong;
f337b084
TH
1916 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1917 DO_BO_UNPACK(aulong, l);
08ca2aa3 1918 if (!checksum)
ac7f3b1b 1919 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
73cb7263
NC
1920 else if (checksum > bits_in_uv)
1921 cdouble += (NV)aulong;
1922 else
1923 cuv += aulong;
49704364
WL
1924 }
1925 break;
1926#else
1927 /* Fall through! */
1928#endif
a6ec74c1
JH
1929 case 'V':
1930 case 'N':
1931 case 'L':
73cb7263 1932 while (len-- > 0) {
08ca2aa3
TH
1933 U32 au32;
1934#if U32SIZE > SIZE32
1935 au32 = 0;
1936#endif
f337b084 1937 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1938 DO_BO_UNPACK(au32, 32);
a6ec74c1 1939#ifdef HAS_NTOHL
73cb7263
NC
1940 if (datumtype == 'N')
1941 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1942#endif
1943#ifdef HAS_VTOHL
73cb7263
NC
1944 if (datumtype == 'V')
1945 au32 = vtohl(au32);
a6ec74c1 1946#endif
08ca2aa3 1947 if (!checksum)
fc241834
RGS
1948 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1949 else if (checksum > bits_in_uv)
1950 cdouble += (NV)au32;
1951 else
1952 cuv += au32;
a6ec74c1
JH
1953 }
1954 break;
7212898e 1955#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1956 case 'V' | TYPE_IS_SHRIEKING:
1957 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1958 while (len-- > 0) {
08ca2aa3
TH
1959 I32 ai32;
1960# if U32SIZE > SIZE32
1961 ai32 = 0;
1962# endif
f337b084 1963 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1964# ifdef HAS_NTOHL
73cb7263
NC
1965 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1966 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1967# endif
1968# ifdef HAS_VTOHL
73cb7263
NC
1969 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1970 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1971# endif
1972 if (!checksum)
ac7f3b1b 1973 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1974 else if (checksum > bits_in_uv)
1975 cdouble += (NV)ai32;
1976 else
1977 cuv += ai32;
068bd2e7
MHM
1978 }
1979 break;
08ca2aa3 1980#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1981 case 'p':
a6ec74c1 1982 while (len-- > 0) {
f7fe979e 1983 const char *aptr;
f337b084 1984 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1985 DO_BO_UNPACK_PC(aptr);
c4c5f44a
NC
1986 /* newSVpv generates undef if aptr is NULL */
1987 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
a6ec74c1
JH
1988 }
1989 break;
1990 case 'w':
a6ec74c1
JH
1991 {
1992 UV auv = 0;
1993 U32 bytes = 0;
fc241834 1994
08ca2aa3
TH
1995 while (len > 0 && s < strend) {
1996 U8 ch;
f337b084 1997 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1998 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1999 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 2000 if (ch < 0x80) {
a6ec74c1 2001 bytes = 0;
ac7f3b1b 2002 PUSHs(sv_2mortal(newSVuv(auv)));
a6ec74c1
JH
2003 len--;
2004 auv = 0;
08ca2aa3 2005 continue;
a6ec74c1 2006 }
08ca2aa3 2007 if (++bytes >= sizeof(UV)) { /* promote to string */
a6ec74c1
JH
2008 char *t;
2009 STRLEN n_a;
2010
2011 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
2012 while (s < strend) {
f337b084 2013 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
2014 sv = mul128(sv, (U8)(ch & 0x7f));
2015 if (!(ch & 0x80)) {
a6ec74c1
JH
2016 bytes = 0;
2017 break;
2018 }
2019 }
2020 t = SvPV(sv, n_a);
2021 while (*t == '0')
2022 t++;
2023 sv_chop(sv, t);
2024 PUSHs(sv_2mortal(sv));
2025 len--;
2026 auv = 0;
2027 }
2028 }
2029 if ((s >= strend) && bytes)
49704364 2030 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
2031 }
2032 break;
2033 case 'P':
49704364
WL
2034 if (symptr->howlen == e_star)
2035 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 2036 EXTEND(SP, 1);
08ca2aa3
TH
2037 if (sizeof(char*) <= strend - s) {
2038 char *aptr;
f337b084 2039 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 2040 DO_BO_UNPACK_PC(aptr);
fc241834
RGS
2041 /* newSVpvn generates undef if aptr is NULL */
2042 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
08ca2aa3 2043 }
a6ec74c1
JH
2044 break;
2045#ifdef HAS_QUAD
2046 case 'q':
73cb7263 2047 while (len-- > 0) {
08ca2aa3 2048 Quad_t aquad;
f337b084
TH
2049 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2050 DO_BO_UNPACK(aquad, 64);
08ca2aa3
TH
2051 if (!checksum)
2052 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
ac7f3b1b 2053 newSViv((IV)aquad) : newSVnv((NV)aquad)));
73cb7263
NC
2054 else if (checksum > bits_in_uv)
2055 cdouble += (NV)aquad;
2056 else
2057 cuv += aquad;
2058 }
a6ec74c1
JH
2059 break;
2060 case 'Q':
73cb7263 2061 while (len-- > 0) {
08ca2aa3 2062 Uquad_t auquad;
f337b084
TH
2063 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2064 DO_BO_UNPACK(auquad, 64);
08ca2aa3
TH
2065 if (!checksum)
2066 PUSHs(sv_2mortal(auquad <= UV_MAX ?
2067 newSVuv((UV)auquad):newSVnv((NV)auquad)));
73cb7263
NC
2068 else if (checksum > bits_in_uv)
2069 cdouble += (NV)auquad;
2070 else
2071 cuv += auquad;
a6ec74c1
JH
2072 }
2073 break;
08ca2aa3 2074#endif /* HAS_QUAD */
a6ec74c1
JH
2075 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2076 case 'f':
73cb7263 2077 while (len-- > 0) {
08ca2aa3 2078 float afloat;
f337b084 2079 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
2080 DO_BO_UNPACK_N(afloat, float);
2081 if (!checksum)
2082 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2083 else
73cb7263 2084 cdouble += afloat;
fc241834 2085 }
a6ec74c1
JH
2086 break;
2087 case 'd':
73cb7263 2088 while (len-- > 0) {
08ca2aa3 2089 double adouble;
f337b084 2090 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
2091 DO_BO_UNPACK_N(adouble, double);
2092 if (!checksum)
2093 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2094 else
73cb7263 2095 cdouble += adouble;
fc241834 2096 }
a6ec74c1 2097 break;
92d41999 2098 case 'F':
73cb7263 2099 while (len-- > 0) {
08ca2aa3 2100 NV anv;
f337b084 2101 SHIFT_VAR(utf8, s, strend, anv, datumtype);
08ca2aa3
TH
2102 DO_BO_UNPACK_N(anv, NV);
2103 if (!checksum)
2104 PUSHs(sv_2mortal(newSVnv(anv)));
2105 else
73cb7263 2106 cdouble += anv;
fc241834 2107 }
92d41999
JH
2108 break;
2109#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2110 case 'D':
73cb7263 2111 while (len-- > 0) {
08ca2aa3 2112 long double aldouble;
f337b084 2113 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
08ca2aa3
TH
2114 DO_BO_UNPACK_N(aldouble, long double);
2115 if (!checksum)
2116 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2117 else
2118 cdouble += aldouble;
92d41999
JH
2119 }
2120 break;
2121#endif
a6ec74c1
JH
2122 case 'u':
2123 /* MKS:
2124 * Initialise the decode mapping. By using a table driven
2125 * algorithm, the code will be character-set independent
2126 * (and just as fast as doing character arithmetic)
2127 */
2128 if (PL_uudmap['M'] == 0) {
2129 int i;
2130
2131 for (i = 0; i < sizeof(PL_uuemap); i += 1)
2132 PL_uudmap[(U8)PL_uuemap[i]] = i;
2133 /*
2134 * Because ' ' and '`' map to the same value,
2135 * we need to decode them both the same.
2136 */
2137 PL_uudmap[' '] = 0;
2138 }
08ca2aa3 2139 {
f7fe979e 2140 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
08ca2aa3
TH
2141 sv = sv_2mortal(NEWSV(42, l));
2142 if (l) SvPOK_on(sv);
2143 }
2144 if (utf8) {
2145 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2146 I32 a, b, c, d;
2147 char hunk[4];
2148
2149 hunk[3] = '\0';
2150 while (len > 0) {
2151 next_uni_uu(aTHX_ &s, strend, &a);
2152 next_uni_uu(aTHX_ &s, strend, &b);
2153 next_uni_uu(aTHX_ &s, strend, &c);
2154 next_uni_uu(aTHX_ &s, strend, &d);
2155 hunk[0] = (char)((a << 2) | (b >> 4));
2156 hunk[1] = (char)((b << 4) | (c >> 2));
2157 hunk[2] = (char)((c << 6) | d);
2158 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2159 len -= 3;
2160 }
2161 if (s < strend) {
f7fe979e
AL
2162 if (*s == '\n') {
2163 s++;
2164 }
08ca2aa3
TH
2165 else {
2166 /* possible checksum byte */
f7fe979e
AL
2167 const char *skip = s+UTF8SKIP(s);
2168 if (skip < strend && *skip == '\n')
2169 s = skip+1;
08ca2aa3
TH
2170 }
2171 }
2172 }
2173 } else {
fc241834
RGS
2174 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2175 I32 a, b, c, d;
2176 char hunk[4];
a6ec74c1 2177
fc241834
RGS
2178 hunk[3] = '\0';
2179 len = PL_uudmap[*(U8*)s++] & 077;
2180 while (len > 0) {
2181 if (s < strend && ISUUCHAR(*s))
2182 a = PL_uudmap[*(U8*)s++] & 077;
2183 else
2184 a = 0;
2185 if (s < strend && ISUUCHAR(*s))
2186 b = PL_uudmap[*(U8*)s++] & 077;
2187 else
2188 b = 0;
2189 if (s < strend && ISUUCHAR(*s))
2190 c = PL_uudmap[*(U8*)s++] & 077;
2191 else
2192 c = 0;
2193 if (s < strend && ISUUCHAR(*s))
2194 d = PL_uudmap[*(U8*)s++] & 077;
2195 else
2196 d = 0;
2197 hunk[0] = (char)((a << 2) | (b >> 4));
2198 hunk[1] = (char)((b << 4) | (c >> 2));
2199 hunk[2] = (char)((c << 6) | d);
2200 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2201 len -= 3;
2202 }
2203 if (*s == '\n')
2204 s++;
2205 else /* possible checksum byte */
2206 if (s + 1 < strend && s[1] == '\n')
2207 s += 2;
a6ec74c1 2208 }
08ca2aa3
TH
2209 }
2210 XPUSHs(sv);
a6ec74c1
JH
2211 break;
2212 }
49704364 2213
a6ec74c1 2214 if (checksum) {
1109a392 2215 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2216 (checksum > bits_in_uv &&
08ca2aa3
TH
2217 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2218 NV trouble, anv;
a6ec74c1 2219
08ca2aa3 2220 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2221 while (checksum >= 16) {
2222 checksum -= 16;
08ca2aa3 2223 anv *= 65536.0;
a6ec74c1 2224 }
a6ec74c1 2225 while (cdouble < 0.0)
08ca2aa3
TH
2226 cdouble += anv;
2227 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 2228 sv = newSVnv(cdouble);
a6ec74c1
JH
2229 }
2230 else {
fa8ec7c1
NC
2231 if (checksum < bits_in_uv) {
2232 UV mask = ((UV)1 << checksum) - 1;
92d41999 2233 cuv &= mask;
a6ec74c1 2234 }
c4c5f44a 2235 sv = newSVuv(cuv);
a6ec74c1
JH
2236 }
2237 XPUSHs(sv_2mortal(sv));
2238 checksum = 0;
2239 }
fc241834 2240
49704364
WL
2241 if (symptr->flags & FLAG_SLASH){
2242 if (SP - PL_stack_base - start_sp_offset <= 0)
2243 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2244 if( next_symbol(symptr) ){
2245 if( symptr->howlen == e_number )
2246 Perl_croak(aTHX_ "Count after length/code in unpack" );
2247 if( beyond ){
2248 /* ...end of char buffer then no decent length available */
2249 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2250 } else {
2251 /* take top of stack (hope it's numeric) */
2252 len = POPi;
2253 if( len < 0 )
2254 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2255 }
2256 } else {
2257 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2258 }
2259 datumtype = symptr->code;
21c16052 2260 explicit_length = FALSE;
49704364
WL
2261 goto redo_switch;
2262 }
a6ec74c1 2263 }
49704364 2264
18529408
IZ
2265 if (new_s)
2266 *new_s = s;
2267 PUTBACK;
2268 return SP - PL_stack_base - start_sp_offset;
2269}
2270
2271PP(pp_unpack)
2272{
2273 dSP;
bab9c0ac 2274 dPOPPOPssrl;
18529408
IZ
2275 I32 gimme = GIMME_V;
2276 STRLEN llen;
2277 STRLEN rlen;
f7fe979e
AL
2278 const char *pat = SvPV(left, llen);
2279 const char *s = SvPV(right, rlen);
2280 const char *strend = s + rlen;
2281 const char *patend = pat + llen;
08ca2aa3 2282 I32 cnt;
18529408
IZ
2283
2284 PUTBACK;
7accc089 2285 cnt = unpackstring(pat, patend, s, strend,
49704364 2286 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2287 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2288
18529408
IZ
2289 SPAGAIN;
2290 if ( !cnt && gimme == G_SCALAR )
2291 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2292 RETURN;
2293}
2294
f337b084 2295STATIC U8 *
f7fe979e 2296doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 2297{
f337b084 2298 *h++ = PL_uuemap[len];
a6ec74c1 2299 while (len > 2) {
f337b084
TH
2300 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2301 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2302 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2303 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2304 s += 3;
2305 len -= 3;
2306 }
2307 if (len > 0) {
f7fe979e 2308 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2309 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2310 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2311 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2312 *h++ = PL_uuemap[0];
a6ec74c1 2313 }
f337b084
TH
2314 *h++ = '\n';
2315 return h;
a6ec74c1
JH
2316}
2317
2318STATIC SV *
f7fe979e 2319S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1
JH
2320{
2321 STRLEN n_a;
2322 SV *result = newSVpvn(s, l);
2323 char *result_c = SvPV(result, n_a); /* convenience */
2324 char *out = result_c;
2325 bool skip = 1;
2326 bool ignore = 0;
2327
2328 while (*s) {
2329 switch (*s) {
2330 case ' ':
2331 break;
2332 case '+':
2333 if (!skip) {
2334 SvREFCNT_dec(result);
2335 return (NULL);
2336 }
2337 break;
2338 case '0':
2339 case '1':
2340 case '2':
2341 case '3':
2342 case '4':
2343 case '5':
2344 case '6':
2345 case '7':
2346 case '8':
2347 case '9':
2348 skip = 0;
2349 if (!ignore) {
2350 *(out++) = *s;
2351 }
2352 break;
2353 case '.':
2354 ignore = 1;
2355 break;
2356 default:
2357 SvREFCNT_dec(result);
2358 return (NULL);
2359 }
2360 s++;
2361 }
2362 *(out++) = '\0';
2363 SvCUR_set(result, out - result_c);
2364 return (result);
2365}
2366
2367/* pnum must be '\0' terminated */
2368STATIC int
2369S_div128(pTHX_ SV *pnum, bool *done)
2370{
2371 STRLEN len;
2372 char *s = SvPV(pnum, len);
2373 int m = 0;
2374 int r = 0;
2375 char *t = s;
2376
2377 *done = 1;
2378 while (*t) {
f7fe979e 2379 const int i = m * 10 + (*t - '0');
a6ec74c1
JH
2380 m = i & 0x7F;
2381 r = (i >> 7); /* r < 10 */
2382 if (r) {
2383 *done = 0;
2384 }
2385 *(t++) = '0' + r;
2386 }
2387 *(t++) = '\0';
2388 SvCUR_set(pnum, (STRLEN) (t - s));
2389 return (m);
2390}
2391
18529408
IZ
2392/*
2393=for apidoc pack_cat
2394
7accc089
JH
2395The engine implementing pack() Perl function. Note: parameters next_in_list and
2396flags are not used. This call should not be used; use packlist instead.
18529408
IZ
2397
2398=cut */
2399
49704364 2400
18529408 2401void
f7fe979e 2402Perl_pack_cat(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags)
a6ec74c1 2403{
aadb217d 2404 tempsym_t sym;
9e27e96a
AL
2405 (void)next_in_list;
2406 (void)flags;
7accc089 2407
f7fe979e 2408 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
aadb217d 2409
7accc089
JH
2410 (void)pack_rec( cat, &sym, beglist, endlist );
2411}
2412
2413
2414/*
2415=for apidoc packlist
2416
2417The engine implementing pack() Perl function.
2418
2419=cut */
2420
2421
2422void
f7fe979e 2423Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
7accc089 2424{
f337b084 2425 STRLEN no_len;
aadb217d
JH
2426 tempsym_t sym;
2427
f7fe979e 2428 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 2429
f337b084
TH
2430 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2431 Also make sure any UTF8 flag is loaded */
2432 SvPV_force(cat, no_len);
2433 if (DO_UTF8(cat)) sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2434
49704364
WL
2435 (void)pack_rec( cat, &sym, beglist, endlist );
2436}
2437
f337b084
TH
2438/* like sv_utf8_upgrade, but also repoint the group start markers */
2439STATIC void
2440marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2441 STRLEN len;
2442 tempsym_t *group;
f7fe979e
AL
2443 const char *from_ptr, *from_start, *from_end, **marks, **m;
2444 char *to_start, *to_ptr;
f337b084
TH
2445
2446 if (SvUTF8(sv)) return;
2447
2448 from_start = SvPVX(sv);
2449 from_end = from_start + SvCUR(sv);
2450 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2451 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2452 if (from_ptr == from_end) {
2453 /* Simple case: no character needs to be changed */
2454 SvUTF8_on(sv);
2455 return;
2456 }
2457
3473cf63 2458 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
f337b084
TH
2459 New('U', to_start, len, char);
2460 Copy(from_start, to_start, from_ptr-from_start, char);
2461 to_ptr = to_start + (from_ptr-from_start);
2462
f7fe979e 2463 New('U', marks, sym_ptr->level+2, const char *);
f337b084
TH
2464 for (group=sym_ptr; group; group = group->previous)
2465 marks[group->level] = from_start + group->strbeg;
2466 marks[sym_ptr->level+1] = from_end+1;
2467 for (m = marks; *m < from_ptr; m++)
2468 *m = to_start + (*m-from_start);
2469
2470 for (;from_ptr < from_end; from_ptr++) {
2471 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2472 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2473 }
2474 *to_ptr = 0;
2475
2476 while (*m == from_ptr) *m++ = to_ptr;
2477 if (m != marks + sym_ptr->level+1) {
2478 Safefree(marks);
2479 Safefree(to_start);
2480 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2481 }
2482 for (group=sym_ptr; group; group = group->previous)
2483 group->strbeg = marks[group->level] - to_start;
2484 Safefree(marks);
2485
2486 if (SvOOK(sv)) {
2487 if (SvIVX(sv)) {
b162af07 2488 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2489 from_start -= SvIVX(sv);
2490 SvIV_set(sv, 0);
2491 }
2492 SvFLAGS(sv) &= ~SVf_OOK;
2493 }
2494 if (SvLEN(sv) != 0)
2495 Safefree(from_start);
f880fe2f 2496 SvPV_set(sv, to_start);
b162af07
SP
2497 SvCUR_set(sv, to_ptr - to_start);
2498 SvLEN_set(sv, len);
f337b084
TH
2499 SvUTF8_on(sv);
2500}
2501
2502/* Exponential string grower. Makes string extension effectively O(n)
2503 needed says how many extra bytes we need (not counting the final '\0')
2504 Only grows the string if there is an actual lack of space
2505*/
2506STATIC char *
2507sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2508 const STRLEN cur = SvCUR(sv);
2509 const STRLEN len = SvLEN(sv);
f337b084
TH
2510 STRLEN extend;
2511 if (len - cur > needed) return SvPVX(sv);
2512 extend = needed > len ? needed : len;
2513 return SvGROW(sv, len+extend+1);
2514}
49704364
WL
2515
2516STATIC
2517SV **
f337b084 2518S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2519{
49704364 2520 tempsym_t lookahead;
f337b084
TH
2521 I32 items = endlist - beglist;
2522 bool found = next_symbol(symptr);
2523 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2524
2525 if (symptr->level == 0 && found && symptr->code == 'U') {
2526 marked_upgrade(aTHX_ cat, symptr);
2527 symptr->flags |= FLAG_DO_UTF8;
2528 utf8 = 0;
49704364 2529 }
f337b084 2530 symptr->strbeg = SvCUR(cat);
49704364
WL
2531
2532 while (found) {
f337b084
TH
2533 SV *fromstr;
2534 STRLEN fromlen;
2535 I32 len;
a6ec74c1 2536 SV *lengthcode = Nullsv;
49704364 2537 I32 datumtype = symptr->code;
f337b084
TH
2538 howlen_t howlen = symptr->howlen;
2539 char *start = SvPVX(cat);
2540 char *cur = start + SvCUR(cat);
49704364 2541
f337b084
TH
2542#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2543
2544 switch (howlen) {
fc241834 2545 case e_star:
f337b084
TH
2546 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2547 0 : items;
2548 break;
2549 default:
2550 /* e_no_len and e_number */
2551 len = symptr->length;
49704364
WL
2552 break;
2553 }
2554
f337b084 2555 if (len) {
a7a3cfaa 2556 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2557
a7a3cfaa
TH
2558 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2559 /* We can process this letter. */
2560 STRLEN size = props & PACK_SIZE_MASK;
2561 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2562 }
f337b084
TH
2563 }
2564
49704364
WL
2565 /* Look ahead for next symbol. Do we have code/code? */
2566 lookahead = *symptr;
2567 found = next_symbol(&lookahead);
246f24af
TH
2568 if (symptr->flags & FLAG_SLASH) {
2569 IV count;
f337b084 2570 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2571 if (strchr("aAZ", lookahead.code)) {
2572 if (lookahead.howlen == e_number) count = lookahead.length;
2573 else {
2574 if (items > 0)
2575 count = DO_UTF8(*beglist) ?
2576 sv_len_utf8(*beglist) : sv_len(*beglist);
2577 else count = 0;
2578 if (lookahead.code == 'Z') count++;
2579 }
2580 } else {
2581 if (lookahead.howlen == e_number && lookahead.length < items)
2582 count = lookahead.length;
2583 else count = items;
2584 }
2585 lookahead.howlen = e_number;
2586 lookahead.length = count;
2587 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2588 }
49704364 2589
fc241834
RGS
2590 /* Code inside the switch must take care to properly update
2591 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2592 doesn't simply leave using break */
1109a392 2593 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2594 default:
f337b084
TH
2595 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2596 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2597 case '%':
49704364 2598 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2599 {
2600 char *from;
2601#ifdef PERL_PACK_CAN_SHRIEKSIGN
2602 case '.' | TYPE_IS_SHRIEKING:
2603#endif
2604 case '.':
2605 if (howlen == e_star) from = start;
2606 else if (len == 0) from = cur;
2607 else {
2608 tempsym_t *group = symptr;
2609
2610 while (--len && group) group = group->previous;
2611 from = group ? start + group->strbeg : start;
2612 }
2613 fromstr = NEXTFROM;
2614 len = SvIV(fromstr);
2615 goto resize;
2616#ifdef PERL_PACK_CAN_SHRIEKSIGN
2617 case '@' | TYPE_IS_SHRIEKING:
2618#endif
a6ec74c1 2619 case '@':
28be1210
TH
2620 from = start + symptr->strbeg;
2621 resize:
2622#ifdef PERL_PACK_CAN_SHRIEKSIGN
2623 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2624#else /* PERL_PACK_CAN_SHRIEKSIGN */
2625 if (utf8)
2626#endif
2627 if (len >= 0) {
2628 while (len && from < cur) {
2629 from += UTF8SKIP(from);
2630 len--;
2631 }
2632 if (from > cur)
2633 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2634 if (len) {
2635 /* Here we know from == cur */
2636 grow:
2637 GROWING(0, cat, start, cur, len);
2638 Zero(cur, len, char);
2639 cur += len;
2640 } else if (from < cur) {
2641 len = cur - from;
2642 goto shrink;
2643 } else goto no_change;
2644 } else {
2645 cur = from;
2646 len = -len;
2647 goto utf8_shrink;
f337b084 2648 }
28be1210
TH
2649 else {
2650 len -= cur - from;
f337b084 2651 if (len > 0) goto grow;
28be1210 2652 if (len == 0) goto no_change;
fc241834 2653 len = -len;
28be1210 2654 goto shrink;
f337b084 2655 }
a6ec74c1 2656 break;
28be1210 2657 }
fc241834 2658 case '(': {
49704364 2659 tempsym_t savsym = *symptr;
66c611c5
MHM
2660 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2661 symptr->flags |= group_modifiers;
49704364
WL
2662 symptr->patend = savsym.grpend;
2663 symptr->level++;
f337b084 2664 symptr->previous = &lookahead;
18529408 2665 while (len--) {
f337b084
TH
2666 U32 was_utf8;
2667 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2668 else symptr->flags &= ~FLAG_PARSE_UTF8;
2669 was_utf8 = SvUTF8(cat);
49704364 2670 symptr->patptr = savsym.grpbeg;
f337b084
TH
2671 beglist = pack_rec(cat, symptr, beglist, endlist);
2672 if (SvUTF8(cat) != was_utf8)
2673 /* This had better be an upgrade while in utf8==0 mode */
2674 utf8 = 1;
2675
49704364 2676 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2677 break; /* No way to continue */
2678 }
f337b084
TH
2679 lookahead.flags = symptr->flags & ~group_modifiers;
2680 goto no_change;
18529408 2681 }
62f95557
IZ
2682 case 'X' | TYPE_IS_SHRIEKING:
2683 if (!len) /* Avoid division by 0 */
2684 len = 1;
f337b084
TH
2685 if (utf8) {
2686 char *hop, *last;
2687 I32 l = len;
2688 hop = last = start;
2689 while (hop < cur) {
2690 hop += UTF8SKIP(hop);
2691 if (--l == 0) {
2692 last = hop;
2693 l = len;
2694 }
2695 }
2696 if (last > cur)
2697 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2698 cur = last;
2699 break;
2700 }
2701 len = (cur-start) % len;
62f95557 2702 /* FALL THROUGH */
a6ec74c1 2703 case 'X':
f337b084
TH
2704 if (utf8) {
2705 if (len < 1) goto no_change;
28be1210 2706 utf8_shrink:
f337b084
TH
2707 while (len > 0) {
2708 if (cur <= start)
28be1210
TH
2709 Perl_croak(aTHX_ "'%c' outside of string in pack",
2710 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2711 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2712 if (cur <= start)
28be1210
TH
2713 Perl_croak(aTHX_ "'%c' outside of string in pack",
2714 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2715 }
2716 len--;
2717 }
2718 } else {
fc241834 2719 shrink:
f337b084 2720 if (cur - start < len)
28be1210
TH
2721 Perl_croak(aTHX_ "'%c' outside of string in pack",
2722 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2723 cur -= len;
2724 }
2725 if (cur < start+symptr->strbeg) {
2726 /* Make sure group starts don't point into the void */
2727 tempsym_t *group;
9e27e96a 2728 const STRLEN length = cur-start;
f337b084
TH
2729 for (group = symptr;
2730 group && length < group->strbeg;
2731 group = group->previous) group->strbeg = length;
2732 lookahead.strbeg = length;
2733 }
a6ec74c1 2734 break;
fc241834
RGS
2735 case 'x' | TYPE_IS_SHRIEKING: {
2736 I32 ai32;
62f95557
IZ
2737 if (!len) /* Avoid division by 0 */
2738 len = 1;
230e1fce 2739 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2740 else ai32 = (cur - start) % len;
2741 if (ai32 == 0) goto no_change;
2742 len -= ai32;
2743 }
2744 /* FALL THROUGH */
a6ec74c1 2745 case 'x':
f337b084 2746 goto grow;
a6ec74c1
JH
2747 case 'A':
2748 case 'Z':
f337b084 2749 case 'a': {
f7fe979e 2750 const char *aptr;
f337b084 2751
a6ec74c1
JH
2752 fromstr = NEXTFROM;
2753 aptr = SvPV(fromstr, fromlen);
f337b084 2754 if (DO_UTF8(fromstr)) {
f7fe979e 2755 const char *end, *s;
f337b084
TH
2756
2757 if (!utf8 && !SvUTF8(cat)) {
2758 marked_upgrade(aTHX_ cat, symptr);
2759 lookahead.flags |= FLAG_DO_UTF8;
2760 lookahead.strbeg = symptr->strbeg;
2761 utf8 = 1;
2762 start = SvPVX(cat);
2763 cur = start + SvCUR(cat);
2764 }
fc241834 2765 if (howlen == e_star) {
f337b084
TH
2766 if (utf8) goto string_copy;
2767 len = fromlen+1;
2768 }
2769 s = aptr;
2770 end = aptr + fromlen;
2771 fromlen = datumtype == 'Z' ? len-1 : len;
2772 while ((I32) fromlen > 0 && s < end) {
2773 s += UTF8SKIP(s);
2774 fromlen--;
2775 }
2776 if (s > end)
2777 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2778 if (utf8) {
fc241834 2779 len = fromlen;
f337b084
TH
2780 if (datumtype == 'Z') len++;
2781 fromlen = s-aptr;
2782 len += fromlen;
fc241834 2783
f337b084 2784 goto string_copy;
fc241834 2785 }
f337b084
TH
2786 fromlen = len - fromlen;
2787 if (datumtype == 'Z') fromlen--;
2788 if (howlen == e_star) {
2789 len = fromlen;
2790 if (datumtype == 'Z') len++;
fc241834 2791 }
f337b084 2792 GROWING(0, cat, start, cur, len);
fc241834 2793 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084
TH
2794 datumtype | TYPE_IS_PACK))
2795 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2796 cur += fromlen;
a6ec74c1 2797 len -= fromlen;
f337b084
TH
2798 } else if (utf8) {
2799 if (howlen == e_star) {
2800 len = fromlen;
2801 if (datumtype == 'Z') len++;
a6ec74c1 2802 }
f337b084
TH
2803 if (len <= (I32) fromlen) {
2804 fromlen = len;
2805 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2806 }
fc241834 2807 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2808 upgrade, so:
2809 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2810 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2811 len -= fromlen;
2812 while (fromlen > 0) {
230e1fce 2813 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2814 aptr++;
2815 fromlen--;
fc241834 2816 }
f337b084
TH
2817 } else {
2818 string_copy:
2819 if (howlen == e_star) {
2820 len = fromlen;
2821 if (datumtype == 'Z') len++;
2822 }
2823 if (len <= (I32) fromlen) {
2824 fromlen = len;
2825 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2826 }
f337b084
TH
2827 GROWING(0, cat, start, cur, len);
2828 Copy(aptr, cur, fromlen, char);
2829 cur += fromlen;
2830 len -= fromlen;
a6ec74c1 2831 }
f337b084
TH
2832 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2833 cur += len;
a6ec74c1 2834 break;
f337b084 2835 }
a6ec74c1 2836 case 'B':
f337b084
TH
2837 case 'b': {
2838 char *str, *end;
2839 I32 l, field_len;
2840 U8 bits;
2841 bool utf8_source;
2842 U32 utf8_flags;
a6ec74c1 2843
fc241834
RGS
2844 fromstr = NEXTFROM;
2845 str = SvPV(fromstr, fromlen);
f337b084
TH
2846 end = str + fromlen;
2847 if (DO_UTF8(fromstr)) {
2848 utf8_source = TRUE;
2849 utf8_flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
2850 } else {
2851 utf8_source = FALSE;
2852 utf8_flags = 0; /* Unused, but keep compilers happy */
2853 }
2854 if (howlen == e_star) len = fromlen;
2855 field_len = (len+7)/8;
2856 GROWING(utf8, cat, start, cur, field_len);
2857 if (len > (I32)fromlen) len = fromlen;
2858 bits = 0;
2859 l = 0;
2860 if (datumtype == 'B')
2861 while (l++ < len) {
2862 if (utf8_source) {
2863 UV val;
2864 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2865 bits |= val & 1;
2866 } else bits |= *str++ & 1;
2867 if (l & 7) bits <<= 1;
fc241834 2868 else {
f337b084
TH
2869 PUSH_BYTE(utf8, cur, bits);
2870 bits = 0;
a6ec74c1
JH
2871 }
2872 }
f337b084
TH
2873 else
2874 /* datumtype == 'b' */
2875 while (l++ < len) {
2876 if (utf8_source) {
2877 UV val;
2878 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2879 if (val & 1) bits |= 0x80;
2880 } else if (*str++ & 1)
2881 bits |= 0x80;
2882 if (l & 7) bits >>= 1;
fc241834 2883 else {
f337b084
TH
2884 PUSH_BYTE(utf8, cur, bits);
2885 bits = 0;
a6ec74c1
JH
2886 }
2887 }
f337b084
TH
2888 l--;
2889 if (l & 7) {
fc241834 2890 if (datumtype == 'B')
f337b084 2891 bits <<= 7 - (l & 7);
fc241834 2892 else
f337b084
TH
2893 bits >>= 7 - (l & 7);
2894 PUSH_BYTE(utf8, cur, bits);
2895 l += 7;
a6ec74c1 2896 }
f337b084
TH
2897 /* Determine how many chars are left in the requested field */
2898 l /= 8;
2899 if (howlen == e_star) field_len = 0;
2900 else field_len -= l;
2901 Zero(cur, field_len, char);
2902 cur += field_len;
a6ec74c1 2903 break;
f337b084 2904 }
a6ec74c1 2905 case 'H':
f337b084
TH
2906 case 'h': {
2907 char *str, *end;
2908 I32 l, field_len;
2909 U8 bits;
2910 bool utf8_source;
2911 U32 utf8_flags;
a6ec74c1 2912
fc241834
RGS
2913 fromstr = NEXTFROM;
2914 str = SvPV(fromstr, fromlen);
f337b084
TH
2915 end = str + fromlen;
2916 if (DO_UTF8(fromstr)) {
2917 utf8_source = TRUE;
2918 utf8_flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
2919 } else {
2920 utf8_source = FALSE;
2921 utf8_flags = 0; /* Unused, but keep compilers happy */
2922 }
2923 if (howlen == e_star) len = fromlen;
2924 field_len = (len+1)/2;
2925 GROWING(utf8, cat, start, cur, field_len);
2926 if (!utf8 && len > (I32)fromlen) len = fromlen;
2927 bits = 0;
2928 l = 0;
2929 if (datumtype == 'H')
2930 while (l++ < len) {
2931 if (utf8_source) {
2932 UV val;
2933 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2934 if (val < 256 && isALPHA(val))
2935 bits |= (val + 9) & 0xf;
a6ec74c1 2936 else
f337b084
TH
2937 bits |= val & 0xf;
2938 } else if (isALPHA(*str))
2939 bits |= (*str++ + 9) & 0xf;
2940 else
2941 bits |= *str++ & 0xf;
2942 if (l & 1) bits <<= 4;
fc241834 2943 else {
f337b084
TH
2944 PUSH_BYTE(utf8, cur, bits);
2945 bits = 0;
a6ec74c1
JH
2946 }
2947 }
f337b084
TH
2948 else
2949 while (l++ < len) {
2950 if (utf8_source) {
2951 UV val;
2952 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2953 if (val < 256 && isALPHA(val))
2954 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2955 else
f337b084
TH
2956 bits |= (val & 0xf) << 4;
2957 } else if (isALPHA(*str))
2958 bits |= ((*str++ + 9) & 0xf) << 4;
2959 else
2960 bits |= (*str++ & 0xf) << 4;
2961 if (l & 1) bits >>= 4;
fc241834 2962 else {
f337b084
TH
2963 PUSH_BYTE(utf8, cur, bits);
2964 bits = 0;
a6ec74c1 2965 }
fc241834 2966 }
f337b084
TH
2967 l--;
2968 if (l & 1) {
2969 PUSH_BYTE(utf8, cur, bits);
2970 l++;
2971 }
2972 /* Determine how many chars are left in the requested field */
2973 l /= 2;
2974 if (howlen == e_star) field_len = 0;
2975 else field_len -= l;
2976 Zero(cur, field_len, char);
2977 cur += field_len;
2978 break;
fc241834
RGS
2979 }
2980 case 'c':
f337b084
TH
2981 while (len-- > 0) {
2982 IV aiv;
2983 fromstr = NEXTFROM;
2984 aiv = SvIV(fromstr);
2985 if ((-128 > aiv || aiv > 127) &&
2986 ckWARN(WARN_PACK))
2987 Perl_warner(aTHX_ packWARN(WARN_PACK),
2988 "Character in 'c' format wrapped in pack");
2989 PUSH_BYTE(utf8, cur, aiv & 0xff);
a6ec74c1
JH
2990 }
2991 break;
2992 case 'C':
f337b084
TH
2993 if (len == 0) {
2994 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2995 break;
2996 }
2997 GROWING(0, cat, start, cur, len);
a6ec74c1 2998 while (len-- > 0) {
f337b084 2999 IV aiv;
a6ec74c1 3000 fromstr = NEXTFROM;
f337b084
TH
3001 aiv = SvIV(fromstr);
3002 if ((0 > aiv || aiv > 0xff) &&
fc241834
RGS
3003 ckWARN(WARN_PACK))
3004 Perl_warner(aTHX_ packWARN(WARN_PACK),
3005 "Character in 'C' format wrapped in pack");
f337b084
TH
3006 *cur++ = aiv & 0xff;
3007 }
fc241834
RGS
3008 break;
3009 case 'W': {
3010 char *end;
3011 U8 in_bytes = IN_BYTES;
3012
3013 end = start+SvLEN(cat)-1;
3014 if (utf8) end -= UTF8_MAXLEN-1;
3015 while (len-- > 0) {
3016 UV auv;
3017 fromstr = NEXTFROM;
3018 auv = SvUV(fromstr);
3019 if (in_bytes) auv = auv % 0x100;
3020 if (utf8) {
3021 W_utf8:
3022 if (cur > end) {
3023 *cur = '\0';
b162af07 3024 SvCUR_set(cat, cur - start);
fc241834
RGS
3025
3026 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3027 end = start+SvLEN(cat)-UTF8_MAXLEN;
3028 }
230e1fce
NC
3029 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3030 NATIVE_TO_UNI(auv),
3031 ckWARN(WARN_UTF8) ?
3032 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
3033 } else {
3034 if (auv >= 0x100) {
3035 if (!SvUTF8(cat)) {
3036 *cur = '\0';
b162af07 3037 SvCUR_set(cat, cur - start);
fc241834
RGS
3038 marked_upgrade(aTHX_ cat, symptr);
3039 lookahead.flags |= FLAG_DO_UTF8;
3040 lookahead.strbeg = symptr->strbeg;
3041 utf8 = 1;
3042 start = SvPVX(cat);
3043 cur = start + SvCUR(cat);
3044 end = start+SvLEN(cat)-UTF8_MAXLEN;
3045 goto W_utf8;
3046 }
3047 if (ckWARN(WARN_PACK))
3048 Perl_warner(aTHX_ packWARN(WARN_PACK),
3049 "Character in 'W' format wrapped in pack");
3050 auv &= 0xff;
3051 }
3052 if (cur >= end) {
3053 *cur = '\0';
b162af07 3054 SvCUR_set(cat, cur - start);
fc241834
RGS
3055 GROWING(0, cat, start, cur, len+1);
3056 end = start+SvLEN(cat)-1;
3057 }
fe2774ed 3058 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
3059 }
3060 }
3061 break;
fc241834
RGS
3062 }
3063 case 'U': {
3064 char *end;
3065
3066 if (len == 0) {
3067 if (!(symptr->flags & FLAG_DO_UTF8)) {
3068 marked_upgrade(aTHX_ cat, symptr);
3069 lookahead.flags |= FLAG_DO_UTF8;
3070 lookahead.strbeg = symptr->strbeg;
3071 }
3072 utf8 = 0;
3073 goto no_change;
3074 }
3075
3076 end = start+SvLEN(cat);
3077 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 3078 while (len-- > 0) {
fc241834 3079 UV auv;
a6ec74c1 3080 fromstr = NEXTFROM;
fc241834
RGS
3081 auv = SvUV(fromstr);
3082 if (utf8) {
230e1fce 3083 U8 buffer[UTF8_MAXLEN], *endb;
fc241834
RGS
3084 endb = uvuni_to_utf8_flags(buffer, auv,
3085 ckWARN(WARN_UTF8) ?
3086 0 : UNICODE_ALLOW_ANY);
3087 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3088 *cur = '\0';
b162af07 3089 SvCUR_set(cat, cur - start);
fc241834
RGS
3090 GROWING(0, cat, start, cur,
3091 len+(endb-buffer)*UTF8_EXPAND);
3092 end = start+SvLEN(cat);
3093 }
3094 bytes_to_uni(aTHX_ buffer, endb-buffer, &cur);
3095 } else {
3096 if (cur >= end) {
3097 *cur = '\0';
b162af07 3098 SvCUR_set(cat, cur - start);
fc241834
RGS
3099 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3100 end = start+SvLEN(cat)-UTF8_MAXLEN;
3101 }
230e1fce
NC
3102 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3103 ckWARN(WARN_UTF8) ?
3104 0 : UNICODE_ALLOW_ANY);
fc241834 3105 }
a6ec74c1 3106 }
a6ec74c1 3107 break;
fc241834 3108 }
a6ec74c1
JH
3109 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3110 case 'f':
a6ec74c1 3111 while (len-- > 0) {
f337b084
TH
3112 float afloat;
3113 NV anv;
a6ec74c1 3114 fromstr = NEXTFROM;
f337b084 3115 anv = SvNV(fromstr);
5cdb9e01 3116#ifdef __VOS__
f337b084 3117 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3118 during conversion from double to float into infinity, so we
3119 do it by hand. This code should either be generalized for
3120 any OS that needs it, or removed if and when VOS implements
3121 posix-976 (suggestion to support mapping to infinity).
3122 Paul.Green@stratus.com 02-04-02. */
f337b084 3123 if (anv > FLT_MAX)
fc241834 3124 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3125 else if (anv < -FLT_MAX)
fc241834 3126 afloat = _float_constants[0]; /* single prec. inf. */
f337b084
TH
3127 else afloat = (float) anv;
3128#else /* __VOS__ */
baf3cf9c 3129# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3130 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3131 * on Alpha; fake it if we don't have them.
3132 */
f337b084 3133 if (anv > FLT_MAX)
fc241834 3134 afloat = FLT_MAX;
f337b084 3135 else if (anv < -FLT_MAX)
fc241834 3136 afloat = -FLT_MAX;
f337b084 3137 else afloat = (float)anv;
baf3cf9c 3138# else
f337b084 3139 afloat = (float)anv;
baf3cf9c 3140# endif
f337b084 3141#endif /* __VOS__ */
1109a392 3142 DO_BO_PACK_N(afloat, float);
f337b084 3143 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
3144 }
3145 break;
3146 case 'd':
a6ec74c1 3147 while (len-- > 0) {
f337b084
TH
3148 double adouble;
3149 NV anv;
a6ec74c1 3150 fromstr = NEXTFROM;
f337b084 3151 anv = SvNV(fromstr);
5cdb9e01 3152#ifdef __VOS__
f337b084 3153 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3154 during conversion from long double to double into infinity,
3155 so we do it by hand. This code should either be generalized
3156 for any OS that needs it, or removed if and when VOS
3157 implements posix-976 (suggestion to support mapping to
3158 infinity). Paul.Green@stratus.com 02-04-02. */
f337b084 3159 if (anv > DBL_MAX)
fc241834 3160 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3161 else if (anv < -DBL_MAX)
fc241834 3162 adouble = _double_constants[0]; /* double prec. inf. */
f337b084
TH
3163 else adouble = (double) anv;
3164#else /* __VOS__ */
baf3cf9c 3165# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3166 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3167 * on Alpha; fake it if we don't have them.
3168 */
f337b084 3169 if (anv > DBL_MAX)
fc241834 3170 adouble = DBL_MAX;
f337b084 3171 else if (anv < -DBL_MAX)
fc241834 3172 adouble = -DBL_MAX;
f337b084 3173 else adouble = (double)anv;
baf3cf9c 3174# else
f337b084 3175 adouble = (double)anv;
baf3cf9c 3176# endif
f337b084 3177#endif /* __VOS__ */
1109a392 3178 DO_BO_PACK_N(adouble, double);
f337b084 3179 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3180 }
3181 break;
fc241834
RGS
3182 case 'F': {
3183 NV anv;
1109a392 3184 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3185 while (len-- > 0) {
3186 fromstr = NEXTFROM;
3187 anv = SvNV(fromstr);
1109a392 3188 DO_BO_PACK_N(anv, NV);
fc241834 3189 PUSH_VAR(utf8, cur, anv);
92d41999
JH
3190 }
3191 break;
fc241834 3192 }
92d41999 3193#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834
RGS
3194 case 'D': {
3195 long double aldouble;
1109a392
MHM
3196 /* long doubles can have unused bits, which may be nonzero */
3197 Zero(&aldouble, 1, long double);
92d41999
JH
3198 while (len-- > 0) {
3199 fromstr = NEXTFROM;
3200 aldouble = (long double)SvNV(fromstr);
1109a392 3201 DO_BO_PACK_N(aldouble, long double);
fc241834 3202 PUSH_VAR(utf8, cur, aldouble);
92d41999
JH
3203 }
3204 break;
fc241834 3205 }
92d41999 3206#endif
7212898e 3207#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3208 case 'n' | TYPE_IS_SHRIEKING:
7212898e 3209#endif
a6ec74c1
JH
3210 case 'n':
3211 while (len-- > 0) {
f337b084 3212 I16 ai16;
a6ec74c1 3213 fromstr = NEXTFROM;
ef108786 3214 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3215#ifdef HAS_HTONS
ef108786 3216 ai16 = PerlSock_htons(ai16);
a6ec74c1 3217#endif
f337b084 3218 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3219 }
3220 break;
7212898e 3221#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3222 case 'v' | TYPE_IS_SHRIEKING:
7212898e 3223#endif
a6ec74c1
JH
3224 case 'v':
3225 while (len-- > 0) {
f337b084 3226 I16 ai16;
a6ec74c1 3227 fromstr = NEXTFROM;
ef108786 3228 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3229#ifdef HAS_HTOVS
ef108786 3230 ai16 = htovs(ai16);
a6ec74c1 3231#endif
f337b084 3232 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3233 }
3234 break;
49704364 3235 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3236#if SHORTSIZE != SIZE16
fc241834 3237 while (len-- > 0) {
f337b084 3238 unsigned short aushort;
fc241834
RGS
3239 fromstr = NEXTFROM;
3240 aushort = SvUV(fromstr);
3241 DO_BO_PACK(aushort, s);
f337b084 3242 PUSH_VAR(utf8, cur, aushort);
fc241834 3243 }
49704364
WL
3244 break;
3245#else
3246 /* Fall through! */
a6ec74c1 3247#endif
49704364 3248 case 'S':
fc241834 3249 while (len-- > 0) {
f337b084 3250 U16 au16;
fc241834
RGS
3251 fromstr = NEXTFROM;
3252 au16 = (U16)SvUV(fromstr);
3253 DO_BO_PACK(au16, 16);
f337b084 3254 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3255 }
3256 break;
49704364 3257 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3258#if SHORTSIZE != SIZE16
fc241834 3259 while (len-- > 0) {
f337b084 3260 short ashort;
fc241834
RGS
3261 fromstr = NEXTFROM;
3262 ashort = SvIV(fromstr);
3263 DO_BO_PACK(ashort, s);
f337b084 3264 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3265 }
49704364
WL
3266 break;
3267#else
3268 /* Fall through! */
a6ec74c1 3269#endif
49704364
WL
3270 case 's':
3271 while (len-- > 0) {
f337b084 3272 I16 ai16;
49704364 3273 fromstr = NEXTFROM;
ef108786
MHM
3274 ai16 = (I16)SvIV(fromstr);
3275 DO_BO_PACK(ai16, 16);
f337b084 3276 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3277 }
3278 break;
3279 case 'I':
49704364 3280 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3281 while (len-- > 0) {
f337b084 3282 unsigned int auint;
a6ec74c1
JH
3283 fromstr = NEXTFROM;
3284 auint = SvUV(fromstr);
1109a392 3285 DO_BO_PACK(auint, i);
f337b084 3286 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3287 }
3288 break;
92d41999
JH
3289 case 'j':
3290 while (len-- > 0) {
f337b084 3291 IV aiv;
92d41999
JH
3292 fromstr = NEXTFROM;
3293 aiv = SvIV(fromstr);
1109a392
MHM
3294#if IVSIZE == INTSIZE
3295 DO_BO_PACK(aiv, i);
3296#elif IVSIZE == LONGSIZE
3297 DO_BO_PACK(aiv, l);
3298#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3299 DO_BO_PACK(aiv, 64);
f337b084
TH
3300#else
3301 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3302#endif
f337b084 3303 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3304 }
3305 break;
3306 case 'J':
3307 while (len-- > 0) {
f337b084 3308 UV auv;
92d41999
JH
3309 fromstr = NEXTFROM;
3310 auv = SvUV(fromstr);
1109a392
MHM
3311#if UVSIZE == INTSIZE
3312 DO_BO_PACK(auv, i);
3313#elif UVSIZE == LONGSIZE
3314 DO_BO_PACK(auv, l);
3315#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3316 DO_BO_PACK(auv, 64);
f337b084
TH
3317#else
3318 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3319#endif
f337b084 3320 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3321 }
3322 break;
a6ec74c1
JH
3323 case 'w':
3324 while (len-- > 0) {
f337b084 3325 NV anv;
a6ec74c1 3326 fromstr = NEXTFROM;
15e9f109 3327 anv = SvNV(fromstr);
a6ec74c1 3328
f337b084
TH
3329 if (anv < 0) {
3330 *cur = '\0';
b162af07 3331 SvCUR_set(cat, cur - start);
49704364 3332 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3333 }
a6ec74c1 3334
196b62db
NC
3335 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3336 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3337 any negative IVs will have already been got by the croak()
3338 above. IOK is untrue for fractions, so we test them
3339 against UV_MAX_P1. */
f337b084
TH
3340 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3341 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3342 char *in = buf + sizeof(buf);
196b62db 3343 UV auv = SvUV(fromstr);
a6ec74c1
JH
3344
3345 do {
eb160463 3346 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3347 auv >>= 7;
3348 } while (auv);
3349 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3350 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3351 in, (buf + sizeof(buf)) - in);
3352 } else if (SvPOKp(fromstr))
3353 goto w_string;
a6ec74c1 3354 else if (SvNOKp(fromstr)) {
0258719b
NC
3355 /* 10**NV_MAX_10_EXP is the largest power of 10
3356 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3357 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3358 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3359 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3360 Some C compilers are strict about integral constant
3361 expressions so we conservatively divide by a slightly
3362 smaller integer instead of multiplying by the exact
3363 floating-point value.
0258719b
NC
3364 */
3365#ifdef NV_MAX_10_EXP
f337b084 3366 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3367 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3368#else
f337b084 3369 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3370 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3371#endif
a6ec74c1
JH
3372 char *in = buf + sizeof(buf);
3373
15e9f109 3374 anv = Perl_floor(anv);
a6ec74c1 3375 do {
15e9f109 3376 NV next = Perl_floor(anv / 128);
a6ec74c1 3377 if (in <= buf) /* this cannot happen ;-) */
49704364 3378 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3379 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3380 anv = next;
3381 } while (anv > 0);
a6ec74c1 3382 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3383 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3384 in, (buf + sizeof(buf)) - in);
3385 } else {
735b914b
JH
3386 char *from, *result, *in;
3387 SV *norm;
3388 STRLEN len;
3389 bool done;
3390
f337b084 3391 w_string:
735b914b
JH
3392 /* Copy string and check for compliance */
3393 from = SvPV(fromstr, len);
3394 if ((norm = is_an_int(from, len)) == NULL)
49704364 3395 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b
JH
3396
3397 New('w', result, len, char);
3398 in = result + len;
3399 done = FALSE;
f337b084 3400 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3401 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3402 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3403 in, (result + len) - in);
735b914b
JH
3404 Safefree(result);
3405 SvREFCNT_dec(norm); /* free norm */
fc241834 3406 }
a6ec74c1
JH
3407 }
3408 break;
3409 case 'i':
49704364 3410 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3411 while (len-- > 0) {
f337b084 3412 int aint;
a6ec74c1
JH
3413 fromstr = NEXTFROM;
3414 aint = SvIV(fromstr);
1109a392 3415 DO_BO_PACK(aint, i);
f337b084 3416 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3417 }
3418 break;
7212898e 3419#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3420 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3421#endif
a6ec74c1
JH
3422 case 'N':
3423 while (len-- > 0) {
f337b084 3424 U32 au32;
a6ec74c1 3425 fromstr = NEXTFROM;
ef108786 3426 au32 = SvUV(fromstr);
a6ec74c1 3427#ifdef HAS_HTONL
ef108786 3428 au32 = PerlSock_htonl(au32);
a6ec74c1 3429#endif
f337b084 3430 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3431 }
3432 break;
7212898e 3433#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3434 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3435#endif
a6ec74c1
JH
3436 case 'V':
3437 while (len-- > 0) {
f337b084 3438 U32 au32;
a6ec74c1 3439 fromstr = NEXTFROM;
ef108786 3440 au32 = SvUV(fromstr);
a6ec74c1 3441#ifdef HAS_HTOVL
ef108786 3442 au32 = htovl(au32);
a6ec74c1 3443#endif
f337b084 3444 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3445 }
3446 break;
49704364 3447 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3448#if LONGSIZE != SIZE32
fc241834 3449 while (len-- > 0) {
f337b084 3450 unsigned long aulong;
fc241834
RGS
3451 fromstr = NEXTFROM;
3452 aulong = SvUV(fromstr);
3453 DO_BO_PACK(aulong, l);
f337b084 3454 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3455 }
49704364
WL
3456 break;
3457#else
3458 /* Fall though! */
a6ec74c1 3459#endif
49704364 3460 case 'L':
fc241834 3461 while (len-- > 0) {
f337b084 3462 U32 au32;
fc241834
RGS
3463 fromstr = NEXTFROM;
3464 au32 = SvUV(fromstr);
3465 DO_BO_PACK(au32, 32);
f337b084 3466 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3467 }
3468 break;
49704364 3469 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3470#if LONGSIZE != SIZE32
fc241834 3471 while (len-- > 0) {
f337b084 3472 long along;
fc241834
RGS
3473 fromstr = NEXTFROM;
3474 along = SvIV(fromstr);
3475 DO_BO_PACK(along, l);
f337b084 3476 PUSH_VAR(utf8, cur, along);
a6ec74c1 3477 }
49704364
WL
3478 break;
3479#else
3480 /* Fall though! */
a6ec74c1 3481#endif
49704364
WL
3482 case 'l':
3483 while (len-- > 0) {
f337b084 3484 I32 ai32;
49704364 3485 fromstr = NEXTFROM;
ef108786
MHM
3486 ai32 = SvIV(fromstr);
3487 DO_BO_PACK(ai32, 32);
f337b084 3488 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3489 }
3490 break;
3491#ifdef HAS_QUAD
3492 case 'Q':
3493 while (len-- > 0) {
f337b084 3494 Uquad_t auquad;
a6ec74c1 3495 fromstr = NEXTFROM;
f337b084 3496 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3497 DO_BO_PACK(auquad, 64);
f337b084 3498 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3499 }
3500 break;
3501 case 'q':
3502 while (len-- > 0) {
f337b084 3503 Quad_t aquad;
a6ec74c1
JH
3504 fromstr = NEXTFROM;
3505 aquad = (Quad_t)SvIV(fromstr);
1109a392 3506 DO_BO_PACK(aquad, 64);
f337b084 3507 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3508 }
3509 break;
f337b084 3510#endif /* HAS_QUAD */
a6ec74c1
JH
3511 case 'P':
3512 len = 1; /* assume SV is correct length */
f337b084 3513 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3514 /* Fall through! */
a6ec74c1
JH
3515 case 'p':
3516 while (len-- > 0) {
f337b084
TH
3517 char *aptr;
3518
a6ec74c1 3519 fromstr = NEXTFROM;
28a4f200
TH
3520 SvGETMAGIC(fromstr);
3521 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1
JH
3522 else {
3523 STRLEN n_a;
3524 /* XXX better yet, could spirit away the string to
3525 * a safe spot and hang on to it until the result
3526 * of pack() (and all copies of the result) are
3527 * gone.
3528 */
f337b084
TH
3529 if (ckWARN(WARN_PACK) &&
3530 (SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3531 !SvREADONLY(fromstr)))) {
9014280d 3532 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834 3533 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3534 }
3535 if (SvPOK(fromstr) || SvNIOK(fromstr))
28a4f200 3536 aptr = SvPV_flags(fromstr, n_a, 0);
a6ec74c1 3537 else
28a4f200 3538 aptr = SvPV_force_flags(fromstr, n_a, 0);
a6ec74c1 3539 }
07409e01 3540 DO_BO_PACK_PC(aptr);
f337b084 3541 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3542 }
3543 break;
fc241834 3544 case 'u': {
f7fe979e 3545 const char *aptr, *aend;
fc241834 3546 bool from_utf8;
f337b084 3547
a6ec74c1 3548 fromstr = NEXTFROM;
fc241834
RGS
3549 if (len <= 2) len = 45;
3550 else len = len / 3 * 3;
3551 if (len >= 64) {
3552 Perl_warner(aTHX_ packWARN(WARN_PACK),
3553 "Field too wide in 'u' format in pack");
3554 len = 63;
3555 }
a6ec74c1 3556 aptr = SvPV(fromstr, fromlen);
fc241834
RGS
3557 from_utf8 = DO_UTF8(fromstr);
3558 if (from_utf8) {
3559 aend = aptr + fromlen;
3560 fromlen = sv_len_utf8(fromstr);
3561 } else aend = NULL; /* Unused, but keep compilers happy */
3562 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3563 while (fromlen > 0) {
fc241834 3564 U8 *end;
a6ec74c1 3565 I32 todo;
fc241834 3566 U8 hunk[1+63/3*4+1];
a6ec74c1 3567
eb160463 3568 if ((I32)fromlen > len)
a6ec74c1
JH
3569 todo = len;
3570 else
3571 todo = fromlen;
fc241834
RGS
3572 if (from_utf8) {
3573 char buffer[64];
3574 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3575 'u' | TYPE_IS_PACK)) {
3576 *cur = '\0';
b162af07 3577 SvCUR_set(cat, cur - start);
fc241834
RGS
3578 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3579 }
3580 end = doencodes(hunk, buffer, todo);
3581 } else {
3582 end = doencodes(hunk, aptr, todo);
3583 aptr += todo;
3584 }
3585 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3586 fromlen -= todo;
3587 }
a6ec74c1
JH
3588 break;
3589 }
f337b084
TH
3590 }
3591 *cur = '\0';
b162af07 3592 SvCUR_set(cat, cur - start);
f337b084 3593 no_change:
49704364 3594 *symptr = lookahead;
a6ec74c1 3595 }
49704364 3596 return beglist;
18529408
IZ
3597}
3598#undef NEXTFROM
3599
3600
3601PP(pp_pack)
3602{
3603 dSP; dMARK; dORIGMARK; dTARGET;
3604 register SV *cat = TARG;
3605 STRLEN fromlen;
f7fe979e
AL
3606 register const char *pat = SvPVx(*++MARK, fromlen);
3607 register const char *patend = pat + fromlen;
18529408
IZ
3608
3609 MARK++;
3610 sv_setpvn(cat, "", 0);
f337b084 3611 SvUTF8_off(cat);
18529408 3612
7accc089 3613 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3614
a6ec74c1
JH
3615 SvSETMAGIC(cat);
3616 SP = ORIGMARK;
3617 PUSHs(cat);
3618 RETURN;
3619}
a6ec74c1 3620
73cb7263
NC
3621/*
3622 * Local variables:
3623 * c-indentation-style: bsd
3624 * c-basic-offset: 4
3625 * indent-tabs-mode: t
3626 * End:
3627 *
37442d52
RGS
3628 * ex: set ts=8 sts=4 sw=4 noet:
3629 */