This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Address Hugo's comment on Dave's change (26240)
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
73cb7263 4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 */
18
166f8a29
DM
19/* This file contains pp ("push/pop") functions that
20 * execute the opcodes that make up a perl program. A typical pp function
21 * expects to find its arguments on the stack, and usually pushes its
22 * results onto the stack, hence the 'pp' terminology. Each OP structure
23 * contains a pointer to the relevant pp_foo() function.
24 *
25 * This particular file just contains pp_pack() and pp_unpack(). See the
26 * other pp*.c files for the rest of the pp_ functions.
27 */
28
29
a6ec74c1
JH
30#include "EXTERN.h"
31#define PERL_IN_PP_PACK_C
32#include "perl.h"
33
f7fe979e
AL
34/* Types used by pack/unpack */
35typedef enum {
36 e_no_len, /* no length */
37 e_number, /* number, [] */
38 e_star /* asterisk */
39} howlen_t;
40
41typedef struct tempsym {
42 const char* patptr; /* current template char */
43 const char* patend; /* one after last char */
44 const char* grpbeg; /* 1st char of ()-group */
45 const char* grpend; /* end of ()-group */
46 I32 code; /* template code (!<>) */
47 I32 length; /* length/repeat count */
48 howlen_t howlen; /* how length is given */
49 int level; /* () nesting level */
50 U32 flags; /* /=4, comma=2, pack=1 */
51 /* and group modifiers */
52 STRLEN strbeg; /* offset of group start */
53 struct tempsym *previous; /* previous group */
54} tempsym_t;
55
56#define TEMPSYM_INIT(symptr, p, e, f) \
57 STMT_START { \
58 (symptr)->patptr = (p); \
59 (symptr)->patend = (e); \
60 (symptr)->grpbeg = NULL; \
61 (symptr)->grpend = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->code = 0; \
64 (symptr)->length = 0; \
65 (symptr)->howlen = 0; \
66 (symptr)->level = 0; \
67 (symptr)->flags = (f); \
68 (symptr)->strbeg = 0; \
69 (symptr)->previous = NULL; \
70 } STMT_END
71
7212898e 72#if PERL_VERSION >= 9
f337b084
TH
73# define PERL_PACK_CAN_BYTEORDER
74# define PERL_PACK_CAN_SHRIEKSIGN
75#endif
76
77#ifndef CHAR_BIT
78# define CHAR_BIT 8
7212898e 79#endif
3473cf63
RGS
80/* Maximum number of bytes to which a byte can grow due to upgrade */
81#define UTF8_EXPAND 2
7212898e 82
a6ec74c1 83/*
a6ec74c1
JH
84 * Offset for integer pack/unpack.
85 *
86 * On architectures where I16 and I32 aren't really 16 and 32 bits,
87 * which for now are all Crays, pack and unpack have to play games.
88 */
89
90/*
91 * These values are required for portability of pack() output.
92 * If they're not right on your machine, then pack() and unpack()
93 * wouldn't work right anyway; you'll need to apply the Cray hack.
94 * (I'd like to check them with #if, but you can't use sizeof() in
95 * the preprocessor.) --???
96 */
97/*
98 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
99 defines are now in config.h. --Andy Dougherty April 1998
100 */
101#define SIZE16 2
102#define SIZE32 4
103
104/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
105 --jhi Feb 1999 */
106
1109a392
MHM
107#if U16SIZE > SIZE16 || U32SIZE > SIZE32
108# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
109# define OFF16(p) ((char*)(p))
110# define OFF32(p) ((char*)(p))
a6ec74c1 111# else
1109a392 112# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
113# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
114# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
115# else
08ca2aa3 116 ++++ bad cray byte order
a6ec74c1
JH
117# endif
118# endif
a6ec74c1 119#else
08ca2aa3
TH
120# define OFF16(p) ((char *) (p))
121# define OFF32(p) ((char *) (p))
a6ec74c1
JH
122#endif
123
f337b084
TH
124/* Only to be used inside a loop (see the break) */
125#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
126 if (utf8) { \
127 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
128 } else { \
129 Copy(s, OFF16(p), SIZE16, char); \
130 (s) += SIZE16; \
131 } \
132} STMT_END
133
134/* Only to be used inside a loop (see the break) */
135#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
136 if (utf8) { \
137 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
138 } else { \
139 Copy(s, OFF32(p), SIZE32, char); \
140 (s) += SIZE32; \
141 } \
142} STMT_END
143
144#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
145#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
146
147/* Only to be used inside a loop (see the break) */
f337b084 148#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
08ca2aa3
TH
149STMT_START { \
150 if (utf8) { \
f337b084
TH
151 if (!uni_to_bytes(aTHX_ &s, strend, \
152 (char *) &var, sizeof(var), datumtype)) break;\
08ca2aa3
TH
153 } else { \
154 Copy(s, (char *) &var, sizeof(var), char); \
155 s += sizeof(var); \
156 } \
08ca2aa3
TH
157} STMT_END
158
f337b084 159#define PUSH_VAR(utf8, aptr, var) \
230e1fce 160 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 161
49704364
WL
162/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
163#define MAX_SUB_TEMPLATE_LEVEL 100
164
66c611c5 165/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
166#define FLAG_WAS_UTF8 0x40
167#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 168#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 169#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
170#define FLAG_SLASH 0x04
171#define FLAG_COMMA 0x02
172#define FLAG_PACK 0x01
173
a6ec74c1
JH
174STATIC SV *
175S_mul128(pTHX_ SV *sv, U8 m)
176{
177 STRLEN len;
178 char *s = SvPV(sv, len);
179 char *t;
a6ec74c1
JH
180
181 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
0bcc34c2 182 SV * const tmpNew = newSVpvn("0000000000", 10);
a6ec74c1
JH
183
184 sv_catsv(tmpNew, sv);
185 SvREFCNT_dec(sv); /* free old sv */
186 sv = tmpNew;
187 s = SvPV(sv, len);
188 }
189 t = s + len - 1;
190 while (!*t) /* trailing '\0'? */
191 t--;
192 while (t > s) {
f7fe979e 193 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
194 *(t--) = '0' + (char)(i % 10);
195 m = (char)(i / 10);
a6ec74c1
JH
196 }
197 return (sv);
198}
199
200/* Explosives and implosives. */
201
202#if 'I' == 73 && 'J' == 74
203/* On an ASCII/ISO kind of system */
204#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
205#else
206/*
207 Some other sort of character set - use memchr() so we don't match
208 the null byte.
209 */
210#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
211#endif
212
66c611c5 213/* type modifiers */
62f95557 214#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
215#define TYPE_IS_BIG_ENDIAN 0x200
216#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 217#define TYPE_IS_PACK 0x800
1109a392 218#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 219#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
220#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
221
7212898e 222#ifdef PERL_PACK_CAN_SHRIEKSIGN
28be1210 223# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
7212898e 224#else
28be1210 225# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
7212898e
NC
226#endif
227
228#ifndef PERL_PACK_CAN_BYTEORDER
229/* Put "can't" first because it is shorter */
230# define TYPE_ENDIANNESS(t) 0
231# define TYPE_NO_ENDIANNESS(t) (t)
232
233# define ENDIANNESS_ALLOWED_TYPES ""
234
235# define DO_BO_UNPACK(var, type)
236# define DO_BO_PACK(var, type)
07409e01
NC
237# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
238# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
7212898e
NC
239# define DO_BO_UNPACK_N(var, type)
240# define DO_BO_PACK_N(var, type)
241# define DO_BO_UNPACK_P(var)
242# define DO_BO_PACK_P(var)
45f723e1
NC
243# define DO_BO_UNPACK_PC(var)
244# define DO_BO_PACK_PC(var)
66c611c5 245
f337b084 246#else /* PERL_PACK_CAN_BYTEORDER */
7212898e
NC
247
248# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
249# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
250
251# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
252
253# define DO_BO_UNPACK(var, type) \
1109a392 254 STMT_START { \
66c611c5 255 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
256 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
257 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
258 default: break; \
259 } \
260 } STMT_END
261
7212898e 262# define DO_BO_PACK(var, type) \
1109a392 263 STMT_START { \
66c611c5 264 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
265 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
266 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
267 default: break; \
268 } \
269 } STMT_END
270
07409e01 271# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
1109a392 272 STMT_START { \
66c611c5 273 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 274 case TYPE_IS_BIG_ENDIAN: \
07409e01 275 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
1109a392
MHM
276 break; \
277 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 278 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
1109a392
MHM
279 break; \
280 default: \
281 break; \
282 } \
283 } STMT_END
284
07409e01 285# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
1109a392 286 STMT_START { \
66c611c5 287 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 288 case TYPE_IS_BIG_ENDIAN: \
07409e01 289 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
1109a392
MHM
290 break; \
291 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 292 var = (post_cast *) my_htole ## type ((pre_cast) var); \
1109a392
MHM
293 break; \
294 default: \
295 break; \
296 } \
297 } STMT_END
298
7212898e 299# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
300 STMT_START { \
301 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
302 case TYPE_IS_BIG_ENDIAN: \
303 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
304 "platform", #action, #type); \
305 break; \
306 case TYPE_IS_LITTLE_ENDIAN: \
307 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
308 "platform", #action, #type); \
309 break; \
310 default: \
311 break; \
312 } \
313 } STMT_END
314
7212898e 315# if PTRSIZE == INTSIZE
07409e01
NC
316# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
317# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
318# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
319# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
7212898e 320# elif PTRSIZE == LONGSIZE
07409e01
NC
321# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void)
322# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void)
323# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
324# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char)
7212898e
NC
325# else
326# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
327# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
45f723e1
NC
328# define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
329# define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
7212898e 330# endif
1109a392 331
7212898e 332# if defined(my_htolen) && defined(my_letohn) && \
1109a392 333 defined(my_htoben) && defined(my_betohn)
7212898e 334# define DO_BO_UNPACK_N(var, type) \
1109a392 335 STMT_START { \
66c611c5 336 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
337 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
338 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
339 default: break; \
340 } \
341 } STMT_END
342
7212898e 343# define DO_BO_PACK_N(var, type) \
1109a392 344 STMT_START { \
66c611c5 345 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
346 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
347 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
348 default: break; \
349 } \
350 } STMT_END
7212898e
NC
351# else
352# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
353# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
354# endif
355
f337b084 356#endif /* PERL_PACK_CAN_BYTEORDER */
62f95557 357
78d46eaa 358#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 359#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
360#define PACK_SIZE_MASK 0x3F
361
78d46eaa
NC
362/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
363 in). You're unlikely ever to need to regenerate them. */
a7a3cfaa
TH
364
365#if TYPE_IS_SHRIEKING != 0x100
366 ++++shriek offset should be 256
367#endif
368
369typedef U8 packprops_t;
78d46eaa
NC
370#if 'J'-'I' == 1
371/* ASCII */
f43a9a31 372STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
373 /* normal */
374 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
375 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
376 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
377 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
378 0, 0, 0,
379 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 380#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 381 /* D */ LONG_DOUBLESIZE,
80a13697 382#else
a7a3cfaa 383 0,
80a13697 384#endif
a7a3cfaa
TH
385 0,
386 /* F */ NVSIZE,
387 0, 0,
388 /* I */ sizeof(unsigned int),
389 /* J */ UVSIZE,
390 0,
391 /* L */ SIZE32,
392 0,
393 /* N */ SIZE32,
394 0, 0,
80a13697 395#if defined(HAS_QUAD)
a7a3cfaa 396 /* Q */ sizeof(Uquad_t),
80a13697 397#else
a7a3cfaa 398 0,
80a13697 399#endif
a7a3cfaa
TH
400 0,
401 /* S */ SIZE16,
402 0,
403 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
404 /* V */ SIZE32,
405 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
406 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
407 /* c */ sizeof(char),
408 /* d */ sizeof(double),
409 0,
410 /* f */ sizeof(float),
411 0, 0,
412 /* i */ sizeof(int),
413 /* j */ IVSIZE,
414 0,
415 /* l */ SIZE32,
416 0,
417 /* n */ SIZE16,
418 0,
419 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 420#if defined(HAS_QUAD)
a7a3cfaa 421 /* q */ sizeof(Quad_t),
80a13697 422#else
a7a3cfaa 423 0,
80a13697 424#endif
a7a3cfaa
TH
425 0,
426 /* s */ SIZE16,
427 0, 0,
428 /* v */ SIZE16,
429 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
430 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
431 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
432 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
433 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
434 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
435 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438 0, 0, 0, 0, 0, 0, 0, 0,
439 /* shrieking */
440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
444 0, 0, 0, 0, 0, 0, 0, 0, 0,
445 /* I */ sizeof(unsigned int),
446 0, 0,
447 /* L */ sizeof(unsigned long),
448 0,
7212898e 449#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 450 /* N */ SIZE32,
7212898e 451#else
a7a3cfaa 452 0,
7212898e 453#endif
a7a3cfaa
TH
454 0, 0, 0, 0,
455 /* S */ sizeof(unsigned short),
456 0, 0,
7212898e 457#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 458 /* V */ SIZE32,
7212898e 459#else
a7a3cfaa 460 0,
7212898e 461#endif
a7a3cfaa
TH
462 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
463 0, 0,
464 /* i */ sizeof(int),
465 0, 0,
466 /* l */ sizeof(long),
467 0,
7212898e 468#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 469 /* n */ SIZE16,
7212898e 470#else
a7a3cfaa 471 0,
7212898e 472#endif
a7a3cfaa
TH
473 0, 0, 0, 0,
474 /* s */ sizeof(short),
475 0, 0,
7212898e 476#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 477 /* v */ SIZE16,
7212898e 478#else
a7a3cfaa 479 0,
7212898e 480#endif
a7a3cfaa
TH
481 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
482 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
483 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
484 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
485 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
486 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
487 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
488 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
489 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
490};
491#else
492/* EBCDIC (or bust) */
f43a9a31 493STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
494 /* normal */
495 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
496 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
497 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
498 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
499 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
500 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
501 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
503 0, 0, 0,
504 /* c */ sizeof(char),
505 /* d */ sizeof(double),
506 0,
507 /* f */ sizeof(float),
508 0, 0,
509 /* i */ sizeof(int),
510 0, 0, 0, 0, 0, 0, 0,
511 /* j */ IVSIZE,
512 0,
513 /* l */ SIZE32,
514 0,
515 /* n */ SIZE16,
516 0,
517 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 518#if defined(HAS_QUAD)
a7a3cfaa 519 /* q */ sizeof(Quad_t),
80a13697 520#else
a7a3cfaa 521 0,
80a13697 522#endif
a7a3cfaa
TH
523 0, 0, 0, 0, 0, 0, 0, 0, 0,
524 /* s */ SIZE16,
525 0, 0,
526 /* v */ SIZE16,
527 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
528 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
529 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
530 /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
80a13697 531#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 532 /* D */ LONG_DOUBLESIZE,
80a13697 533#else
a7a3cfaa 534 0,
80a13697 535#endif
a7a3cfaa
TH
536 0,
537 /* F */ NVSIZE,
538 0, 0,
539 /* I */ sizeof(unsigned int),
540 0, 0, 0, 0, 0, 0, 0,
541 /* J */ UVSIZE,
542 0,
543 /* L */ SIZE32,
544 0,
545 /* N */ SIZE32,
546 0, 0,
80a13697 547#if defined(HAS_QUAD)
a7a3cfaa 548 /* Q */ sizeof(Uquad_t),
80a13697 549#else
a7a3cfaa 550 0,
80a13697 551#endif
a7a3cfaa
TH
552 0, 0, 0, 0, 0, 0, 0, 0, 0,
553 /* S */ SIZE16,
554 0,
555 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
556 /* V */ SIZE32,
557 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
558 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
559 0, 0, 0, 0, 0, 0, 0, 0, 0,
560 /* shrieking */
561 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
562 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
564 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
565 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 0, 0, 0, 0, 0, 0, 0, 0, 0,
570 /* i */ sizeof(int),
571 0, 0, 0, 0, 0, 0, 0, 0, 0,
572 /* l */ sizeof(long),
573 0,
7212898e 574#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 575 /* n */ SIZE16,
7212898e 576#else
a7a3cfaa 577 0,
7212898e 578#endif
a7a3cfaa
TH
579 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
580 /* s */ sizeof(short),
581 0, 0,
7212898e 582#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 583 /* v */ SIZE16,
7212898e 584#else
a7a3cfaa 585 0,
7212898e 586#endif
a7a3cfaa
TH
587 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
588 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
589 0, 0, 0,
590 /* I */ sizeof(unsigned int),
591 0, 0, 0, 0, 0, 0, 0, 0, 0,
592 /* L */ sizeof(unsigned long),
593 0,
7212898e 594#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 595 /* N */ SIZE32,
7212898e 596#else
a7a3cfaa 597 0,
7212898e 598#endif
a7a3cfaa
TH
599 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
600 /* S */ sizeof(unsigned short),
601 0, 0,
7212898e 602#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 603 /* V */ SIZE32,
7212898e 604#else
a7a3cfaa 605 0,
7212898e 606#endif
a7a3cfaa
TH
607 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
608 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
609};
610#endif
611
08ca2aa3 612STATIC U8
f7fe979e 613uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 614{
08ca2aa3 615 STRLEN retlen;
0bcc34c2 616 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 617 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
618 /* We try to process malformed UTF-8 as much as possible (preferrably with
619 warnings), but these two mean we make no progress in the string and
620 might enter an infinite loop */
621 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
622 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
623 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 624 if (val >= 0x100) {
f337b084 625 if (ckWARN(WARN_UNPACK))
08ca2aa3
TH
626 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
627 "Character in '%c' format wrapped in unpack",
f337b084 628 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
629 val &= 0xff;
630 }
631 *s += retlen;
fe2774ed 632 return (U8)val;
08ca2aa3
TH
633}
634
f337b084
TH
635#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
636 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
637 *(U8 *)(s)++)
638
639STATIC bool
f7fe979e 640uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
641{
642 UV val;
643 STRLEN retlen;
f7fe979e 644 const char *from = *s;
08ca2aa3 645 int bad = 0;
f7fe979e 646 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3
TH
647 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
648 for (;buf_len > 0; buf_len--) {
649 if (from >= end) return FALSE;
f337b084 650 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
651 if (retlen == (STRLEN) -1 || retlen == 0) {
652 from += UTF8SKIP(from);
653 bad |= 1;
654 } else from += retlen;
655 if (val >= 0x100) {
656 bad |= 2;
657 val &= 0xff;
658 }
fe2774ed 659 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
660 }
661 /* We have enough characters for the buffer. Did we have problems ? */
662 if (bad) {
663 if (bad & 1) {
664 /* Rewalk the string fragment while warning */
f7fe979e 665 const char *ptr;
9e27e96a 666 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
667 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
668 if (ptr >= end) break;
f337b084 669 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
670 }
671 if (from > end) from = end;
672 }
673 if ((bad & 2) && ckWARN(WARN_UNPACK))
fc241834 674 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084
TH
675 WARN_PACK : WARN_UNPACK),
676 "Character(s) in '%c' format wrapped in %s",
fc241834 677 (int) TYPE_NO_MODIFIERS(datumtype),
f337b084 678 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
679 }
680 *s = from;
681 return TRUE;
682}
683
684STATIC bool
f7fe979e 685next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 686{
08ca2aa3 687 STRLEN retlen;
0bcc34c2 688 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
689 if (val >= 0x100 || !ISUUCHAR(val) ||
690 retlen == (STRLEN) -1 || retlen == 0) {
691 *out = 0;
692 return FALSE;
693 }
694 *out = PL_uudmap[val] & 077;
f337b084 695 *s += retlen;
08ca2aa3
TH
696 return TRUE;
697}
78d46eaa 698
f337b084 699STATIC void
f7fe979e 700bytes_to_uni(pTHX_ const U8 *start, STRLEN len, char **dest) {
f337b084 701 U8 buffer[UTF8_MAXLEN];
0bcc34c2 702 const U8 * const end = start + len;
f337b084
TH
703 char *d = *dest;
704 while (start < end) {
f7fe979e 705 const int length =
f337b084
TH
706 uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
707 switch(length) {
708 case 1:
709 *d++ = buffer[0];
710 break;
711 case 2:
712 *d++ = buffer[0];
713 *d++ = buffer[1];
714 break;
715 default:
716 Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
717 *start, length);
718 }
719 start++;
720 }
721 *dest = d;
722}
723
230e1fce
NC
724#define PUSH_BYTES(utf8, cur, buf, len) \
725STMT_START { \
726 if (utf8) bytes_to_uni(aTHX_ (U8 *) buf, len, &(cur)); \
727 else { \
728 Copy(buf, cur, len, char); \
729 (cur) += (len); \
730 } \
f337b084
TH
731} STMT_END
732
733#define GROWING(utf8, cat, start, cur, in_len) \
734STMT_START { \
735 STRLEN glen = (in_len); \
3473cf63 736 if (utf8) glen *= UTF8_EXPAND; \
f337b084
TH
737 if ((cur) + glen >= (start) + SvLEN(cat)) { \
738 (start) = sv_exp_grow(aTHX_ cat, glen); \
739 (cur) = (start) + SvCUR(cat); \
740 } \
741} STMT_END
742
743#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
744STMT_START { \
f7fe979e 745 const STRLEN glen = (in_len); \
f337b084 746 STRLEN gl = glen; \
3473cf63 747 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
748 if ((cur) + gl >= (start) + SvLEN(cat)) { \
749 *cur = '\0'; \
b162af07 750 SvCUR_set((cat), (cur) - (start)); \
f337b084
TH
751 (start) = sv_exp_grow(aTHX_ cat, gl); \
752 (cur) = (start) + SvCUR(cat); \
753 } \
754 PUSH_BYTES(utf8, cur, buf, glen); \
755} STMT_END
756
757#define PUSH_BYTE(utf8, s, byte) \
758STMT_START { \
759 if (utf8) { \
f7fe979e 760 const U8 au8 = (byte); \
f337b084
TH
761 bytes_to_uni(aTHX_ &au8, 1, &(s)); \
762 } else *(U8 *)(s)++ = (byte); \
763} STMT_END
764
765/* Only to be used inside a loop (see the break) */
766#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
767STMT_START { \
768 STRLEN retlen; \
769 if (str >= end) break; \
770 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
771 if (retlen == (STRLEN) -1 || retlen == 0) { \
772 *cur = '\0'; \
773 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
774 } \
775 str += retlen; \
776} STMT_END
777
f7fe979e
AL
778static const char *_action( const tempsym_t* symptr )
779{
780 return ( symptr->flags & FLAG_PACK ) ? "pack" : "unpack";
781}
782
206947d2 783/* Returns the sizeof() struct described by pat */
028d1f6d 784STATIC I32
f337b084 785S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 786{
f337b084 787 I32 total = 0;
206947d2 788
49704364 789 while (next_symbol(symptr)) {
f337b084 790 I32 len;
f7fe979e 791 int size;
f337b084
TH
792
793 switch (symptr->howlen) {
fc241834 794 case e_star:
49704364 795 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 796 _action( symptr ) );
49704364 797 break;
f337b084
TH
798 default:
799 /* e_no_len and e_number */
800 len = symptr->length;
801 break;
49704364
WL
802 }
803
a7a3cfaa 804 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 805 if (!size) {
f7fe979e 806 int star;
80a13697
NC
807 /* endianness doesn't influence the size of a type */
808 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
809 default:
810 Perl_croak(aTHX_ "Invalid type '%c' in %s",
811 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 812 _action( symptr ) );
28be1210
TH
813#ifdef PERL_PACK_CAN_SHRIEKSIGN
814 case '.' | TYPE_IS_SHRIEKING:
815 case '@' | TYPE_IS_SHRIEKING:
816#endif
80a13697 817 case '@':
28be1210 818 case '.':
80a13697
NC
819 case '/':
820 case 'U': /* XXXX Is it correct? */
821 case 'w':
822 case 'u':
823 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 824 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 825 _action( symptr ) );
80a13697
NC
826 case '%':
827 size = 0;
828 break;
829 case '(':
fc241834
RGS
830 {
831 tempsym_t savsym = *symptr;
832 symptr->patptr = savsym.grpbeg;
833 symptr->patend = savsym.grpend;
834 /* XXXX Theoretically, we need to measure many times at
835 different positions, since the subexpression may contain
836 alignment commands, but be not of aligned length.
837 Need to detect this and croak(). */
838 size = measure_struct(symptr);
839 *symptr = savsym;
840 break;
841 }
80a13697
NC
842 case 'X' | TYPE_IS_SHRIEKING:
843 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
844 */
845 if (!len) /* Avoid division by 0 */
846 len = 1;
847 len = total % len; /* Assumed: the start is aligned. */
848 /* FALL THROUGH */
849 case 'X':
850 size = -1;
851 if (total < len)
f7fe979e 852 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
853 break;
854 case 'x' | TYPE_IS_SHRIEKING:
855 if (!len) /* Avoid division by 0 */
856 len = 1;
857 star = total % len; /* Assumed: the start is aligned. */
858 if (star) /* Other portable ways? */
859 len = len - star;
860 else
861 len = 0;
862 /* FALL THROUGH */
863 case 'x':
864 case 'A':
865 case 'Z':
866 case 'a':
80a13697
NC
867 size = 1;
868 break;
869 case 'B':
870 case 'b':
871 len = (len + 7)/8;
872 size = 1;
873 break;
874 case 'H':
875 case 'h':
876 len = (len + 1)/2;
877 size = 1;
878 break;
78d46eaa 879
80a13697
NC
880 case 'P':
881 len = 1;
882 size = sizeof(char*);
78d46eaa
NC
883 break;
884 }
206947d2
IZ
885 }
886 total += len * size;
887 }
888 return total;
889}
890
49704364
WL
891
892/* locate matching closing parenthesis or bracket
893 * returns char pointer to char after match, or NULL
894 */
f7fe979e
AL
895STATIC const char *
896S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
18529408 897{
49704364 898 while (patptr < patend) {
f7fe979e 899 const char c = *patptr++;
49704364
WL
900
901 if (isSPACE(c))
902 continue;
903 else if (c == ender)
904 return patptr-1;
905 else if (c == '#') {
906 while (patptr < patend && *patptr != '\n')
907 patptr++;
908 continue;
909 } else if (c == '(')
910 patptr = group_end(patptr, patend, ')') + 1;
911 else if (c == '[')
912 patptr = group_end(patptr, patend, ']') + 1;
18529408 913 }
49704364
WL
914 Perl_croak(aTHX_ "No group ending character '%c' found in template",
915 ender);
916 return 0;
18529408
IZ
917}
918
49704364
WL
919
920/* Convert unsigned decimal number to binary.
921 * Expects a pointer to the first digit and address of length variable
922 * Advances char pointer to 1st non-digit char and returns number
fc241834 923 */
f7fe979e
AL
924STATIC const char *
925S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
49704364
WL
926{
927 I32 len = *patptr++ - '0';
928 while (isDIGIT(*patptr)) {
929 if (len >= 0x7FFFFFFF/10)
930 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
931 len = (len * 10) + (*patptr++ - '0');
932 }
933 *lenptr = len;
934 return patptr;
935}
936
937/* The marvellous template parsing routine: Using state stored in *symptr,
938 * locates next template code and count
939 */
940STATIC bool
f337b084 941S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 942{
f7fe979e 943 const char* patptr = symptr->patptr;
0bcc34c2 944 const char* const patend = symptr->patend;
49704364
WL
945
946 symptr->flags &= ~FLAG_SLASH;
947
948 while (patptr < patend) {
949 if (isSPACE(*patptr))
950 patptr++;
951 else if (*patptr == '#') {
952 patptr++;
953 while (patptr < patend && *patptr != '\n')
954 patptr++;
955 if (patptr < patend)
956 patptr++;
957 } else {
fc241834 958 /* We should have found a template code */
49704364 959 I32 code = *patptr++ & 0xFF;
66c611c5 960 U32 inherited_modifiers = 0;
49704364
WL
961
962 if (code == ','){ /* grandfather in commas but with a warning */
963 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
964 symptr->flags |= FLAG_COMMA;
965 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 966 "Invalid type ',' in %s", _action( symptr ) );
49704364
WL
967 }
968 continue;
969 }
fc241834 970
49704364 971 /* for '(', skip to ')' */
fc241834 972 if (code == '(') {
49704364
WL
973 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
974 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 975 _action( symptr ) );
49704364
WL
976 symptr->grpbeg = patptr;
977 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
978 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
979 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 980 _action( symptr ) );
49704364
WL
981 }
982
66c611c5
MHM
983 /* look for group modifiers to inherit */
984 if (TYPE_ENDIANNESS(symptr->flags)) {
985 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
986 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
987 }
988
1109a392
MHM
989 /* look for modifiers */
990 while (patptr < patend) {
b7787f18
AL
991 const char *allowed;
992 I32 modifier;
1109a392
MHM
993 switch (*patptr) {
994 case '!':
995 modifier = TYPE_IS_SHRIEKING;
7212898e 996 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 997 break;
7212898e 998#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
999 case '>':
1000 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 1001 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
1002 break;
1003 case '<':
1004 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 1005 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 1006 break;
f337b084 1007#endif /* PERL_PACK_CAN_BYTEORDER */
1109a392 1008 default:
b7787f18
AL
1009 allowed = "";
1010 modifier = 0;
1109a392
MHM
1011 break;
1012 }
66c611c5 1013
1109a392
MHM
1014 if (modifier == 0)
1015 break;
66c611c5 1016
1109a392
MHM
1017 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1018 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 1019 allowed, _action( symptr ) );
66c611c5
MHM
1020
1021 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 1022 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 1023 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
1024 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1025 TYPE_ENDIANNESS_MASK)
1026 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 1027 *patptr, _action( symptr ) );
66c611c5 1028
041457d9 1029 if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1109a392
MHM
1030 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1031 "Duplicate modifier '%c' after '%c' in %s",
1032 *patptr, (int) TYPE_NO_MODIFIERS(code),
f7fe979e 1033 _action( symptr ) );
1109a392 1034 }
66c611c5 1035
1109a392
MHM
1036 code |= modifier;
1037 patptr++;
49704364
WL
1038 }
1039
66c611c5
MHM
1040 /* inherit modifiers */
1041 code |= inherited_modifiers;
1042
fc241834 1043 /* look for count and/or / */
49704364
WL
1044 if (patptr < patend) {
1045 if (isDIGIT(*patptr)) {
1046 patptr = get_num( patptr, &symptr->length );
1047 symptr->howlen = e_number;
1048
1049 } else if (*patptr == '*') {
1050 patptr++;
1051 symptr->howlen = e_star;
1052
1053 } else if (*patptr == '[') {
f7fe979e 1054 const char* lenptr = ++patptr;
49704364
WL
1055 symptr->howlen = e_number;
1056 patptr = group_end( patptr, patend, ']' ) + 1;
1057 /* what kind of [] is it? */
1058 if (isDIGIT(*lenptr)) {
1059 lenptr = get_num( lenptr, &symptr->length );
1060 if( *lenptr != ']' )
1061 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 1062 _action( symptr ) );
49704364
WL
1063 } else {
1064 tempsym_t savsym = *symptr;
1065 symptr->patend = patptr-1;
1066 symptr->patptr = lenptr;
1067 savsym.length = measure_struct(symptr);
1068 *symptr = savsym;
1069 }
1070 } else {
1071 symptr->howlen = e_no_len;
1072 symptr->length = 1;
1073 }
1074
1075 /* try to find / */
1076 while (patptr < patend) {
1077 if (isSPACE(*patptr))
1078 patptr++;
1079 else if (*patptr == '#') {
1080 patptr++;
1081 while (patptr < patend && *patptr != '\n')
1082 patptr++;
1083 if (patptr < patend)
1084 patptr++;
1085 } else {
66c611c5 1086 if (*patptr == '/') {
49704364
WL
1087 symptr->flags |= FLAG_SLASH;
1088 patptr++;
66c611c5
MHM
1089 if (patptr < patend &&
1090 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 1091 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 1092 _action( symptr ) );
49704364
WL
1093 }
1094 break;
1095 }
18529408 1096 }
49704364
WL
1097 } else {
1098 /* at end - no count, no / */
1099 symptr->howlen = e_no_len;
1100 symptr->length = 1;
1101 }
1102
1103 symptr->code = code;
fc241834 1104 symptr->patptr = patptr;
49704364 1105 return TRUE;
18529408 1106 }
49704364 1107 }
fc241834 1108 symptr->patptr = patptr;
49704364 1109 return FALSE;
18529408
IZ
1110}
1111
18529408 1112/*
fc241834 1113 There is no way to cleanly handle the case where we should process the
08ca2aa3 1114 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
1115 (e.g. estimates like strend-s as an upper bound for the number of
1116 characters left wouldn't work). So if we foresee the need of this
1117 (pattern starts with U or contains U0), we want to work on the encoded
1118 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
1119 themselves if they need to do a lot of unpacks like this on it
1120*/
fc241834 1121STATIC bool
08ca2aa3
TH
1122need_utf8(const char *pat, const char *patend)
1123{
1124 bool first = TRUE;
1125 while (pat < patend) {
1126 if (pat[0] == '#') {
1127 pat++;
f7fe979e 1128 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1129 if (!pat) return FALSE;
1130 } else if (pat[0] == 'U') {
1131 if (first || pat[1] == '0') return TRUE;
1132 } else first = FALSE;
1133 pat++;
1134 }
1135 return FALSE;
1136}
1137
1138STATIC char
1139first_symbol(const char *pat, const char *patend) {
1140 while (pat < patend) {
1141 if (pat[0] != '#') return pat[0];
1142 pat++;
f7fe979e 1143 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1144 if (!pat) return 0;
1145 pat++;
1146 }
1147 return 0;
1148}
1149
1150/*
18529408
IZ
1151=for apidoc unpack_str
1152
7accc089
JH
1153The engine implementing unpack() Perl function. Note: parameters strbeg, new_s
1154and ocnt are not used. This call should not be used, use unpackstring instead.
18529408
IZ
1155
1156=cut */
1157
1158I32
f7fe979e 1159Perl_unpack_str(pTHX_ const char *pat, const char *patend, const char *s, const char *strbeg, const char *strend, char **new_s, I32 ocnt, U32 flags)
a6ec74c1 1160{
f7fe979e 1161 tempsym_t sym;
8772537c
AL
1162 PERL_UNUSED_ARG(strbeg);
1163 PERL_UNUSED_ARG(new_s);
1164 PERL_UNUSED_ARG(ocnt);
08ca2aa3 1165
f337b084 1166 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1167 else if (need_utf8(pat, patend)) {
1168 /* We probably should try to avoid this in case a scalar context call
1169 wouldn't get to the "U0" */
1170 STRLEN len = strend - s;
230e1fce 1171 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1172 SAVEFREEPV(s);
1173 strend = s + len;
f337b084 1174 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1175 }
1176
f337b084
TH
1177 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1178 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1179
f7fe979e 1180 TEMPSYM_INIT(&sym, pat, patend, flags);
49704364
WL
1181
1182 return unpack_rec(&sym, s, s, strend, NULL );
1183}
1184
7accc089
JH
1185/*
1186=for apidoc unpackstring
1187
608d3aed
WL
1188The engine implementing unpack() Perl function. C<unpackstring> puts the
1189extracted list items on the stack and returns the number of elements.
1190Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
7accc089
JH
1191
1192=cut */
1193
1194I32
f7fe979e 1195Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 1196{
f7fe979e 1197 tempsym_t sym;
08ca2aa3 1198
f337b084 1199 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1200 else if (need_utf8(pat, patend)) {
1201 /* We probably should try to avoid this in case a scalar context call
1202 wouldn't get to the "U0" */
1203 STRLEN len = strend - s;
230e1fce 1204 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1205 SAVEFREEPV(s);
1206 strend = s + len;
f337b084 1207 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1208 }
1209
f337b084
TH
1210 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1211 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1212
f7fe979e 1213 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
1214
1215 return unpack_rec(&sym, s, s, strend, NULL );
1216}
1217
49704364
WL
1218STATIC
1219I32
f7fe979e 1220S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 1221{
27da23d5 1222 dVAR; dSP;
18529408 1223 SV *sv;
f7fe979e 1224 const I32 start_sp_offset = SP - PL_stack_base;
49704364 1225 howlen_t howlen;
a6ec74c1 1226
a6ec74c1 1227 I32 checksum = 0;
92d41999 1228 UV cuv = 0;
a6ec74c1 1229 NV cdouble = 0.0;
f337b084 1230 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 1231 bool beyond = FALSE;
21c16052 1232 bool explicit_length;
9e27e96a 1233 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 1234 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
28be1210 1235 symptr->strbeg = s - strbeg;
49704364 1236
49704364 1237 while (next_symbol(symptr)) {
a7a3cfaa 1238 packprops_t props;
9e27e96a 1239 I32 len;
f337b084 1240 I32 datumtype = symptr->code;
206947d2 1241 /* do first one only unless in list context
08ca2aa3 1242 / is implemented by unpacking the count, then popping it from the
206947d2 1243 stack, so must check that we're not in the middle of a / */
49704364 1244 if ( unpack_only_one
206947d2 1245 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1246 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1247 break;
49704364 1248
f337b084 1249 switch (howlen = symptr->howlen) {
fc241834
RGS
1250 case e_star:
1251 len = strend - strbeg; /* long enough */
49704364 1252 break;
f337b084
TH
1253 default:
1254 /* e_no_len and e_number */
1255 len = symptr->length;
1256 break;
49704364 1257 }
18529408 1258
21c16052 1259 explicit_length = TRUE;
a6ec74c1 1260 redo_switch:
49704364 1261 beyond = s >= strend;
a7a3cfaa
TH
1262
1263 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1264 if (props) {
1265 /* props nonzero means we can process this letter. */
9e27e96a
AL
1266 const long size = props & PACK_SIZE_MASK;
1267 const long howmany = (strend - s) / size;
a7a3cfaa
TH
1268 if (len > howmany)
1269 len = howmany;
1270
1271 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1272 if (len && unpack_only_one) len = 1;
1273 EXTEND(SP, len);
1274 EXTEND_MORTAL(len);
78d46eaa
NC
1275 }
1276 }
a7a3cfaa 1277
1109a392 1278 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1279 default:
1109a392 1280 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1281
a6ec74c1 1282 case '%':
49704364 1283 if (howlen == e_no_len)
18529408 1284 len = 16; /* len is not specified */
a6ec74c1 1285 checksum = len;
92d41999 1286 cuv = 0;
a6ec74c1 1287 cdouble = 0;
18529408 1288 continue;
a6ec74c1 1289 break;
18529408
IZ
1290 case '(':
1291 {
49704364 1292 tempsym_t savsym = *symptr;
9e27e96a 1293 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 1294 symptr->flags |= group_modifiers;
49704364 1295 symptr->patend = savsym.grpend;
28be1210 1296 symptr->previous = &savsym;
49704364 1297 symptr->level++;
18529408
IZ
1298 PUTBACK;
1299 while (len--) {
49704364 1300 symptr->patptr = savsym.grpbeg;
f337b084
TH
1301 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1302 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1303 unpack_rec(symptr, s, strbeg, strend, &s);
1304 if (s == strend && savsym.howlen == e_star)
49704364 1305 break; /* No way to continue */
18529408
IZ
1306 }
1307 SPAGAIN;
28be1210 1308 savsym.flags = symptr->flags & ~group_modifiers;
49704364 1309 *symptr = savsym;
18529408
IZ
1310 break;
1311 }
28be1210
TH
1312#ifdef PERL_PACK_CAN_SHRIEKSIGN
1313 case '.' | TYPE_IS_SHRIEKING:
1314#endif
1315 case '.': {
9e27e96a 1316 const char *from;
28be1210
TH
1317 SV *sv;
1318#ifdef PERL_PACK_CAN_SHRIEKSIGN
9e27e96a 1319 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210 1320#else /* PERL_PACK_CAN_SHRIEKSIGN */
9e27e96a 1321 const bool u8 = utf8;
28be1210
TH
1322#endif
1323 if (howlen == e_star) from = strbeg;
1324 else if (len <= 0) from = s;
1325 else {
1326 tempsym_t *group = symptr;
1327
1328 while (--len && group) group = group->previous;
1329 from = group ? strbeg + group->strbeg : strbeg;
1330 }
1331 sv = from <= s ?
00646304
CB
1332 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1333 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
28be1210
TH
1334 XPUSHs(sv_2mortal(sv));
1335 break;
1336 }
1337#ifdef PERL_PACK_CAN_SHRIEKSIGN
1338 case '@' | TYPE_IS_SHRIEKING:
1339#endif
a6ec74c1 1340 case '@':
28be1210
TH
1341 s = strbeg + symptr->strbeg;
1342#ifdef PERL_PACK_CAN_SHRIEKSIGN
1343 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1344#else /* PERL_PACK_CAN_SHRIEKSIGN */
1345 if (utf8)
1346#endif
1347 {
08ca2aa3
TH
1348 while (len > 0) {
1349 if (s >= strend)
1350 Perl_croak(aTHX_ "'@' outside of string in unpack");
1351 s += UTF8SKIP(s);
1352 len--;
1353 }
1354 if (s > strend)
1355 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1356 } else {
28be1210 1357 if (strend-s < len)
fc241834 1358 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1359 s += len;
08ca2aa3 1360 }
a6ec74c1 1361 break;
62f95557
IZ
1362 case 'X' | TYPE_IS_SHRIEKING:
1363 if (!len) /* Avoid division by 0 */
1364 len = 1;
08ca2aa3 1365 if (utf8) {
f7fe979e 1366 const char *hop, *last;
f337b084
TH
1367 I32 l = len;
1368 hop = last = strbeg;
1369 while (hop < s) {
1370 hop += UTF8SKIP(hop);
1371 if (--l == 0) {
08ca2aa3 1372 last = hop;
f337b084
TH
1373 l = len;
1374 }
fc241834 1375 }
f337b084
TH
1376 if (last > s)
1377 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1378 s = last;
1379 break;
f337b084
TH
1380 }
1381 len = (s - strbeg) % len;
62f95557 1382 /* FALL THROUGH */
a6ec74c1 1383 case 'X':
08ca2aa3
TH
1384 if (utf8) {
1385 while (len > 0) {
1386 if (s <= strbeg)
1387 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1388 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1389 if (s <= strbeg)
1390 Perl_croak(aTHX_ "'X' outside of string in unpack");
1391 }
1392 len--;
1393 }
1394 } else {
fc241834
RGS
1395 if (len > s - strbeg)
1396 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1397 s -= len;
08ca2aa3 1398 }
a6ec74c1 1399 break;
9e27e96a
AL
1400 case 'x' | TYPE_IS_SHRIEKING: {
1401 I32 ai32;
62f95557
IZ
1402 if (!len) /* Avoid division by 0 */
1403 len = 1;
230e1fce
NC
1404 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1405 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1406 if (ai32 == 0) break;
1407 len -= ai32;
9e27e96a 1408 }
62f95557 1409 /* FALL THROUGH */
a6ec74c1 1410 case 'x':
08ca2aa3
TH
1411 if (utf8) {
1412 while (len>0) {
1413 if (s >= strend)
1414 Perl_croak(aTHX_ "'x' outside of string in unpack");
1415 s += UTF8SKIP(s);
1416 len--;
1417 }
1418 } else {
fc241834
RGS
1419 if (len > strend - s)
1420 Perl_croak(aTHX_ "'x' outside of string in unpack");
1421 s += len;
f337b084 1422 }
a6ec74c1
JH
1423 break;
1424 case '/':
49704364
WL
1425 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1426 break;
a6ec74c1
JH
1427 case 'A':
1428 case 'Z':
1429 case 'a':
08ca2aa3
TH
1430 if (checksum) {
1431 /* Preliminary length estimate is assumed done in 'W' */
1432 if (len > strend - s) len = strend - s;
1433 goto W_checksum;
1434 }
1435 if (utf8) {
1436 I32 l;
f7fe979e 1437 const char *hop;
08ca2aa3
TH
1438 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1439 if (hop >= strend) {
1440 if (hop > strend)
1441 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1442 break;
fc241834 1443 }
a6ec74c1 1444 }
08ca2aa3
TH
1445 if (hop > strend)
1446 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1447 len = hop - s;
1448 } else if (len > strend - s)
1449 len = strend - s;
1450
1451 if (datumtype == 'Z') {
1452 /* 'Z' strips stuff after first null */
f7fe979e 1453 const char *ptr, *end;
f337b084
TH
1454 end = s + len;
1455 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1456 sv = newSVpvn(s, ptr-s);
1457 if (howlen == e_star) /* exact for 'Z*' */
1458 len = ptr-s + (ptr != strend ? 1 : 0);
1459 } else if (datumtype == 'A') {
1460 /* 'A' strips both nulls and spaces */
f7fe979e 1461 const char *ptr;
18bdf90a
TH
1462 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1463 for (ptr = s+len-1; ptr >= s; ptr--)
1464 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
230e1fce 1465 !is_utf8_space((U8 *) ptr)) break;
18bdf90a
TH
1466 if (ptr >= s) ptr += UTF8SKIP(ptr);
1467 else ptr++;
28be1210 1468 if (ptr > s+len)
18bdf90a
TH
1469 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1470 } else {
1471 for (ptr = s+len-1; ptr >= s; ptr--)
1472 if (*ptr != 0 && !isSPACE(*ptr)) break;
1473 ptr++;
1474 }
08ca2aa3
TH
1475 sv = newSVpvn(s, ptr-s);
1476 } else sv = newSVpvn(s, len);
1477
1478 if (utf8) {
1479 SvUTF8_on(sv);
1480 /* Undo any upgrade done due to need_utf8() */
f337b084 1481 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1482 sv_utf8_downgrade(sv, 0);
a6ec74c1
JH
1483 }
1484 XPUSHs(sv_2mortal(sv));
08ca2aa3 1485 s += len;
a6ec74c1
JH
1486 break;
1487 case 'B':
08ca2aa3
TH
1488 case 'b': {
1489 char *str;
49704364 1490 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1491 len = (strend - s) * 8;
1492 if (checksum) {
1493 if (!PL_bitcount) {
08ca2aa3 1494 int bits;
a02a5408 1495 Newxz(PL_bitcount, 256, char);
a6ec74c1
JH
1496 for (bits = 1; bits < 256; bits++) {
1497 if (bits & 1) PL_bitcount[bits]++;
1498 if (bits & 2) PL_bitcount[bits]++;
1499 if (bits & 4) PL_bitcount[bits]++;
1500 if (bits & 8) PL_bitcount[bits]++;
1501 if (bits & 16) PL_bitcount[bits]++;
1502 if (bits & 32) PL_bitcount[bits]++;
1503 if (bits & 64) PL_bitcount[bits]++;
1504 if (bits & 128) PL_bitcount[bits]++;
1505 }
1506 }
f337b084 1507 if (utf8)
08ca2aa3 1508 while (len >= 8 && s < strend) {
f337b084 1509 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1510 len -= 8;
1511 }
f337b084 1512 else
fc241834 1513 while (len >= 8) {
08ca2aa3 1514 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1515 len -= 8;
1516 }
08ca2aa3
TH
1517 if (len && s < strend) {
1518 U8 bits;
f337b084
TH
1519 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1520 if (datumtype == 'b')
a6ec74c1 1521 while (len-- > 0) {
92d41999 1522 if (bits & 1) cuv++;
a6ec74c1
JH
1523 bits >>= 1;
1524 }
f337b084 1525 else
a6ec74c1 1526 while (len-- > 0) {
08ca2aa3 1527 if (bits & 0x80) cuv++;
a6ec74c1
JH
1528 bits <<= 1;
1529 }
fc241834 1530 }
a6ec74c1
JH
1531 break;
1532 }
08ca2aa3
TH
1533
1534 sv = sv_2mortal(NEWSV(35, len ? len : 1));
a6ec74c1
JH
1535 SvPOK_on(sv);
1536 str = SvPVX(sv);
1537 if (datumtype == 'b') {
f337b084 1538 U8 bits = 0;
f7fe979e 1539 const I32 ai32 = len;
08ca2aa3
TH
1540 for (len = 0; len < ai32; len++) {
1541 if (len & 7) bits >>= 1;
1542 else if (utf8) {
1543 if (s >= strend) break;
f337b084 1544 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1545 } else bits = *(U8 *) s++;
1546 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1547 }
08ca2aa3 1548 } else {
f337b084 1549 U8 bits = 0;
f7fe979e 1550 const I32 ai32 = len;
08ca2aa3
TH
1551 for (len = 0; len < ai32; len++) {
1552 if (len & 7) bits <<= 1;
1553 else if (utf8) {
1554 if (s >= strend) break;
f337b084 1555 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1556 } else bits = *(U8 *) s++;
1557 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1558 }
1559 }
1560 *str = '\0';
aa07b2f6 1561 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1562 XPUSHs(sv);
a6ec74c1 1563 break;
08ca2aa3 1564 }
a6ec74c1 1565 case 'H':
08ca2aa3
TH
1566 case 'h': {
1567 char *str;
fc241834 1568 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1569 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1570 len = (strend - s) * 2;
fc241834 1571 sv = sv_2mortal(NEWSV(35, len ? len : 1));
a6ec74c1
JH
1572 SvPOK_on(sv);
1573 str = SvPVX(sv);
1574 if (datumtype == 'h') {
f337b084 1575 U8 bits = 0;
9e27e96a 1576 I32 ai32 = len;
fc241834
RGS
1577 for (len = 0; len < ai32; len++) {
1578 if (len & 1) bits >>= 4;
1579 else if (utf8) {
1580 if (s >= strend) break;
f337b084 1581 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1582 } else bits = * (U8 *) s++;
a6ec74c1
JH
1583 *str++ = PL_hexdigit[bits & 15];
1584 }
08ca2aa3 1585 } else {
f337b084 1586 U8 bits = 0;
f7fe979e 1587 const I32 ai32 = len;
08ca2aa3
TH
1588 for (len = 0; len < ai32; len++) {
1589 if (len & 1) bits <<= 4;
1590 else if (utf8) {
1591 if (s >= strend) break;
f337b084 1592 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1593 } else bits = *(U8 *) s++;
a6ec74c1
JH
1594 *str++ = PL_hexdigit[(bits >> 4) & 15];
1595 }
1596 }
1597 *str = '\0';
aa07b2f6 1598 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1599 XPUSHs(sv);
a6ec74c1 1600 break;
08ca2aa3 1601 }
a6ec74c1 1602 case 'c':
73cb7263 1603 while (len-- > 0) {
f337b084 1604 int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
73cb7263
NC
1605 if (aint >= 128) /* fake up signed chars */
1606 aint -= 256;
08ca2aa3 1607 if (!checksum)
ac7f3b1b 1608 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1609 else if (checksum > bits_in_uv)
1610 cdouble += (NV)aint;
1611 else
1612 cuv += aint;
a6ec74c1
JH
1613 }
1614 break;
1615 case 'C':
08ca2aa3
TH
1616 case 'W':
1617 W_checksum:
35bcd338 1618 if (len == 0) {
fc241834 1619 if (explicit_length && datumtype == 'C')
08ca2aa3 1620 /* Switch to "character" mode */
f337b084 1621 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
35bcd338
JH
1622 break;
1623 }
fc241834 1624 if (datumtype == 'C' ?
f337b084
TH
1625 (symptr->flags & FLAG_DO_UTF8) &&
1626 !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
08ca2aa3 1627 while (len-- > 0 && s < strend) {
08ca2aa3 1628 STRLEN retlen;
f7fe979e 1629 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1630 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1631 if (retlen == (STRLEN) -1 || retlen == 0)
1632 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1633 s += retlen;
1634 if (!checksum)
1635 PUSHs(sv_2mortal(newSVuv((UV) val)));
1636 else if (checksum > bits_in_uv)
1637 cdouble += (NV) val;
d6d3e8bd 1638 else
08ca2aa3 1639 cuv += val;
fc241834 1640 }
08ca2aa3 1641 } else if (!checksum)
a6ec74c1 1642 while (len-- > 0) {
f7fe979e 1643 const U8 ch = *(U8 *) s++;
08ca2aa3 1644 PUSHs(sv_2mortal(newSVuv((UV) ch)));
a6ec74c1 1645 }
08ca2aa3
TH
1646 else if (checksum > bits_in_uv)
1647 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1648 else
1649 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1650 break;
1651 case 'U':
35bcd338 1652 if (len == 0) {
08ca2aa3
TH
1653 if (explicit_length) {
1654 /* Switch to "bytes in UTF-8" mode */
f337b084 1655 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1656 else
1657 /* Should be impossible due to the need_utf8() test */
1658 Perl_croak(aTHX_ "U0 mode on a byte string");
1659 }
35bcd338
JH
1660 break;
1661 }
08ca2aa3 1662 if (len > strend - s) len = strend - s;
fc241834 1663 if (!checksum) {
08ca2aa3
TH
1664 if (len && unpack_only_one) len = 1;
1665 EXTEND(SP, len);
1666 EXTEND_MORTAL(len);
fc241834 1667 }
08ca2aa3
TH
1668 while (len-- > 0 && s < strend) {
1669 STRLEN retlen;
1670 UV auv;
1671 if (utf8) {
1672 U8 result[UTF8_MAXLEN];
f7fe979e 1673 const char *ptr = s;
08ca2aa3 1674 STRLEN len;
08ca2aa3
TH
1675 /* Bug: warns about bad utf8 even if we are short on bytes
1676 and will break out of the loop */
230e1fce
NC
1677 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1678 'U'))
08ca2aa3
TH
1679 break;
1680 len = UTF8SKIP(result);
fc241834 1681 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1682 (char *) &result[1], len-1, 'U')) break;
08ca2aa3
TH
1683 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1684 s = ptr;
1685 } else {
1686 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1687 if (retlen == (STRLEN) -1 || retlen == 0)
1688 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1689 s += retlen;
1690 }
1691 if (!checksum)
1692 PUSHs(sv_2mortal(newSVuv((UV) auv)));
73cb7263 1693 else if (checksum > bits_in_uv)
08ca2aa3 1694 cdouble += (NV) auv;
73cb7263 1695 else
08ca2aa3 1696 cuv += auv;
a6ec74c1
JH
1697 }
1698 break;
49704364
WL
1699 case 's' | TYPE_IS_SHRIEKING:
1700#if SHORTSIZE != SIZE16
73cb7263 1701 while (len-- > 0) {
08ca2aa3 1702 short ashort;
f337b084
TH
1703 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1704 DO_BO_UNPACK(ashort, s);
08ca2aa3 1705 if (!checksum)
ac7f3b1b 1706 PUSHs(sv_2mortal(newSViv((IV)ashort)));
73cb7263
NC
1707 else if (checksum > bits_in_uv)
1708 cdouble += (NV)ashort;
1709 else
1710 cuv += ashort;
49704364
WL
1711 }
1712 break;
1713#else
1714 /* Fallthrough! */
a6ec74c1 1715#endif
49704364 1716 case 's':
73cb7263 1717 while (len-- > 0) {
08ca2aa3
TH
1718 I16 ai16;
1719
1720#if U16SIZE > SIZE16
1721 ai16 = 0;
1722#endif
f337b084 1723 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1724 DO_BO_UNPACK(ai16, 16);
1109a392 1725#if U16SIZE > SIZE16
73cb7263
NC
1726 if (ai16 > 32767)
1727 ai16 -= 65536;
a6ec74c1 1728#endif
08ca2aa3 1729 if (!checksum)
ac7f3b1b 1730 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263
NC
1731 else if (checksum > bits_in_uv)
1732 cdouble += (NV)ai16;
1733 else
1734 cuv += ai16;
a6ec74c1
JH
1735 }
1736 break;
49704364
WL
1737 case 'S' | TYPE_IS_SHRIEKING:
1738#if SHORTSIZE != SIZE16
73cb7263 1739 while (len-- > 0) {
08ca2aa3 1740 unsigned short aushort;
f337b084
TH
1741 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1742 DO_BO_UNPACK(aushort, s);
08ca2aa3
TH
1743 if (!checksum)
1744 PUSHs(sv_2mortal(newSVuv((UV) aushort)));
73cb7263
NC
1745 else if (checksum > bits_in_uv)
1746 cdouble += (NV)aushort;
1747 else
1748 cuv += aushort;
49704364
WL
1749 }
1750 break;
1751#else
1752 /* Fallhrough! */
1753#endif
a6ec74c1
JH
1754 case 'v':
1755 case 'n':
1756 case 'S':
73cb7263 1757 while (len-- > 0) {
08ca2aa3
TH
1758 U16 au16;
1759#if U16SIZE > SIZE16
1760 au16 = 0;
1761#endif
f337b084 1762 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1763 DO_BO_UNPACK(au16, 16);
a6ec74c1 1764#ifdef HAS_NTOHS
73cb7263
NC
1765 if (datumtype == 'n')
1766 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1767#endif
1768#ifdef HAS_VTOHS
73cb7263
NC
1769 if (datumtype == 'v')
1770 au16 = vtohs(au16);
a6ec74c1 1771#endif
08ca2aa3
TH
1772 if (!checksum)
1773 PUSHs(sv_2mortal(newSVuv((UV)au16)));
73cb7263 1774 else if (checksum > bits_in_uv)
f337b084 1775 cdouble += (NV) au16;
73cb7263
NC
1776 else
1777 cuv += au16;
a6ec74c1
JH
1778 }
1779 break;
7212898e 1780#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1781 case 'v' | TYPE_IS_SHRIEKING:
1782 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1783 while (len-- > 0) {
08ca2aa3
TH
1784 I16 ai16;
1785# if U16SIZE > SIZE16
1786 ai16 = 0;
1787# endif
f337b084 1788 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1789# ifdef HAS_NTOHS
73cb7263 1790 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1791 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1792# endif /* HAS_NTOHS */
1793# ifdef HAS_VTOHS
73cb7263 1794 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1795 ai16 = (I16) vtohs((U16) ai16);
1796# endif /* HAS_VTOHS */
1797 if (!checksum)
ac7f3b1b 1798 PUSHs(sv_2mortal(newSViv((IV)ai16)));
73cb7263 1799 else if (checksum > bits_in_uv)
08ca2aa3 1800 cdouble += (NV) ai16;
73cb7263
NC
1801 else
1802 cuv += ai16;
068bd2e7
MHM
1803 }
1804 break;
08ca2aa3 1805#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1806 case 'i':
49704364 1807 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1808 while (len-- > 0) {
08ca2aa3 1809 int aint;
f337b084
TH
1810 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1811 DO_BO_UNPACK(aint, i);
08ca2aa3 1812 if (!checksum)
ac7f3b1b 1813 PUSHs(sv_2mortal(newSViv((IV)aint)));
73cb7263
NC
1814 else if (checksum > bits_in_uv)
1815 cdouble += (NV)aint;
1816 else
1817 cuv += aint;
a6ec74c1
JH
1818 }
1819 break;
1820 case 'I':
49704364 1821 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1822 while (len-- > 0) {
08ca2aa3 1823 unsigned int auint;
f337b084
TH
1824 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1825 DO_BO_UNPACK(auint, i);
08ca2aa3 1826 if (!checksum)
ac7f3b1b 1827 PUSHs(sv_2mortal(newSVuv((UV)auint)));
73cb7263
NC
1828 else if (checksum > bits_in_uv)
1829 cdouble += (NV)auint;
1830 else
1831 cuv += auint;
a6ec74c1
JH
1832 }
1833 break;
92d41999 1834 case 'j':
73cb7263 1835 while (len-- > 0) {
08ca2aa3 1836 IV aiv;
f337b084 1837 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1838#if IVSIZE == INTSIZE
f337b084 1839 DO_BO_UNPACK(aiv, i);
1109a392 1840#elif IVSIZE == LONGSIZE
f337b084 1841 DO_BO_UNPACK(aiv, l);
1109a392 1842#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1843 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1844#else
1845 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1846#endif
08ca2aa3 1847 if (!checksum)
ac7f3b1b 1848 PUSHs(sv_2mortal(newSViv(aiv)));
73cb7263
NC
1849 else if (checksum > bits_in_uv)
1850 cdouble += (NV)aiv;
1851 else
1852 cuv += aiv;
92d41999
JH
1853 }
1854 break;
1855 case 'J':
73cb7263 1856 while (len-- > 0) {
08ca2aa3 1857 UV auv;
f337b084 1858 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1859#if IVSIZE == INTSIZE
f337b084 1860 DO_BO_UNPACK(auv, i);
08ca2aa3 1861#elif IVSIZE == LONGSIZE
f337b084 1862 DO_BO_UNPACK(auv, l);
08ca2aa3 1863#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1864 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1865#else
1866 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1867#endif
08ca2aa3 1868 if (!checksum)
ac7f3b1b 1869 PUSHs(sv_2mortal(newSVuv(auv)));
73cb7263
NC
1870 else if (checksum > bits_in_uv)
1871 cdouble += (NV)auv;
1872 else
1873 cuv += auv;
92d41999
JH
1874 }
1875 break;
49704364
WL
1876 case 'l' | TYPE_IS_SHRIEKING:
1877#if LONGSIZE != SIZE32
73cb7263 1878 while (len-- > 0) {
08ca2aa3 1879 long along;
f337b084
TH
1880 SHIFT_VAR(utf8, s, strend, along, datumtype);
1881 DO_BO_UNPACK(along, l);
08ca2aa3 1882 if (!checksum)
ac7f3b1b 1883 PUSHs(sv_2mortal(newSViv((IV)along)));
73cb7263
NC
1884 else if (checksum > bits_in_uv)
1885 cdouble += (NV)along;
1886 else
1887 cuv += along;
49704364
WL
1888 }
1889 break;
1890#else
1891 /* Fallthrough! */
a6ec74c1 1892#endif
49704364 1893 case 'l':
73cb7263 1894 while (len-- > 0) {
08ca2aa3
TH
1895 I32 ai32;
1896#if U32SIZE > SIZE32
1897 ai32 = 0;
1898#endif
f337b084 1899 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1900 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1901#if U32SIZE > SIZE32
08ca2aa3 1902 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1903#endif
08ca2aa3 1904 if (!checksum)
ac7f3b1b 1905 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1906 else if (checksum > bits_in_uv)
1907 cdouble += (NV)ai32;
1908 else
1909 cuv += ai32;
a6ec74c1
JH
1910 }
1911 break;
49704364
WL
1912 case 'L' | TYPE_IS_SHRIEKING:
1913#if LONGSIZE != SIZE32
73cb7263 1914 while (len-- > 0) {
08ca2aa3 1915 unsigned long aulong;
f337b084
TH
1916 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1917 DO_BO_UNPACK(aulong, l);
08ca2aa3 1918 if (!checksum)
ac7f3b1b 1919 PUSHs(sv_2mortal(newSVuv((UV)aulong)));
73cb7263
NC
1920 else if (checksum > bits_in_uv)
1921 cdouble += (NV)aulong;
1922 else
1923 cuv += aulong;
49704364
WL
1924 }
1925 break;
1926#else
1927 /* Fall through! */
1928#endif
a6ec74c1
JH
1929 case 'V':
1930 case 'N':
1931 case 'L':
73cb7263 1932 while (len-- > 0) {
08ca2aa3
TH
1933 U32 au32;
1934#if U32SIZE > SIZE32
1935 au32 = 0;
1936#endif
f337b084 1937 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1938 DO_BO_UNPACK(au32, 32);
a6ec74c1 1939#ifdef HAS_NTOHL
73cb7263
NC
1940 if (datumtype == 'N')
1941 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1942#endif
1943#ifdef HAS_VTOHL
73cb7263
NC
1944 if (datumtype == 'V')
1945 au32 = vtohl(au32);
a6ec74c1 1946#endif
08ca2aa3 1947 if (!checksum)
fc241834
RGS
1948 PUSHs(sv_2mortal(newSVuv((UV)au32)));
1949 else if (checksum > bits_in_uv)
1950 cdouble += (NV)au32;
1951 else
1952 cuv += au32;
a6ec74c1
JH
1953 }
1954 break;
7212898e 1955#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1956 case 'V' | TYPE_IS_SHRIEKING:
1957 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1958 while (len-- > 0) {
08ca2aa3
TH
1959 I32 ai32;
1960# if U32SIZE > SIZE32
1961 ai32 = 0;
1962# endif
f337b084 1963 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1964# ifdef HAS_NTOHL
73cb7263
NC
1965 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1966 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1967# endif
1968# ifdef HAS_VTOHL
73cb7263
NC
1969 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1970 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1971# endif
1972 if (!checksum)
ac7f3b1b 1973 PUSHs(sv_2mortal(newSViv((IV)ai32)));
73cb7263
NC
1974 else if (checksum > bits_in_uv)
1975 cdouble += (NV)ai32;
1976 else
1977 cuv += ai32;
068bd2e7
MHM
1978 }
1979 break;
08ca2aa3 1980#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1981 case 'p':
a6ec74c1 1982 while (len-- > 0) {
f7fe979e 1983 const char *aptr;
f337b084 1984 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1985 DO_BO_UNPACK_PC(aptr);
c4c5f44a
NC
1986 /* newSVpv generates undef if aptr is NULL */
1987 PUSHs(sv_2mortal(newSVpv(aptr, 0)));
a6ec74c1
JH
1988 }
1989 break;
1990 case 'w':
a6ec74c1
JH
1991 {
1992 UV auv = 0;
1993 U32 bytes = 0;
fc241834 1994
08ca2aa3
TH
1995 while (len > 0 && s < strend) {
1996 U8 ch;
f337b084 1997 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 1998 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 1999 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 2000 if (ch < 0x80) {
a6ec74c1 2001 bytes = 0;
ac7f3b1b 2002 PUSHs(sv_2mortal(newSVuv(auv)));
a6ec74c1
JH
2003 len--;
2004 auv = 0;
08ca2aa3 2005 continue;
a6ec74c1 2006 }
08ca2aa3 2007 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 2008 const char *t;
a6ec74c1
JH
2009
2010 sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
2011 while (s < strend) {
f337b084 2012 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
2013 sv = mul128(sv, (U8)(ch & 0x7f));
2014 if (!(ch & 0x80)) {
a6ec74c1
JH
2015 bytes = 0;
2016 break;
2017 }
2018 }
10516c54 2019 t = SvPV_nolen_const(sv);
a6ec74c1
JH
2020 while (*t == '0')
2021 t++;
2022 sv_chop(sv, t);
2023 PUSHs(sv_2mortal(sv));
2024 len--;
2025 auv = 0;
2026 }
2027 }
2028 if ((s >= strend) && bytes)
49704364 2029 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
2030 }
2031 break;
2032 case 'P':
49704364
WL
2033 if (symptr->howlen == e_star)
2034 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 2035 EXTEND(SP, 1);
2d3e0934 2036 if (s + sizeof(char*) <= strend) {
08ca2aa3 2037 char *aptr;
f337b084 2038 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 2039 DO_BO_UNPACK_PC(aptr);
fc241834
RGS
2040 /* newSVpvn generates undef if aptr is NULL */
2041 PUSHs(sv_2mortal(newSVpvn(aptr, len)));
08ca2aa3 2042 }
a6ec74c1
JH
2043 break;
2044#ifdef HAS_QUAD
2045 case 'q':
73cb7263 2046 while (len-- > 0) {
08ca2aa3 2047 Quad_t aquad;
f337b084
TH
2048 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2049 DO_BO_UNPACK(aquad, 64);
08ca2aa3
TH
2050 if (!checksum)
2051 PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
ac7f3b1b 2052 newSViv((IV)aquad) : newSVnv((NV)aquad)));
73cb7263
NC
2053 else if (checksum > bits_in_uv)
2054 cdouble += (NV)aquad;
2055 else
2056 cuv += aquad;
2057 }
a6ec74c1
JH
2058 break;
2059 case 'Q':
73cb7263 2060 while (len-- > 0) {
08ca2aa3 2061 Uquad_t auquad;
f337b084
TH
2062 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2063 DO_BO_UNPACK(auquad, 64);
08ca2aa3
TH
2064 if (!checksum)
2065 PUSHs(sv_2mortal(auquad <= UV_MAX ?
2066 newSVuv((UV)auquad):newSVnv((NV)auquad)));
73cb7263
NC
2067 else if (checksum > bits_in_uv)
2068 cdouble += (NV)auquad;
2069 else
2070 cuv += auquad;
a6ec74c1
JH
2071 }
2072 break;
08ca2aa3 2073#endif /* HAS_QUAD */
a6ec74c1
JH
2074 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2075 case 'f':
73cb7263 2076 while (len-- > 0) {
08ca2aa3 2077 float afloat;
f337b084 2078 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
2079 DO_BO_UNPACK_N(afloat, float);
2080 if (!checksum)
2081 PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2082 else
73cb7263 2083 cdouble += afloat;
fc241834 2084 }
a6ec74c1
JH
2085 break;
2086 case 'd':
73cb7263 2087 while (len-- > 0) {
08ca2aa3 2088 double adouble;
f337b084 2089 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
2090 DO_BO_UNPACK_N(adouble, double);
2091 if (!checksum)
2092 PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2093 else
73cb7263 2094 cdouble += adouble;
fc241834 2095 }
a6ec74c1 2096 break;
92d41999 2097 case 'F':
73cb7263 2098 while (len-- > 0) {
08ca2aa3 2099 NV anv;
f337b084 2100 SHIFT_VAR(utf8, s, strend, anv, datumtype);
08ca2aa3
TH
2101 DO_BO_UNPACK_N(anv, NV);
2102 if (!checksum)
2103 PUSHs(sv_2mortal(newSVnv(anv)));
2104 else
73cb7263 2105 cdouble += anv;
fc241834 2106 }
92d41999
JH
2107 break;
2108#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2109 case 'D':
73cb7263 2110 while (len-- > 0) {
08ca2aa3 2111 long double aldouble;
f337b084 2112 SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
08ca2aa3
TH
2113 DO_BO_UNPACK_N(aldouble, long double);
2114 if (!checksum)
2115 PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2116 else
2117 cdouble += aldouble;
92d41999
JH
2118 }
2119 break;
2120#endif
a6ec74c1
JH
2121 case 'u':
2122 /* MKS:
2123 * Initialise the decode mapping. By using a table driven
2124 * algorithm, the code will be character-set independent
2125 * (and just as fast as doing character arithmetic)
2126 */
2127 if (PL_uudmap['M'] == 0) {
2d3e0934 2128 size_t i;
a6ec74c1 2129
2d3e0934 2130 for (i = 0; i < sizeof(PL_uuemap); ++i)
a6ec74c1
JH
2131 PL_uudmap[(U8)PL_uuemap[i]] = i;
2132 /*
2133 * Because ' ' and '`' map to the same value,
2134 * we need to decode them both the same.
2135 */
2136 PL_uudmap[' '] = 0;
2137 }
08ca2aa3 2138 {
f7fe979e 2139 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
08ca2aa3
TH
2140 sv = sv_2mortal(NEWSV(42, l));
2141 if (l) SvPOK_on(sv);
2142 }
2143 if (utf8) {
2144 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2145 I32 a, b, c, d;
2146 char hunk[4];
2147
2148 hunk[3] = '\0';
2149 while (len > 0) {
2150 next_uni_uu(aTHX_ &s, strend, &a);
2151 next_uni_uu(aTHX_ &s, strend, &b);
2152 next_uni_uu(aTHX_ &s, strend, &c);
2153 next_uni_uu(aTHX_ &s, strend, &d);
2154 hunk[0] = (char)((a << 2) | (b >> 4));
2155 hunk[1] = (char)((b << 4) | (c >> 2));
2156 hunk[2] = (char)((c << 6) | d);
2157 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2158 len -= 3;
2159 }
2160 if (s < strend) {
f7fe979e
AL
2161 if (*s == '\n') {
2162 s++;
2163 }
08ca2aa3
TH
2164 else {
2165 /* possible checksum byte */
f7fe979e
AL
2166 const char *skip = s+UTF8SKIP(s);
2167 if (skip < strend && *skip == '\n')
2168 s = skip+1;
08ca2aa3
TH
2169 }
2170 }
2171 }
2172 } else {
fc241834
RGS
2173 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2174 I32 a, b, c, d;
2175 char hunk[4];
a6ec74c1 2176
fc241834
RGS
2177 hunk[3] = '\0';
2178 len = PL_uudmap[*(U8*)s++] & 077;
2179 while (len > 0) {
2180 if (s < strend && ISUUCHAR(*s))
2181 a = PL_uudmap[*(U8*)s++] & 077;
2182 else
2183 a = 0;
2184 if (s < strend && ISUUCHAR(*s))
2185 b = PL_uudmap[*(U8*)s++] & 077;
2186 else
2187 b = 0;
2188 if (s < strend && ISUUCHAR(*s))
2189 c = PL_uudmap[*(U8*)s++] & 077;
2190 else
2191 c = 0;
2192 if (s < strend && ISUUCHAR(*s))
2193 d = PL_uudmap[*(U8*)s++] & 077;
2194 else
2195 d = 0;
2196 hunk[0] = (char)((a << 2) | (b >> 4));
2197 hunk[1] = (char)((b << 4) | (c >> 2));
2198 hunk[2] = (char)((c << 6) | d);
2199 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2200 len -= 3;
2201 }
2202 if (*s == '\n')
2203 s++;
2204 else /* possible checksum byte */
2205 if (s + 1 < strend && s[1] == '\n')
2206 s += 2;
a6ec74c1 2207 }
08ca2aa3
TH
2208 }
2209 XPUSHs(sv);
a6ec74c1
JH
2210 break;
2211 }
49704364 2212
a6ec74c1 2213 if (checksum) {
1109a392 2214 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2215 (checksum > bits_in_uv &&
08ca2aa3
TH
2216 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2217 NV trouble, anv;
a6ec74c1 2218
08ca2aa3 2219 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2220 while (checksum >= 16) {
2221 checksum -= 16;
08ca2aa3 2222 anv *= 65536.0;
a6ec74c1 2223 }
a6ec74c1 2224 while (cdouble < 0.0)
08ca2aa3
TH
2225 cdouble += anv;
2226 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 2227 sv = newSVnv(cdouble);
a6ec74c1
JH
2228 }
2229 else {
fa8ec7c1
NC
2230 if (checksum < bits_in_uv) {
2231 UV mask = ((UV)1 << checksum) - 1;
92d41999 2232 cuv &= mask;
a6ec74c1 2233 }
c4c5f44a 2234 sv = newSVuv(cuv);
a6ec74c1
JH
2235 }
2236 XPUSHs(sv_2mortal(sv));
2237 checksum = 0;
2238 }
fc241834 2239
49704364
WL
2240 if (symptr->flags & FLAG_SLASH){
2241 if (SP - PL_stack_base - start_sp_offset <= 0)
2242 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2243 if( next_symbol(symptr) ){
2244 if( symptr->howlen == e_number )
2245 Perl_croak(aTHX_ "Count after length/code in unpack" );
2246 if( beyond ){
2247 /* ...end of char buffer then no decent length available */
2248 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2249 } else {
2250 /* take top of stack (hope it's numeric) */
2251 len = POPi;
2252 if( len < 0 )
2253 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2254 }
2255 } else {
2256 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2257 }
2258 datumtype = symptr->code;
21c16052 2259 explicit_length = FALSE;
49704364
WL
2260 goto redo_switch;
2261 }
a6ec74c1 2262 }
49704364 2263
18529408
IZ
2264 if (new_s)
2265 *new_s = s;
2266 PUTBACK;
2267 return SP - PL_stack_base - start_sp_offset;
2268}
2269
2270PP(pp_unpack)
2271{
2272 dSP;
bab9c0ac 2273 dPOPPOPssrl;
18529408
IZ
2274 I32 gimme = GIMME_V;
2275 STRLEN llen;
2276 STRLEN rlen;
5c144d81
NC
2277 const char *pat = SvPV_const(left, llen);
2278 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
2279 const char *strend = s + rlen;
2280 const char *patend = pat + llen;
08ca2aa3 2281 I32 cnt;
18529408
IZ
2282
2283 PUTBACK;
7accc089 2284 cnt = unpackstring(pat, patend, s, strend,
49704364 2285 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2286 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2287
18529408
IZ
2288 SPAGAIN;
2289 if ( !cnt && gimme == G_SCALAR )
2290 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2291 RETURN;
2292}
2293
f337b084 2294STATIC U8 *
f7fe979e 2295doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 2296{
f337b084 2297 *h++ = PL_uuemap[len];
a6ec74c1 2298 while (len > 2) {
f337b084
TH
2299 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2300 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2301 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2302 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2303 s += 3;
2304 len -= 3;
2305 }
2306 if (len > 0) {
f7fe979e 2307 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2308 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2309 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2310 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2311 *h++ = PL_uuemap[0];
a6ec74c1 2312 }
f337b084
TH
2313 *h++ = '\n';
2314 return h;
a6ec74c1
JH
2315}
2316
2317STATIC SV *
f7fe979e 2318S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 2319{
8b6e33c7
AL
2320 SV *result = newSVpvn(s, l);
2321 char *const result_c = SvPV_nolen(result); /* convenience */
2322 char *out = result_c;
2323 bool skip = 1;
2324 bool ignore = 0;
a6ec74c1
JH
2325
2326 while (*s) {
2327 switch (*s) {
2328 case ' ':
2329 break;
2330 case '+':
2331 if (!skip) {
2332 SvREFCNT_dec(result);
2333 return (NULL);
2334 }
2335 break;
2336 case '0':
2337 case '1':
2338 case '2':
2339 case '3':
2340 case '4':
2341 case '5':
2342 case '6':
2343 case '7':
2344 case '8':
2345 case '9':
2346 skip = 0;
2347 if (!ignore) {
2348 *(out++) = *s;
2349 }
2350 break;
2351 case '.':
2352 ignore = 1;
2353 break;
2354 default:
2355 SvREFCNT_dec(result);
2356 return (NULL);
2357 }
2358 s++;
2359 }
2360 *(out++) = '\0';
2361 SvCUR_set(result, out - result_c);
2362 return (result);
2363}
2364
2365/* pnum must be '\0' terminated */
2366STATIC int
2367S_div128(pTHX_ SV *pnum, bool *done)
2368{
8b6e33c7
AL
2369 STRLEN len;
2370 char * const s = SvPV(pnum, len);
2371 char *t = s;
2372 int m = 0;
2373
2374 *done = 1;
2375 while (*t) {
2376 const int i = m * 10 + (*t - '0');
2377 const int r = (i >> 7); /* r < 10 */
2378 m = i & 0x7F;
2379 if (r) {
2380 *done = 0;
2381 }
2382 *(t++) = '0' + r;
a6ec74c1 2383 }
8b6e33c7
AL
2384 *(t++) = '\0';
2385 SvCUR_set(pnum, (STRLEN) (t - s));
2386 return (m);
a6ec74c1
JH
2387}
2388
18529408
IZ
2389/*
2390=for apidoc pack_cat
2391
7accc089
JH
2392The engine implementing pack() Perl function. Note: parameters next_in_list and
2393flags are not used. This call should not be used; use packlist instead.
18529408 2394
bfce84ec
AL
2395=cut
2396*/
49704364 2397
18529408 2398void
f7fe979e 2399Perl_pack_cat(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags)
a6ec74c1 2400{
aadb217d 2401 tempsym_t sym;
8772537c
AL
2402 PERL_UNUSED_ARG(next_in_list);
2403 PERL_UNUSED_ARG(flags);
7accc089 2404
f7fe979e 2405 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
aadb217d 2406
7accc089
JH
2407 (void)pack_rec( cat, &sym, beglist, endlist );
2408}
2409
2410
2411/*
2412=for apidoc packlist
2413
2414The engine implementing pack() Perl function.
2415
bfce84ec
AL
2416=cut
2417*/
7accc089
JH
2418
2419void
f7fe979e 2420Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
7accc089 2421{
f337b084 2422 STRLEN no_len;
aadb217d
JH
2423 tempsym_t sym;
2424
f7fe979e 2425 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 2426
f337b084
TH
2427 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2428 Also make sure any UTF8 flag is loaded */
2429 SvPV_force(cat, no_len);
bfce84ec
AL
2430 if (DO_UTF8(cat))
2431 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 2432
49704364
WL
2433 (void)pack_rec( cat, &sym, beglist, endlist );
2434}
2435
f337b084
TH
2436/* like sv_utf8_upgrade, but also repoint the group start markers */
2437STATIC void
2438marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2439 STRLEN len;
2440 tempsym_t *group;
f7fe979e
AL
2441 const char *from_ptr, *from_start, *from_end, **marks, **m;
2442 char *to_start, *to_ptr;
f337b084
TH
2443
2444 if (SvUTF8(sv)) return;
2445
aa07b2f6 2446 from_start = SvPVX_const(sv);
f337b084
TH
2447 from_end = from_start + SvCUR(sv);
2448 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2449 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2450 if (from_ptr == from_end) {
2451 /* Simple case: no character needs to be changed */
2452 SvUTF8_on(sv);
2453 return;
2454 }
2455
3473cf63 2456 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2457 Newx(to_start, len, char);
f337b084
TH
2458 Copy(from_start, to_start, from_ptr-from_start, char);
2459 to_ptr = to_start + (from_ptr-from_start);
2460
a02a5408 2461 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2462 for (group=sym_ptr; group; group = group->previous)
2463 marks[group->level] = from_start + group->strbeg;
2464 marks[sym_ptr->level+1] = from_end+1;
2465 for (m = marks; *m < from_ptr; m++)
2466 *m = to_start + (*m-from_start);
2467
2468 for (;from_ptr < from_end; from_ptr++) {
2469 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2470 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2471 }
2472 *to_ptr = 0;
2473
2474 while (*m == from_ptr) *m++ = to_ptr;
2475 if (m != marks + sym_ptr->level+1) {
2476 Safefree(marks);
2477 Safefree(to_start);
2478 Perl_croak(aTHX_ "Assertion: marks beyond string end");
2479 }
2480 for (group=sym_ptr; group; group = group->previous)
2481 group->strbeg = marks[group->level] - to_start;
2482 Safefree(marks);
2483
2484 if (SvOOK(sv)) {
2485 if (SvIVX(sv)) {
b162af07 2486 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2487 from_start -= SvIVX(sv);
2488 SvIV_set(sv, 0);
2489 }
2490 SvFLAGS(sv) &= ~SVf_OOK;
2491 }
2492 if (SvLEN(sv) != 0)
2493 Safefree(from_start);
f880fe2f 2494 SvPV_set(sv, to_start);
b162af07
SP
2495 SvCUR_set(sv, to_ptr - to_start);
2496 SvLEN_set(sv, len);
f337b084
TH
2497 SvUTF8_on(sv);
2498}
2499
2500/* Exponential string grower. Makes string extension effectively O(n)
2501 needed says how many extra bytes we need (not counting the final '\0')
2502 Only grows the string if there is an actual lack of space
2503*/
2504STATIC char *
2505sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2506 const STRLEN cur = SvCUR(sv);
2507 const STRLEN len = SvLEN(sv);
f337b084
TH
2508 STRLEN extend;
2509 if (len - cur > needed) return SvPVX(sv);
2510 extend = needed > len ? needed : len;
2511 return SvGROW(sv, len+extend+1);
2512}
49704364
WL
2513
2514STATIC
2515SV **
f337b084 2516S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2517{
49704364 2518 tempsym_t lookahead;
f337b084
TH
2519 I32 items = endlist - beglist;
2520 bool found = next_symbol(symptr);
2521 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2522 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084
TH
2523
2524 if (symptr->level == 0 && found && symptr->code == 'U') {
2525 marked_upgrade(aTHX_ cat, symptr);
2526 symptr->flags |= FLAG_DO_UTF8;
2527 utf8 = 0;
49704364 2528 }
f337b084 2529 symptr->strbeg = SvCUR(cat);
49704364
WL
2530
2531 while (found) {
f337b084
TH
2532 SV *fromstr;
2533 STRLEN fromlen;
2534 I32 len;
a6ec74c1 2535 SV *lengthcode = Nullsv;
49704364 2536 I32 datumtype = symptr->code;
f337b084
TH
2537 howlen_t howlen = symptr->howlen;
2538 char *start = SvPVX(cat);
2539 char *cur = start + SvCUR(cat);
49704364 2540
f337b084
TH
2541#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2542
2543 switch (howlen) {
fc241834 2544 case e_star:
f337b084
TH
2545 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2546 0 : items;
2547 break;
2548 default:
2549 /* e_no_len and e_number */
2550 len = symptr->length;
49704364
WL
2551 break;
2552 }
2553
f337b084 2554 if (len) {
a7a3cfaa 2555 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2556
a7a3cfaa
TH
2557 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2558 /* We can process this letter. */
2559 STRLEN size = props & PACK_SIZE_MASK;
2560 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2561 }
f337b084
TH
2562 }
2563
49704364
WL
2564 /* Look ahead for next symbol. Do we have code/code? */
2565 lookahead = *symptr;
2566 found = next_symbol(&lookahead);
246f24af
TH
2567 if (symptr->flags & FLAG_SLASH) {
2568 IV count;
f337b084 2569 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2570 if (strchr("aAZ", lookahead.code)) {
2571 if (lookahead.howlen == e_number) count = lookahead.length;
2572 else {
2573 if (items > 0)
2574 count = DO_UTF8(*beglist) ?
2575 sv_len_utf8(*beglist) : sv_len(*beglist);
2576 else count = 0;
2577 if (lookahead.code == 'Z') count++;
2578 }
2579 } else {
2580 if (lookahead.howlen == e_number && lookahead.length < items)
2581 count = lookahead.length;
2582 else count = items;
2583 }
2584 lookahead.howlen = e_number;
2585 lookahead.length = count;
2586 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2587 }
49704364 2588
fc241834
RGS
2589 /* Code inside the switch must take care to properly update
2590 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2591 doesn't simply leave using break */
1109a392 2592 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2593 default:
f337b084
TH
2594 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2595 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2596 case '%':
49704364 2597 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2598 {
2599 char *from;
2600#ifdef PERL_PACK_CAN_SHRIEKSIGN
2601 case '.' | TYPE_IS_SHRIEKING:
2602#endif
2603 case '.':
2604 if (howlen == e_star) from = start;
2605 else if (len == 0) from = cur;
2606 else {
2607 tempsym_t *group = symptr;
2608
2609 while (--len && group) group = group->previous;
2610 from = group ? start + group->strbeg : start;
2611 }
2612 fromstr = NEXTFROM;
2613 len = SvIV(fromstr);
2614 goto resize;
2615#ifdef PERL_PACK_CAN_SHRIEKSIGN
2616 case '@' | TYPE_IS_SHRIEKING:
2617#endif
a6ec74c1 2618 case '@':
28be1210
TH
2619 from = start + symptr->strbeg;
2620 resize:
2621#ifdef PERL_PACK_CAN_SHRIEKSIGN
2622 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2623#else /* PERL_PACK_CAN_SHRIEKSIGN */
2624 if (utf8)
2625#endif
2626 if (len >= 0) {
2627 while (len && from < cur) {
2628 from += UTF8SKIP(from);
2629 len--;
2630 }
2631 if (from > cur)
2632 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2633 if (len) {
2634 /* Here we know from == cur */
2635 grow:
2636 GROWING(0, cat, start, cur, len);
2637 Zero(cur, len, char);
2638 cur += len;
2639 } else if (from < cur) {
2640 len = cur - from;
2641 goto shrink;
2642 } else goto no_change;
2643 } else {
2644 cur = from;
2645 len = -len;
2646 goto utf8_shrink;
f337b084 2647 }
28be1210
TH
2648 else {
2649 len -= cur - from;
f337b084 2650 if (len > 0) goto grow;
28be1210 2651 if (len == 0) goto no_change;
fc241834 2652 len = -len;
28be1210 2653 goto shrink;
f337b084 2654 }
a6ec74c1 2655 break;
28be1210 2656 }
fc241834 2657 case '(': {
49704364 2658 tempsym_t savsym = *symptr;
66c611c5
MHM
2659 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2660 symptr->flags |= group_modifiers;
49704364
WL
2661 symptr->patend = savsym.grpend;
2662 symptr->level++;
f337b084 2663 symptr->previous = &lookahead;
18529408 2664 while (len--) {
f337b084
TH
2665 U32 was_utf8;
2666 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2667 else symptr->flags &= ~FLAG_PARSE_UTF8;
2668 was_utf8 = SvUTF8(cat);
49704364 2669 symptr->patptr = savsym.grpbeg;
f337b084
TH
2670 beglist = pack_rec(cat, symptr, beglist, endlist);
2671 if (SvUTF8(cat) != was_utf8)
2672 /* This had better be an upgrade while in utf8==0 mode */
2673 utf8 = 1;
2674
49704364 2675 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2676 break; /* No way to continue */
2677 }
f337b084
TH
2678 lookahead.flags = symptr->flags & ~group_modifiers;
2679 goto no_change;
18529408 2680 }
62f95557
IZ
2681 case 'X' | TYPE_IS_SHRIEKING:
2682 if (!len) /* Avoid division by 0 */
2683 len = 1;
f337b084
TH
2684 if (utf8) {
2685 char *hop, *last;
2686 I32 l = len;
2687 hop = last = start;
2688 while (hop < cur) {
2689 hop += UTF8SKIP(hop);
2690 if (--l == 0) {
2691 last = hop;
2692 l = len;
2693 }
2694 }
2695 if (last > cur)
2696 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2697 cur = last;
2698 break;
2699 }
2700 len = (cur-start) % len;
62f95557 2701 /* FALL THROUGH */
a6ec74c1 2702 case 'X':
f337b084
TH
2703 if (utf8) {
2704 if (len < 1) goto no_change;
28be1210 2705 utf8_shrink:
f337b084
TH
2706 while (len > 0) {
2707 if (cur <= start)
28be1210
TH
2708 Perl_croak(aTHX_ "'%c' outside of string in pack",
2709 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2710 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2711 if (cur <= start)
28be1210
TH
2712 Perl_croak(aTHX_ "'%c' outside of string in pack",
2713 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2714 }
2715 len--;
2716 }
2717 } else {
fc241834 2718 shrink:
f337b084 2719 if (cur - start < len)
28be1210
TH
2720 Perl_croak(aTHX_ "'%c' outside of string in pack",
2721 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2722 cur -= len;
2723 }
2724 if (cur < start+symptr->strbeg) {
2725 /* Make sure group starts don't point into the void */
2726 tempsym_t *group;
9e27e96a 2727 const STRLEN length = cur-start;
f337b084
TH
2728 for (group = symptr;
2729 group && length < group->strbeg;
2730 group = group->previous) group->strbeg = length;
2731 lookahead.strbeg = length;
2732 }
a6ec74c1 2733 break;
fc241834
RGS
2734 case 'x' | TYPE_IS_SHRIEKING: {
2735 I32 ai32;
62f95557
IZ
2736 if (!len) /* Avoid division by 0 */
2737 len = 1;
230e1fce 2738 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2739 else ai32 = (cur - start) % len;
2740 if (ai32 == 0) goto no_change;
2741 len -= ai32;
2742 }
2743 /* FALL THROUGH */
a6ec74c1 2744 case 'x':
f337b084 2745 goto grow;
a6ec74c1
JH
2746 case 'A':
2747 case 'Z':
f337b084 2748 case 'a': {
f7fe979e 2749 const char *aptr;
f337b084 2750
a6ec74c1 2751 fromstr = NEXTFROM;
e62f0680 2752 aptr = SvPV_const(fromstr, fromlen);
f337b084 2753 if (DO_UTF8(fromstr)) {
f7fe979e 2754 const char *end, *s;
f337b084
TH
2755
2756 if (!utf8 && !SvUTF8(cat)) {
2757 marked_upgrade(aTHX_ cat, symptr);
2758 lookahead.flags |= FLAG_DO_UTF8;
2759 lookahead.strbeg = symptr->strbeg;
2760 utf8 = 1;
2761 start = SvPVX(cat);
2762 cur = start + SvCUR(cat);
2763 }
fc241834 2764 if (howlen == e_star) {
f337b084
TH
2765 if (utf8) goto string_copy;
2766 len = fromlen+1;
2767 }
2768 s = aptr;
2769 end = aptr + fromlen;
2770 fromlen = datumtype == 'Z' ? len-1 : len;
2771 while ((I32) fromlen > 0 && s < end) {
2772 s += UTF8SKIP(s);
2773 fromlen--;
2774 }
2775 if (s > end)
2776 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2777 if (utf8) {
fc241834 2778 len = fromlen;
f337b084
TH
2779 if (datumtype == 'Z') len++;
2780 fromlen = s-aptr;
2781 len += fromlen;
fc241834 2782
f337b084 2783 goto string_copy;
fc241834 2784 }
f337b084
TH
2785 fromlen = len - fromlen;
2786 if (datumtype == 'Z') fromlen--;
2787 if (howlen == e_star) {
2788 len = fromlen;
2789 if (datumtype == 'Z') len++;
fc241834 2790 }
f337b084 2791 GROWING(0, cat, start, cur, len);
fc241834 2792 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084
TH
2793 datumtype | TYPE_IS_PACK))
2794 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2795 cur += fromlen;
a6ec74c1 2796 len -= fromlen;
f337b084
TH
2797 } else if (utf8) {
2798 if (howlen == e_star) {
2799 len = fromlen;
2800 if (datumtype == 'Z') len++;
a6ec74c1 2801 }
f337b084
TH
2802 if (len <= (I32) fromlen) {
2803 fromlen = len;
2804 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2805 }
fc241834 2806 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2807 upgrade, so:
2808 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2809 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2810 len -= fromlen;
2811 while (fromlen > 0) {
230e1fce 2812 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2813 aptr++;
2814 fromlen--;
fc241834 2815 }
f337b084
TH
2816 } else {
2817 string_copy:
2818 if (howlen == e_star) {
2819 len = fromlen;
2820 if (datumtype == 'Z') len++;
2821 }
2822 if (len <= (I32) fromlen) {
2823 fromlen = len;
2824 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2825 }
f337b084
TH
2826 GROWING(0, cat, start, cur, len);
2827 Copy(aptr, cur, fromlen, char);
2828 cur += fromlen;
2829 len -= fromlen;
a6ec74c1 2830 }
f337b084
TH
2831 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2832 cur += len;
a6ec74c1 2833 break;
f337b084 2834 }
a6ec74c1 2835 case 'B':
f337b084 2836 case 'b': {
b83604b4 2837 const char *str, *end;
f337b084
TH
2838 I32 l, field_len;
2839 U8 bits;
2840 bool utf8_source;
2841 U32 utf8_flags;
a6ec74c1 2842
fc241834 2843 fromstr = NEXTFROM;
b83604b4 2844 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2845 end = str + fromlen;
2846 if (DO_UTF8(fromstr)) {
2847 utf8_source = TRUE;
041457d9 2848 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2849 } else {
2850 utf8_source = FALSE;
2851 utf8_flags = 0; /* Unused, but keep compilers happy */
2852 }
2853 if (howlen == e_star) len = fromlen;
2854 field_len = (len+7)/8;
2855 GROWING(utf8, cat, start, cur, field_len);
2856 if (len > (I32)fromlen) len = fromlen;
2857 bits = 0;
2858 l = 0;
2859 if (datumtype == 'B')
2860 while (l++ < len) {
2861 if (utf8_source) {
2862 UV val;
2863 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2864 bits |= val & 1;
2865 } else bits |= *str++ & 1;
2866 if (l & 7) bits <<= 1;
fc241834 2867 else {
f337b084
TH
2868 PUSH_BYTE(utf8, cur, bits);
2869 bits = 0;
a6ec74c1
JH
2870 }
2871 }
f337b084
TH
2872 else
2873 /* datumtype == 'b' */
2874 while (l++ < len) {
2875 if (utf8_source) {
2876 UV val;
2877 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2878 if (val & 1) bits |= 0x80;
2879 } else if (*str++ & 1)
2880 bits |= 0x80;
2881 if (l & 7) bits >>= 1;
fc241834 2882 else {
f337b084
TH
2883 PUSH_BYTE(utf8, cur, bits);
2884 bits = 0;
a6ec74c1
JH
2885 }
2886 }
f337b084
TH
2887 l--;
2888 if (l & 7) {
fc241834 2889 if (datumtype == 'B')
f337b084 2890 bits <<= 7 - (l & 7);
fc241834 2891 else
f337b084
TH
2892 bits >>= 7 - (l & 7);
2893 PUSH_BYTE(utf8, cur, bits);
2894 l += 7;
a6ec74c1 2895 }
f337b084
TH
2896 /* Determine how many chars are left in the requested field */
2897 l /= 8;
2898 if (howlen == e_star) field_len = 0;
2899 else field_len -= l;
2900 Zero(cur, field_len, char);
2901 cur += field_len;
a6ec74c1 2902 break;
f337b084 2903 }
a6ec74c1 2904 case 'H':
f337b084 2905 case 'h': {
10516c54 2906 const char *str, *end;
f337b084
TH
2907 I32 l, field_len;
2908 U8 bits;
2909 bool utf8_source;
2910 U32 utf8_flags;
a6ec74c1 2911
fc241834 2912 fromstr = NEXTFROM;
10516c54 2913 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2914 end = str + fromlen;
2915 if (DO_UTF8(fromstr)) {
2916 utf8_source = TRUE;
041457d9 2917 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2918 } else {
2919 utf8_source = FALSE;
2920 utf8_flags = 0; /* Unused, but keep compilers happy */
2921 }
2922 if (howlen == e_star) len = fromlen;
2923 field_len = (len+1)/2;
2924 GROWING(utf8, cat, start, cur, field_len);
2925 if (!utf8 && len > (I32)fromlen) len = fromlen;
2926 bits = 0;
2927 l = 0;
2928 if (datumtype == 'H')
2929 while (l++ < len) {
2930 if (utf8_source) {
2931 UV val;
2932 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2933 if (val < 256 && isALPHA(val))
2934 bits |= (val + 9) & 0xf;
a6ec74c1 2935 else
f337b084
TH
2936 bits |= val & 0xf;
2937 } else if (isALPHA(*str))
2938 bits |= (*str++ + 9) & 0xf;
2939 else
2940 bits |= *str++ & 0xf;
2941 if (l & 1) bits <<= 4;
fc241834 2942 else {
f337b084
TH
2943 PUSH_BYTE(utf8, cur, bits);
2944 bits = 0;
a6ec74c1
JH
2945 }
2946 }
f337b084
TH
2947 else
2948 while (l++ < len) {
2949 if (utf8_source) {
2950 UV val;
2951 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2952 if (val < 256 && isALPHA(val))
2953 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2954 else
f337b084
TH
2955 bits |= (val & 0xf) << 4;
2956 } else if (isALPHA(*str))
2957 bits |= ((*str++ + 9) & 0xf) << 4;
2958 else
2959 bits |= (*str++ & 0xf) << 4;
2960 if (l & 1) bits >>= 4;
fc241834 2961 else {
f337b084
TH
2962 PUSH_BYTE(utf8, cur, bits);
2963 bits = 0;
a6ec74c1 2964 }
fc241834 2965 }
f337b084
TH
2966 l--;
2967 if (l & 1) {
2968 PUSH_BYTE(utf8, cur, bits);
2969 l++;
2970 }
2971 /* Determine how many chars are left in the requested field */
2972 l /= 2;
2973 if (howlen == e_star) field_len = 0;
2974 else field_len -= l;
2975 Zero(cur, field_len, char);
2976 cur += field_len;
2977 break;
fc241834
RGS
2978 }
2979 case 'c':
f337b084
TH
2980 while (len-- > 0) {
2981 IV aiv;
2982 fromstr = NEXTFROM;
2983 aiv = SvIV(fromstr);
2984 if ((-128 > aiv || aiv > 127) &&
2985 ckWARN(WARN_PACK))
2986 Perl_warner(aTHX_ packWARN(WARN_PACK),
2987 "Character in 'c' format wrapped in pack");
585ec06d 2988 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2989 }
2990 break;
2991 case 'C':
f337b084
TH
2992 if (len == 0) {
2993 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2994 break;
2995 }
2996 GROWING(0, cat, start, cur, len);
a6ec74c1 2997 while (len-- > 0) {
f337b084 2998 IV aiv;
a6ec74c1 2999 fromstr = NEXTFROM;
f337b084
TH
3000 aiv = SvIV(fromstr);
3001 if ((0 > aiv || aiv > 0xff) &&
fc241834
RGS
3002 ckWARN(WARN_PACK))
3003 Perl_warner(aTHX_ packWARN(WARN_PACK),
3004 "Character in 'C' format wrapped in pack");
585ec06d 3005 *cur++ = (char)(aiv & 0xff);
f337b084 3006 }
fc241834
RGS
3007 break;
3008 case 'W': {
3009 char *end;
3010 U8 in_bytes = IN_BYTES;
3011
3012 end = start+SvLEN(cat)-1;
3013 if (utf8) end -= UTF8_MAXLEN-1;
3014 while (len-- > 0) {
3015 UV auv;
3016 fromstr = NEXTFROM;
3017 auv = SvUV(fromstr);
3018 if (in_bytes) auv = auv % 0x100;
3019 if (utf8) {
3020 W_utf8:
3021 if (cur > end) {
3022 *cur = '\0';
b162af07 3023 SvCUR_set(cat, cur - start);
fc241834
RGS
3024
3025 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3026 end = start+SvLEN(cat)-UTF8_MAXLEN;
3027 }
230e1fce
NC
3028 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3029 NATIVE_TO_UNI(auv),
041457d9 3030 warn_utf8 ?
230e1fce 3031 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
3032 } else {
3033 if (auv >= 0x100) {
3034 if (!SvUTF8(cat)) {
3035 *cur = '\0';
b162af07 3036 SvCUR_set(cat, cur - start);
fc241834
RGS
3037 marked_upgrade(aTHX_ cat, symptr);
3038 lookahead.flags |= FLAG_DO_UTF8;
3039 lookahead.strbeg = symptr->strbeg;
3040 utf8 = 1;
3041 start = SvPVX(cat);
3042 cur = start + SvCUR(cat);
3043 end = start+SvLEN(cat)-UTF8_MAXLEN;
3044 goto W_utf8;
3045 }
3046 if (ckWARN(WARN_PACK))
3047 Perl_warner(aTHX_ packWARN(WARN_PACK),
3048 "Character in 'W' format wrapped in pack");
3049 auv &= 0xff;
3050 }
3051 if (cur >= end) {
3052 *cur = '\0';
b162af07 3053 SvCUR_set(cat, cur - start);
fc241834
RGS
3054 GROWING(0, cat, start, cur, len+1);
3055 end = start+SvLEN(cat)-1;
3056 }
fe2774ed 3057 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
3058 }
3059 }
3060 break;
fc241834
RGS
3061 }
3062 case 'U': {
3063 char *end;
3064
3065 if (len == 0) {
3066 if (!(symptr->flags & FLAG_DO_UTF8)) {
3067 marked_upgrade(aTHX_ cat, symptr);
3068 lookahead.flags |= FLAG_DO_UTF8;
3069 lookahead.strbeg = symptr->strbeg;
3070 }
3071 utf8 = 0;
3072 goto no_change;
3073 }
3074
3075 end = start+SvLEN(cat);
3076 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 3077 while (len-- > 0) {
fc241834 3078 UV auv;
a6ec74c1 3079 fromstr = NEXTFROM;
fc241834
RGS
3080 auv = SvUV(fromstr);
3081 if (utf8) {
230e1fce 3082 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 3083 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 3084 warn_utf8 ?
fc241834
RGS
3085 0 : UNICODE_ALLOW_ANY);
3086 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3087 *cur = '\0';
b162af07 3088 SvCUR_set(cat, cur - start);
fc241834
RGS
3089 GROWING(0, cat, start, cur,
3090 len+(endb-buffer)*UTF8_EXPAND);
3091 end = start+SvLEN(cat);
3092 }
3093 bytes_to_uni(aTHX_ buffer, endb-buffer, &cur);
3094 } else {
3095 if (cur >= end) {
3096 *cur = '\0';
b162af07 3097 SvCUR_set(cat, cur - start);
fc241834
RGS
3098 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3099 end = start+SvLEN(cat)-UTF8_MAXLEN;
3100 }
230e1fce 3101 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 3102 warn_utf8 ?
230e1fce 3103 0 : UNICODE_ALLOW_ANY);
fc241834 3104 }
a6ec74c1 3105 }
a6ec74c1 3106 break;
fc241834 3107 }
a6ec74c1
JH
3108 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3109 case 'f':
a6ec74c1 3110 while (len-- > 0) {
f337b084
TH
3111 float afloat;
3112 NV anv;
a6ec74c1 3113 fromstr = NEXTFROM;
f337b084 3114 anv = SvNV(fromstr);
5cdb9e01 3115#ifdef __VOS__
f337b084 3116 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3117 during conversion from double to float into infinity, so we
3118 do it by hand. This code should either be generalized for
3119 any OS that needs it, or removed if and when VOS implements
3120 posix-976 (suggestion to support mapping to infinity).
3121 Paul.Green@stratus.com 02-04-02. */
f337b084 3122 if (anv > FLT_MAX)
fc241834 3123 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3124 else if (anv < -FLT_MAX)
fc241834 3125 afloat = _float_constants[0]; /* single prec. inf. */
f337b084
TH
3126 else afloat = (float) anv;
3127#else /* __VOS__ */
baf3cf9c 3128# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3129 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3130 * on Alpha; fake it if we don't have them.
3131 */
f337b084 3132 if (anv > FLT_MAX)
fc241834 3133 afloat = FLT_MAX;
f337b084 3134 else if (anv < -FLT_MAX)
fc241834 3135 afloat = -FLT_MAX;
f337b084 3136 else afloat = (float)anv;
baf3cf9c 3137# else
f337b084 3138 afloat = (float)anv;
baf3cf9c 3139# endif
f337b084 3140#endif /* __VOS__ */
1109a392 3141 DO_BO_PACK_N(afloat, float);
f337b084 3142 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
3143 }
3144 break;
3145 case 'd':
a6ec74c1 3146 while (len-- > 0) {
f337b084
TH
3147 double adouble;
3148 NV anv;
a6ec74c1 3149 fromstr = NEXTFROM;
f337b084 3150 anv = SvNV(fromstr);
5cdb9e01 3151#ifdef __VOS__
f337b084 3152 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3153 during conversion from long double to double into infinity,
3154 so we do it by hand. This code should either be generalized
3155 for any OS that needs it, or removed if and when VOS
3156 implements posix-976 (suggestion to support mapping to
3157 infinity). Paul.Green@stratus.com 02-04-02. */
f337b084 3158 if (anv > DBL_MAX)
fc241834 3159 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3160 else if (anv < -DBL_MAX)
fc241834 3161 adouble = _double_constants[0]; /* double prec. inf. */
f337b084
TH
3162 else adouble = (double) anv;
3163#else /* __VOS__ */
baf3cf9c 3164# if defined(VMS) && !defined(__IEEE_FP)
f337b084 3165 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3166 * on Alpha; fake it if we don't have them.
3167 */
f337b084 3168 if (anv > DBL_MAX)
fc241834 3169 adouble = DBL_MAX;
f337b084 3170 else if (anv < -DBL_MAX)
fc241834 3171 adouble = -DBL_MAX;
f337b084 3172 else adouble = (double)anv;
baf3cf9c 3173# else
f337b084 3174 adouble = (double)anv;
baf3cf9c 3175# endif
f337b084 3176#endif /* __VOS__ */
1109a392 3177 DO_BO_PACK_N(adouble, double);
f337b084 3178 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3179 }
3180 break;
fc241834
RGS
3181 case 'F': {
3182 NV anv;
1109a392 3183 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3184 while (len-- > 0) {
3185 fromstr = NEXTFROM;
3186 anv = SvNV(fromstr);
1109a392 3187 DO_BO_PACK_N(anv, NV);
fc241834 3188 PUSH_VAR(utf8, cur, anv);
92d41999
JH
3189 }
3190 break;
fc241834 3191 }
92d41999 3192#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834
RGS
3193 case 'D': {
3194 long double aldouble;
1109a392
MHM
3195 /* long doubles can have unused bits, which may be nonzero */
3196 Zero(&aldouble, 1, long double);
92d41999
JH
3197 while (len-- > 0) {
3198 fromstr = NEXTFROM;
3199 aldouble = (long double)SvNV(fromstr);
1109a392 3200 DO_BO_PACK_N(aldouble, long double);
fc241834 3201 PUSH_VAR(utf8, cur, aldouble);
92d41999
JH
3202 }
3203 break;
fc241834 3204 }
92d41999 3205#endif
7212898e 3206#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3207 case 'n' | TYPE_IS_SHRIEKING:
7212898e 3208#endif
a6ec74c1
JH
3209 case 'n':
3210 while (len-- > 0) {
f337b084 3211 I16 ai16;
a6ec74c1 3212 fromstr = NEXTFROM;
ef108786 3213 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3214#ifdef HAS_HTONS
ef108786 3215 ai16 = PerlSock_htons(ai16);
a6ec74c1 3216#endif
f337b084 3217 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3218 }
3219 break;
7212898e 3220#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3221 case 'v' | TYPE_IS_SHRIEKING:
7212898e 3222#endif
a6ec74c1
JH
3223 case 'v':
3224 while (len-- > 0) {
f337b084 3225 I16 ai16;
a6ec74c1 3226 fromstr = NEXTFROM;
ef108786 3227 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3228#ifdef HAS_HTOVS
ef108786 3229 ai16 = htovs(ai16);
a6ec74c1 3230#endif
f337b084 3231 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3232 }
3233 break;
49704364 3234 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3235#if SHORTSIZE != SIZE16
fc241834 3236 while (len-- > 0) {
f337b084 3237 unsigned short aushort;
fc241834
RGS
3238 fromstr = NEXTFROM;
3239 aushort = SvUV(fromstr);
3240 DO_BO_PACK(aushort, s);
f337b084 3241 PUSH_VAR(utf8, cur, aushort);
fc241834 3242 }
49704364
WL
3243 break;
3244#else
3245 /* Fall through! */
a6ec74c1 3246#endif
49704364 3247 case 'S':
fc241834 3248 while (len-- > 0) {
f337b084 3249 U16 au16;
fc241834
RGS
3250 fromstr = NEXTFROM;
3251 au16 = (U16)SvUV(fromstr);
3252 DO_BO_PACK(au16, 16);
f337b084 3253 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3254 }
3255 break;
49704364 3256 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3257#if SHORTSIZE != SIZE16
fc241834 3258 while (len-- > 0) {
f337b084 3259 short ashort;
fc241834
RGS
3260 fromstr = NEXTFROM;
3261 ashort = SvIV(fromstr);
3262 DO_BO_PACK(ashort, s);
f337b084 3263 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3264 }
49704364
WL
3265 break;
3266#else
3267 /* Fall through! */
a6ec74c1 3268#endif
49704364
WL
3269 case 's':
3270 while (len-- > 0) {
f337b084 3271 I16 ai16;
49704364 3272 fromstr = NEXTFROM;
ef108786
MHM
3273 ai16 = (I16)SvIV(fromstr);
3274 DO_BO_PACK(ai16, 16);
f337b084 3275 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3276 }
3277 break;
3278 case 'I':
49704364 3279 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3280 while (len-- > 0) {
f337b084 3281 unsigned int auint;
a6ec74c1
JH
3282 fromstr = NEXTFROM;
3283 auint = SvUV(fromstr);
1109a392 3284 DO_BO_PACK(auint, i);
f337b084 3285 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3286 }
3287 break;
92d41999
JH
3288 case 'j':
3289 while (len-- > 0) {
f337b084 3290 IV aiv;
92d41999
JH
3291 fromstr = NEXTFROM;
3292 aiv = SvIV(fromstr);
1109a392
MHM
3293#if IVSIZE == INTSIZE
3294 DO_BO_PACK(aiv, i);
3295#elif IVSIZE == LONGSIZE
3296 DO_BO_PACK(aiv, l);
3297#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3298 DO_BO_PACK(aiv, 64);
f337b084
TH
3299#else
3300 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3301#endif
f337b084 3302 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3303 }
3304 break;
3305 case 'J':
3306 while (len-- > 0) {
f337b084 3307 UV auv;
92d41999
JH
3308 fromstr = NEXTFROM;
3309 auv = SvUV(fromstr);
1109a392
MHM
3310#if UVSIZE == INTSIZE
3311 DO_BO_PACK(auv, i);
3312#elif UVSIZE == LONGSIZE
3313 DO_BO_PACK(auv, l);
3314#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3315 DO_BO_PACK(auv, 64);
f337b084
TH
3316#else
3317 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3318#endif
f337b084 3319 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3320 }
3321 break;
a6ec74c1
JH
3322 case 'w':
3323 while (len-- > 0) {
f337b084 3324 NV anv;
a6ec74c1 3325 fromstr = NEXTFROM;
15e9f109 3326 anv = SvNV(fromstr);
a6ec74c1 3327
f337b084
TH
3328 if (anv < 0) {
3329 *cur = '\0';
b162af07 3330 SvCUR_set(cat, cur - start);
49704364 3331 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3332 }
a6ec74c1 3333
196b62db
NC
3334 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3335 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3336 any negative IVs will have already been got by the croak()
3337 above. IOK is untrue for fractions, so we test them
3338 against UV_MAX_P1. */
f337b084
TH
3339 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3340 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3341 char *in = buf + sizeof(buf);
196b62db 3342 UV auv = SvUV(fromstr);
a6ec74c1
JH
3343
3344 do {
eb160463 3345 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3346 auv >>= 7;
3347 } while (auv);
3348 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3349 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3350 in, (buf + sizeof(buf)) - in);
3351 } else if (SvPOKp(fromstr))
3352 goto w_string;
a6ec74c1 3353 else if (SvNOKp(fromstr)) {
0258719b
NC
3354 /* 10**NV_MAX_10_EXP is the largest power of 10
3355 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3356 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3357 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3358 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3359 Some C compilers are strict about integral constant
3360 expressions so we conservatively divide by a slightly
3361 smaller integer instead of multiplying by the exact
3362 floating-point value.
0258719b
NC
3363 */
3364#ifdef NV_MAX_10_EXP
f337b084 3365 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3366 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3367#else
f337b084 3368 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3369 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3370#endif
a6ec74c1
JH
3371 char *in = buf + sizeof(buf);
3372
8b6e33c7 3373 anv = Perl_floor(anv);
a6ec74c1 3374 do {
8b6e33c7 3375 const NV next = Perl_floor(anv / 128);
a6ec74c1 3376 if (in <= buf) /* this cannot happen ;-) */
49704364 3377 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3378 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3379 anv = next;
3380 } while (anv > 0);
a6ec74c1 3381 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3382 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3383 in, (buf + sizeof(buf)) - in);
3384 } else {
8b6e33c7
AL
3385 const char *from;
3386 char *result, *in;
735b914b
JH
3387 SV *norm;
3388 STRLEN len;
3389 bool done;
3390
f337b084 3391 w_string:
735b914b 3392 /* Copy string and check for compliance */
349d4f2f 3393 from = SvPV_const(fromstr, len);
735b914b 3394 if ((norm = is_an_int(from, len)) == NULL)
49704364 3395 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 3396
a02a5408 3397 Newx(result, len, char);
735b914b
JH
3398 in = result + len;
3399 done = FALSE;
f337b084 3400 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3401 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3402 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3403 in, (result + len) - in);
735b914b
JH
3404 Safefree(result);
3405 SvREFCNT_dec(norm); /* free norm */
fc241834 3406 }
a6ec74c1
JH
3407 }
3408 break;
3409 case 'i':
49704364 3410 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3411 while (len-- > 0) {
f337b084 3412 int aint;
a6ec74c1
JH
3413 fromstr = NEXTFROM;
3414 aint = SvIV(fromstr);
1109a392 3415 DO_BO_PACK(aint, i);
f337b084 3416 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3417 }
3418 break;
7212898e 3419#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3420 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3421#endif
a6ec74c1
JH
3422 case 'N':
3423 while (len-- > 0) {
f337b084 3424 U32 au32;
a6ec74c1 3425 fromstr = NEXTFROM;
ef108786 3426 au32 = SvUV(fromstr);
a6ec74c1 3427#ifdef HAS_HTONL
ef108786 3428 au32 = PerlSock_htonl(au32);
a6ec74c1 3429#endif
f337b084 3430 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3431 }
3432 break;
7212898e 3433#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3434 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3435#endif
a6ec74c1
JH
3436 case 'V':
3437 while (len-- > 0) {
f337b084 3438 U32 au32;
a6ec74c1 3439 fromstr = NEXTFROM;
ef108786 3440 au32 = SvUV(fromstr);
a6ec74c1 3441#ifdef HAS_HTOVL
ef108786 3442 au32 = htovl(au32);
a6ec74c1 3443#endif
f337b084 3444 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3445 }
3446 break;
49704364 3447 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3448#if LONGSIZE != SIZE32
fc241834 3449 while (len-- > 0) {
f337b084 3450 unsigned long aulong;
fc241834
RGS
3451 fromstr = NEXTFROM;
3452 aulong = SvUV(fromstr);
3453 DO_BO_PACK(aulong, l);
f337b084 3454 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3455 }
49704364
WL
3456 break;
3457#else
3458 /* Fall though! */
a6ec74c1 3459#endif
49704364 3460 case 'L':
fc241834 3461 while (len-- > 0) {
f337b084 3462 U32 au32;
fc241834
RGS
3463 fromstr = NEXTFROM;
3464 au32 = SvUV(fromstr);
3465 DO_BO_PACK(au32, 32);
f337b084 3466 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3467 }
3468 break;
49704364 3469 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3470#if LONGSIZE != SIZE32
fc241834 3471 while (len-- > 0) {
f337b084 3472 long along;
fc241834
RGS
3473 fromstr = NEXTFROM;
3474 along = SvIV(fromstr);
3475 DO_BO_PACK(along, l);
f337b084 3476 PUSH_VAR(utf8, cur, along);
a6ec74c1 3477 }
49704364
WL
3478 break;
3479#else
3480 /* Fall though! */
a6ec74c1 3481#endif
49704364
WL
3482 case 'l':
3483 while (len-- > 0) {
f337b084 3484 I32 ai32;
49704364 3485 fromstr = NEXTFROM;
ef108786
MHM
3486 ai32 = SvIV(fromstr);
3487 DO_BO_PACK(ai32, 32);
f337b084 3488 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3489 }
3490 break;
3491#ifdef HAS_QUAD
3492 case 'Q':
3493 while (len-- > 0) {
f337b084 3494 Uquad_t auquad;
a6ec74c1 3495 fromstr = NEXTFROM;
f337b084 3496 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3497 DO_BO_PACK(auquad, 64);
f337b084 3498 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3499 }
3500 break;
3501 case 'q':
3502 while (len-- > 0) {
f337b084 3503 Quad_t aquad;
a6ec74c1
JH
3504 fromstr = NEXTFROM;
3505 aquad = (Quad_t)SvIV(fromstr);
1109a392 3506 DO_BO_PACK(aquad, 64);
f337b084 3507 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3508 }
3509 break;
f337b084 3510#endif /* HAS_QUAD */
a6ec74c1
JH
3511 case 'P':
3512 len = 1; /* assume SV is correct length */
f337b084 3513 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3514 /* Fall through! */
a6ec74c1
JH
3515 case 'p':
3516 while (len-- > 0) {
83003860 3517 const char *aptr;
f337b084 3518
a6ec74c1 3519 fromstr = NEXTFROM;
28a4f200
TH
3520 SvGETMAGIC(fromstr);
3521 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3522 else {
a6ec74c1
JH
3523 /* XXX better yet, could spirit away the string to
3524 * a safe spot and hang on to it until the result
3525 * of pack() (and all copies of the result) are
3526 * gone.
3527 */
041457d9
DM
3528 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3529 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
9014280d 3530 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834 3531 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3532 }
3533 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3534 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3535 else
2596d9fe 3536 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3537 }
07409e01 3538 DO_BO_PACK_PC(aptr);
f337b084 3539 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3540 }
3541 break;
fc241834 3542 case 'u': {
f7fe979e 3543 const char *aptr, *aend;
fc241834 3544 bool from_utf8;
f337b084 3545
a6ec74c1 3546 fromstr = NEXTFROM;
fc241834
RGS
3547 if (len <= 2) len = 45;
3548 else len = len / 3 * 3;
3549 if (len >= 64) {
0fdc08b6
RGS
3550 if (ckWARN(WARN_PACK))
3551 Perl_warner(aTHX_ packWARN(WARN_PACK),
fc241834
RGS
3552 "Field too wide in 'u' format in pack");
3553 len = 63;
3554 }
83003860 3555 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3556 from_utf8 = DO_UTF8(fromstr);
3557 if (from_utf8) {
3558 aend = aptr + fromlen;
3559 fromlen = sv_len_utf8(fromstr);
3560 } else aend = NULL; /* Unused, but keep compilers happy */
3561 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3562 while (fromlen > 0) {
fc241834 3563 U8 *end;
a6ec74c1 3564 I32 todo;
fc241834 3565 U8 hunk[1+63/3*4+1];
a6ec74c1 3566
eb160463 3567 if ((I32)fromlen > len)
a6ec74c1
JH
3568 todo = len;
3569 else
3570 todo = fromlen;
fc241834
RGS
3571 if (from_utf8) {
3572 char buffer[64];
3573 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3574 'u' | TYPE_IS_PACK)) {
3575 *cur = '\0';
b162af07 3576 SvCUR_set(cat, cur - start);
fc241834
RGS
3577 Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3578 }
3579 end = doencodes(hunk, buffer, todo);
3580 } else {
3581 end = doencodes(hunk, aptr, todo);
3582 aptr += todo;
3583 }
3584 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3585 fromlen -= todo;
3586 }
a6ec74c1
JH
3587 break;
3588 }
f337b084
TH
3589 }
3590 *cur = '\0';
b162af07 3591 SvCUR_set(cat, cur - start);
f337b084 3592 no_change:
49704364 3593 *symptr = lookahead;
a6ec74c1 3594 }
49704364 3595 return beglist;
18529408
IZ
3596}
3597#undef NEXTFROM
3598
3599
3600PP(pp_pack)
3601{
3602 dSP; dMARK; dORIGMARK; dTARGET;
3603 register SV *cat = TARG;
3604 STRLEN fromlen;
349d4f2f
NC
3605 SV *pat_sv = *++MARK;
3606 register const char *pat = SvPV_const(pat_sv, fromlen);
f7fe979e 3607 register const char *patend = pat + fromlen;
18529408
IZ
3608
3609 MARK++;
3610 sv_setpvn(cat, "", 0);
f337b084 3611 SvUTF8_off(cat);
18529408 3612
7accc089 3613 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3614
a6ec74c1
JH
3615 SvSETMAGIC(cat);
3616 SP = ORIGMARK;
3617 PUSHs(cat);
3618 RETURN;
3619}
a6ec74c1 3620
73cb7263
NC
3621/*
3622 * Local variables:
3623 * c-indentation-style: bsd
3624 * c-basic-offset: 4
3625 * indent-tabs-mode: t
3626 * End:
3627 *
37442d52
RGS
3628 * ex: set ts=8 sts=4 sw=4 noet:
3629 */