This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
perlfunc: clarify that "do $file" isn't really like "eval `cat $file`"
[perl5.git] / pp_pack.c
CommitLineData
a6ec74c1
JH
1/* pp_pack.c
2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a6ec74c1
JH
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
d31a8517
AT
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
4ac71550
TC
17 *
18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
d31a8517
AT
19 */
20
166f8a29
DM
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
a6ec74c1
JH
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
f7fe979e
AL
35/* Types used by pack/unpack */
36typedef enum {
37 e_no_len, /* no length */
38 e_number, /* number, [] */
39 e_star /* asterisk */
40} howlen_t;
41
42typedef struct tempsym {
43 const char* patptr; /* current template char */
44 const char* patend; /* one after last char */
45 const char* grpbeg; /* 1st char of ()-group */
46 const char* grpend; /* end of ()-group */
47 I32 code; /* template code (!<>) */
48 I32 length; /* length/repeat count */
49 howlen_t howlen; /* how length is given */
50 int level; /* () nesting level */
51 U32 flags; /* /=4, comma=2, pack=1 */
52 /* and group modifiers */
53 STRLEN strbeg; /* offset of group start */
54 struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58 STMT_START { \
59 (symptr)->patptr = (p); \
60 (symptr)->patend = (e); \
61 (symptr)->grpbeg = NULL; \
62 (symptr)->grpend = NULL; \
63 (symptr)->grpend = NULL; \
64 (symptr)->code = 0; \
65 (symptr)->length = 0; \
10edeb5d 66 (symptr)->howlen = e_no_len; \
f7fe979e
AL
67 (symptr)->level = 0; \
68 (symptr)->flags = (f); \
69 (symptr)->strbeg = 0; \
70 (symptr)->previous = NULL; \
71 } STMT_END
72
275663fa
TC
73typedef union {
74 NV nv;
75 U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79typedef union {
80 long double ld;
81 U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
7212898e 85#if PERL_VERSION >= 9
f337b084
TH
86# define PERL_PACK_CAN_BYTEORDER
87# define PERL_PACK_CAN_SHRIEKSIGN
88#endif
89
90#ifndef CHAR_BIT
91# define CHAR_BIT 8
7212898e 92#endif
3473cf63
RGS
93/* Maximum number of bytes to which a byte can grow due to upgrade */
94#define UTF8_EXPAND 2
7212898e 95
a6ec74c1 96/*
a6ec74c1
JH
97 * Offset for integer pack/unpack.
98 *
99 * On architectures where I16 and I32 aren't really 16 and 32 bits,
100 * which for now are all Crays, pack and unpack have to play games.
101 */
102
103/*
104 * These values are required for portability of pack() output.
105 * If they're not right on your machine, then pack() and unpack()
106 * wouldn't work right anyway; you'll need to apply the Cray hack.
107 * (I'd like to check them with #if, but you can't use sizeof() in
108 * the preprocessor.) --???
109 */
110/*
111 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
112 defines are now in config.h. --Andy Dougherty April 1998
113 */
114#define SIZE16 2
115#define SIZE32 4
116
117/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
118 --jhi Feb 1999 */
119
1109a392
MHM
120#if U16SIZE > SIZE16 || U32SIZE > SIZE32
121# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */
08ca2aa3
TH
122# define OFF16(p) ((char*)(p))
123# define OFF32(p) ((char*)(p))
a6ec74c1 124# else
1109a392 125# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */
a6ec74c1
JH
126# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
127# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
128# else
08ca2aa3 129 ++++ bad cray byte order
a6ec74c1
JH
130# endif
131# endif
a6ec74c1 132#else
08ca2aa3
TH
133# define OFF16(p) ((char *) (p))
134# define OFF32(p) ((char *) (p))
a6ec74c1
JH
135#endif
136
f337b084
TH
137/* Only to be used inside a loop (see the break) */
138#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \
139 if (utf8) { \
140 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \
141 } else { \
142 Copy(s, OFF16(p), SIZE16, char); \
143 (s) += SIZE16; \
144 } \
145} STMT_END
146
147/* Only to be used inside a loop (see the break) */
148#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \
149 if (utf8) { \
150 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \
151 } else { \
152 Copy(s, OFF32(p), SIZE32, char); \
153 (s) += SIZE32; \
154 } \
155} STMT_END
156
157#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
158#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
08ca2aa3
TH
159
160/* Only to be used inside a loop (see the break) */
275663fa 161#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype) \
08ca2aa3
TH
162STMT_START { \
163 if (utf8) { \
f337b084 164 if (!uni_to_bytes(aTHX_ &s, strend, \
275663fa 165 (char *) (buf), len, datumtype)) break; \
08ca2aa3 166 } else { \
275663fa
TC
167 Copy(s, (char *) (buf), len, char); \
168 s += len; \
08ca2aa3 169 } \
08ca2aa3
TH
170} STMT_END
171
275663fa
TC
172#define SHIFT_VAR(utf8, s, strend, var, datumtype) \
173 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype)
174
f337b084 175#define PUSH_VAR(utf8, aptr, var) \
230e1fce 176 PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
f337b084 177
49704364
WL
178/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
179#define MAX_SUB_TEMPLATE_LEVEL 100
180
66c611c5 181/* flags (note that type modifiers can also be used as flags!) */
f337b084
TH
182#define FLAG_WAS_UTF8 0x40
183#define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */
49704364 184#define FLAG_UNPACK_ONLY_ONE 0x10
f337b084 185#define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */
49704364
WL
186#define FLAG_SLASH 0x04
187#define FLAG_COMMA 0x02
188#define FLAG_PACK 0x01
189
a6ec74c1
JH
190STATIC SV *
191S_mul128(pTHX_ SV *sv, U8 m)
192{
193 STRLEN len;
194 char *s = SvPV(sv, len);
195 char *t;
a6ec74c1 196
7918f24d
NC
197 PERL_ARGS_ASSERT_MUL128;
198
a6ec74c1 199 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */
396482e1 200 SV * const tmpNew = newSVpvs("0000000000");
a6ec74c1
JH
201
202 sv_catsv(tmpNew, sv);
203 SvREFCNT_dec(sv); /* free old sv */
204 sv = tmpNew;
205 s = SvPV(sv, len);
206 }
207 t = s + len - 1;
208 while (!*t) /* trailing '\0'? */
209 t--;
210 while (t > s) {
f7fe979e 211 const U32 i = ((*t - '0') << 7) + m;
eb160463
GS
212 *(t--) = '0' + (char)(i % 10);
213 m = (char)(i / 10);
a6ec74c1
JH
214 }
215 return (sv);
216}
217
218/* Explosives and implosives. */
219
220#if 'I' == 73 && 'J' == 74
221/* On an ASCII/ISO kind of system */
222#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a')
223#else
224/*
225 Some other sort of character set - use memchr() so we don't match
226 the null byte.
227 */
228#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
229#endif
230
66c611c5 231/* type modifiers */
62f95557 232#define TYPE_IS_SHRIEKING 0x100
1109a392
MHM
233#define TYPE_IS_BIG_ENDIAN 0x200
234#define TYPE_IS_LITTLE_ENDIAN 0x400
f337b084 235#define TYPE_IS_PACK 0x800
1109a392 236#define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
66c611c5 237#define TYPE_MODIFIERS(t) ((t) & ~0xFF)
1109a392
MHM
238#define TYPE_NO_MODIFIERS(t) ((t) & 0xFF)
239
7212898e 240#ifdef PERL_PACK_CAN_SHRIEKSIGN
28be1210 241# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
7212898e 242#else
28be1210 243# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
7212898e
NC
244#endif
245
246#ifndef PERL_PACK_CAN_BYTEORDER
247/* Put "can't" first because it is shorter */
248# define TYPE_ENDIANNESS(t) 0
249# define TYPE_NO_ENDIANNESS(t) (t)
250
251# define ENDIANNESS_ALLOWED_TYPES ""
252
253# define DO_BO_UNPACK(var, type)
254# define DO_BO_PACK(var, type)
07409e01
NC
255# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
256# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
7212898e
NC
257# define DO_BO_UNPACK_N(var, type)
258# define DO_BO_PACK_N(var, type)
259# define DO_BO_UNPACK_P(var)
260# define DO_BO_PACK_P(var)
45f723e1
NC
261# define DO_BO_UNPACK_PC(var)
262# define DO_BO_PACK_PC(var)
66c611c5 263
f337b084 264#else /* PERL_PACK_CAN_BYTEORDER */
7212898e
NC
265
266# define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK)
267# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
268
269# define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP("
270
271# define DO_BO_UNPACK(var, type) \
1109a392 272 STMT_START { \
66c611c5 273 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
274 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \
275 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \
276 default: break; \
277 } \
278 } STMT_END
279
7212898e 280# define DO_BO_PACK(var, type) \
1109a392 281 STMT_START { \
66c611c5 282 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
283 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \
284 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \
285 default: break; \
286 } \
287 } STMT_END
288
07409e01 289# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \
1109a392 290 STMT_START { \
66c611c5 291 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 292 case TYPE_IS_BIG_ENDIAN: \
07409e01 293 var = (post_cast*) my_betoh ## type ((pre_cast) var); \
1109a392
MHM
294 break; \
295 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 296 var = (post_cast *) my_letoh ## type ((pre_cast) var); \
1109a392
MHM
297 break; \
298 default: \
299 break; \
300 } \
301 } STMT_END
302
07409e01 303# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \
1109a392 304 STMT_START { \
66c611c5 305 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392 306 case TYPE_IS_BIG_ENDIAN: \
07409e01 307 var = (post_cast *) my_htobe ## type ((pre_cast) var); \
1109a392
MHM
308 break; \
309 case TYPE_IS_LITTLE_ENDIAN: \
07409e01 310 var = (post_cast *) my_htole ## type ((pre_cast) var); \
1109a392
MHM
311 break; \
312 default: \
313 break; \
314 } \
315 } STMT_END
316
7212898e 317# define BO_CANT_DOIT(action, type) \
66c611c5
MHM
318 STMT_START { \
319 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
320 case TYPE_IS_BIG_ENDIAN: \
321 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \
322 "platform", #action, #type); \
323 break; \
324 case TYPE_IS_LITTLE_ENDIAN: \
325 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \
326 "platform", #action, #type); \
327 break; \
328 default: \
329 break; \
330 } \
331 } STMT_END
332
7212898e 333# if PTRSIZE == INTSIZE
07409e01
NC
334# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void)
335# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void)
336# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
337# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char)
7212898e 338# elif PTRSIZE == LONGSIZE
fa58a56f
S
339# if LONGSIZE < IVSIZE && IVSIZE == 8
340# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, 64, IV, void)
341# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, 64, IV, void)
342# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, 64, IV, char)
343# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, 64, IV, char)
344# else
345# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
346# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
347# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
348# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
349# endif
2b00a750
JD
350# elif PTRSIZE == IVSIZE
351# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void)
352# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void)
353# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char)
354# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char)
7212898e
NC
355# else
356# define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer)
357# define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer)
45f723e1
NC
358# define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer)
359# define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer)
7212898e 360# endif
1109a392 361
7212898e 362# if defined(my_htolen) && defined(my_letohn) && \
1109a392 363 defined(my_htoben) && defined(my_betohn)
7212898e 364# define DO_BO_UNPACK_N(var, type) \
1109a392 365 STMT_START { \
66c611c5 366 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
367 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\
368 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
369 default: break; \
370 } \
371 } STMT_END
372
7212898e 373# define DO_BO_PACK_N(var, type) \
1109a392 374 STMT_START { \
66c611c5 375 switch (TYPE_ENDIANNESS(datumtype)) { \
1109a392
MHM
376 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\
377 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
378 default: break; \
379 } \
380 } STMT_END
7212898e
NC
381# else
382# define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type)
383# define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type)
384# endif
385
f337b084 386#endif /* PERL_PACK_CAN_BYTEORDER */
62f95557 387
78d46eaa 388#define PACK_SIZE_CANNOT_CSUM 0x80
f337b084 389#define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */
78d46eaa
NC
390#define PACK_SIZE_MASK 0x3F
391
78d46eaa
NC
392/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
393 in). You're unlikely ever to need to regenerate them. */
a7a3cfaa
TH
394
395#if TYPE_IS_SHRIEKING != 0x100
396 ++++shriek offset should be 256
397#endif
398
399typedef U8 packprops_t;
78d46eaa
NC
400#if 'J'-'I' == 1
401/* ASCII */
f43a9a31 402STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
403 /* normal */
404 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
405 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
406 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
407 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
408 0, 0, 0,
1651fc44 409 /* C */ sizeof(unsigned char),
80a13697 410#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 411 /* D */ LONG_DOUBLESIZE,
80a13697 412#else
a7a3cfaa 413 0,
80a13697 414#endif
a7a3cfaa
TH
415 0,
416 /* F */ NVSIZE,
417 0, 0,
418 /* I */ sizeof(unsigned int),
419 /* J */ UVSIZE,
420 0,
421 /* L */ SIZE32,
422 0,
423 /* N */ SIZE32,
424 0, 0,
80a13697 425#if defined(HAS_QUAD)
a7a3cfaa 426 /* Q */ sizeof(Uquad_t),
80a13697 427#else
a7a3cfaa 428 0,
80a13697 429#endif
a7a3cfaa
TH
430 0,
431 /* S */ SIZE16,
432 0,
433 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
434 /* V */ SIZE32,
435 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437 /* c */ sizeof(char),
438 /* d */ sizeof(double),
439 0,
440 /* f */ sizeof(float),
441 0, 0,
442 /* i */ sizeof(int),
443 /* j */ IVSIZE,
444 0,
445 /* l */ SIZE32,
446 0,
447 /* n */ SIZE16,
448 0,
449 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 450#if defined(HAS_QUAD)
a7a3cfaa 451 /* q */ sizeof(Quad_t),
80a13697 452#else
a7a3cfaa 453 0,
80a13697 454#endif
a7a3cfaa
TH
455 0,
456 /* s */ SIZE16,
457 0, 0,
458 /* v */ SIZE16,
459 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
460 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
461 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
462 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
463 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
464 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
465 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
466 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
467 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
468 0, 0, 0, 0, 0, 0, 0, 0,
469 /* shrieking */
470 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
471 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
472 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
473 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
474 0, 0, 0, 0, 0, 0, 0, 0, 0,
475 /* I */ sizeof(unsigned int),
476 0, 0,
477 /* L */ sizeof(unsigned long),
478 0,
7212898e 479#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 480 /* N */ SIZE32,
7212898e 481#else
a7a3cfaa 482 0,
7212898e 483#endif
a7a3cfaa
TH
484 0, 0, 0, 0,
485 /* S */ sizeof(unsigned short),
486 0, 0,
7212898e 487#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 488 /* V */ SIZE32,
7212898e 489#else
a7a3cfaa 490 0,
7212898e 491#endif
a7a3cfaa
TH
492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 0, 0,
494 /* i */ sizeof(int),
495 0, 0,
496 /* l */ sizeof(long),
497 0,
7212898e 498#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 499 /* n */ SIZE16,
7212898e 500#else
a7a3cfaa 501 0,
7212898e 502#endif
a7a3cfaa
TH
503 0, 0, 0, 0,
504 /* s */ sizeof(short),
505 0, 0,
7212898e 506#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 507 /* v */ SIZE16,
7212898e 508#else
a7a3cfaa 509 0,
7212898e 510#endif
a7a3cfaa
TH
511 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
512 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
513 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
514 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
515 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
516 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
518 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
519 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
520};
521#else
522/* EBCDIC (or bust) */
f43a9a31 523STATIC const packprops_t packprops[512] = {
a7a3cfaa
TH
524 /* normal */
525 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
526 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
527 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
528 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
529 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
530 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
531 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
532 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
533 0, 0, 0,
534 /* c */ sizeof(char),
535 /* d */ sizeof(double),
536 0,
537 /* f */ sizeof(float),
538 0, 0,
539 /* i */ sizeof(int),
540 0, 0, 0, 0, 0, 0, 0,
541 /* j */ IVSIZE,
542 0,
543 /* l */ SIZE32,
544 0,
545 /* n */ SIZE16,
546 0,
547 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
80a13697 548#if defined(HAS_QUAD)
a7a3cfaa 549 /* q */ sizeof(Quad_t),
80a13697 550#else
a7a3cfaa 551 0,
80a13697 552#endif
a7a3cfaa
TH
553 0, 0, 0, 0, 0, 0, 0, 0, 0,
554 /* s */ SIZE16,
555 0, 0,
556 /* v */ SIZE16,
557 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
558 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
559 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1651fc44 560 /* C */ sizeof(unsigned char),
80a13697 561#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
a7a3cfaa 562 /* D */ LONG_DOUBLESIZE,
80a13697 563#else
a7a3cfaa 564 0,
80a13697 565#endif
a7a3cfaa
TH
566 0,
567 /* F */ NVSIZE,
568 0, 0,
569 /* I */ sizeof(unsigned int),
570 0, 0, 0, 0, 0, 0, 0,
571 /* J */ UVSIZE,
572 0,
573 /* L */ SIZE32,
574 0,
575 /* N */ SIZE32,
576 0, 0,
80a13697 577#if defined(HAS_QUAD)
a7a3cfaa 578 /* Q */ sizeof(Uquad_t),
80a13697 579#else
a7a3cfaa 580 0,
80a13697 581#endif
a7a3cfaa
TH
582 0, 0, 0, 0, 0, 0, 0, 0, 0,
583 /* S */ SIZE16,
584 0,
585 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
586 /* V */ SIZE32,
587 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
588 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
589 0, 0, 0, 0, 0, 0, 0, 0, 0,
590 /* shrieking */
591 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
592 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
593 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
594 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
595 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
596 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
597 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
598 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
599 0, 0, 0, 0, 0, 0, 0, 0, 0,
600 /* i */ sizeof(int),
601 0, 0, 0, 0, 0, 0, 0, 0, 0,
602 /* l */ sizeof(long),
603 0,
7212898e 604#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 605 /* n */ SIZE16,
7212898e 606#else
a7a3cfaa 607 0,
7212898e 608#endif
a7a3cfaa
TH
609 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
610 /* s */ sizeof(short),
611 0, 0,
7212898e 612#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 613 /* v */ SIZE16,
7212898e 614#else
a7a3cfaa 615 0,
7212898e 616#endif
a7a3cfaa
TH
617 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
618 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
619 0, 0, 0,
620 /* I */ sizeof(unsigned int),
621 0, 0, 0, 0, 0, 0, 0, 0, 0,
622 /* L */ sizeof(unsigned long),
623 0,
7212898e 624#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 625 /* N */ SIZE32,
7212898e 626#else
a7a3cfaa 627 0,
7212898e 628#endif
a7a3cfaa
TH
629 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
630 /* S */ sizeof(unsigned short),
631 0, 0,
7212898e 632#if defined(PERL_PACK_CAN_SHRIEKSIGN)
a7a3cfaa 633 /* V */ SIZE32,
7212898e 634#else
a7a3cfaa 635 0,
7212898e 636#endif
a7a3cfaa
TH
637 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
638 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
78d46eaa
NC
639};
640#endif
641
08ca2aa3 642STATIC U8
f7fe979e 643uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
08ca2aa3 644{
08ca2aa3 645 STRLEN retlen;
0bcc34c2 646 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
f337b084 647 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
486ec47a 648 /* We try to process malformed UTF-8 as much as possible (preferably with
08ca2aa3
TH
649 warnings), but these two mean we make no progress in the string and
650 might enter an infinite loop */
651 if (retlen == (STRLEN) -1 || retlen == 0)
f337b084
TH
652 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
653 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3 654 if (val >= 0x100) {
a2a5de95
NC
655 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
656 "Character in '%c' format wrapped in unpack",
657 (int) TYPE_NO_MODIFIERS(datumtype));
08ca2aa3
TH
658 val &= 0xff;
659 }
660 *s += retlen;
fe2774ed 661 return (U8)val;
08ca2aa3
TH
662}
663
f337b084
TH
664#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
665 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
08ca2aa3
TH
666 *(U8 *)(s)++)
667
668STATIC bool
f7fe979e 669uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
08ca2aa3
TH
670{
671 UV val;
672 STRLEN retlen;
f7fe979e 673 const char *from = *s;
08ca2aa3 674 int bad = 0;
f7fe979e 675 const U32 flags = ckWARN(WARN_UTF8) ?
08ca2aa3
TH
676 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
677 for (;buf_len > 0; buf_len--) {
678 if (from >= end) return FALSE;
f337b084 679 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
08ca2aa3
TH
680 if (retlen == (STRLEN) -1 || retlen == 0) {
681 from += UTF8SKIP(from);
682 bad |= 1;
683 } else from += retlen;
684 if (val >= 0x100) {
685 bad |= 2;
686 val &= 0xff;
687 }
fe2774ed 688 *(U8 *)buf++ = (U8)val;
08ca2aa3
TH
689 }
690 /* We have enough characters for the buffer. Did we have problems ? */
691 if (bad) {
692 if (bad & 1) {
693 /* Rewalk the string fragment while warning */
f7fe979e 694 const char *ptr;
9e27e96a 695 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
08ca2aa3
TH
696 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
697 if (ptr >= end) break;
f337b084 698 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
08ca2aa3
TH
699 }
700 if (from > end) from = end;
701 }
a2a5de95
NC
702 if ((bad & 2))
703 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
f337b084 704 WARN_PACK : WARN_UNPACK),
a2a5de95
NC
705 "Character(s) in '%c' format wrapped in %s",
706 (int) TYPE_NO_MODIFIERS(datumtype),
707 datumtype & TYPE_IS_PACK ? "pack" : "unpack");
08ca2aa3
TH
708 }
709 *s = from;
710 return TRUE;
711}
712
713STATIC bool
f7fe979e 714next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
08ca2aa3 715{
97aff369 716 dVAR;
08ca2aa3 717 STRLEN retlen;
0bcc34c2 718 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
08ca2aa3
TH
719 if (val >= 0x100 || !ISUUCHAR(val) ||
720 retlen == (STRLEN) -1 || retlen == 0) {
721 *out = 0;
722 return FALSE;
723 }
724 *out = PL_uudmap[val] & 077;
f337b084 725 *s += retlen;
08ca2aa3
TH
726 return TRUE;
727}
78d46eaa 728
64844641 729STATIC char *
14333449 730S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
0bcc34c2 731 const U8 * const end = start + len;
64844641 732
7918f24d
NC
733 PERL_ARGS_ASSERT_BYTES_TO_UNI;
734
f337b084 735 while (start < end) {
48fa4626
TS
736 const UV uv = NATIVE_TO_ASCII(*start);
737 if (UNI_IS_INVARIANT(uv))
738 *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
739 else {
740 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
741 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
f337b084
TH
742 }
743 start++;
744 }
64844641 745 return dest;
f337b084
TH
746}
747
230e1fce
NC
748#define PUSH_BYTES(utf8, cur, buf, len) \
749STMT_START { \
64844641
AL
750 if (utf8) \
751 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
230e1fce
NC
752 else { \
753 Copy(buf, cur, len, char); \
754 (cur) += (len); \
755 } \
f337b084
TH
756} STMT_END
757
758#define GROWING(utf8, cat, start, cur, in_len) \
759STMT_START { \
760 STRLEN glen = (in_len); \
3473cf63 761 if (utf8) glen *= UTF8_EXPAND; \
f337b084 762 if ((cur) + glen >= (start) + SvLEN(cat)) { \
0bd48802 763 (start) = sv_exp_grow(cat, glen); \
f337b084
TH
764 (cur) = (start) + SvCUR(cat); \
765 } \
766} STMT_END
767
768#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
769STMT_START { \
f7fe979e 770 const STRLEN glen = (in_len); \
f337b084 771 STRLEN gl = glen; \
3473cf63 772 if (utf8) gl *= UTF8_EXPAND; \
f337b084
TH
773 if ((cur) + gl >= (start) + SvLEN(cat)) { \
774 *cur = '\0'; \
b162af07 775 SvCUR_set((cat), (cur) - (start)); \
0bd48802 776 (start) = sv_exp_grow(cat, gl); \
f337b084
TH
777 (cur) = (start) + SvCUR(cat); \
778 } \
779 PUSH_BYTES(utf8, cur, buf, glen); \
780} STMT_END
781
782#define PUSH_BYTE(utf8, s, byte) \
783STMT_START { \
784 if (utf8) { \
f7fe979e 785 const U8 au8 = (byte); \
64844641 786 (s) = bytes_to_uni(&au8, 1, (s)); \
f337b084
TH
787 } else *(U8 *)(s)++ = (byte); \
788} STMT_END
789
790/* Only to be used inside a loop (see the break) */
791#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
792STMT_START { \
793 STRLEN retlen; \
794 if (str >= end) break; \
795 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
796 if (retlen == (STRLEN) -1 || retlen == 0) { \
797 *cur = '\0'; \
798 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \
799 } \
800 str += retlen; \
801} STMT_END
802
f7fe979e
AL
803static const char *_action( const tempsym_t* symptr )
804{
10edeb5d 805 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
f7fe979e
AL
806}
807
206947d2 808/* Returns the sizeof() struct described by pat */
028d1f6d 809STATIC I32
f337b084 810S_measure_struct(pTHX_ tempsym_t* symptr)
206947d2 811{
f337b084 812 I32 total = 0;
206947d2 813
7918f24d
NC
814 PERL_ARGS_ASSERT_MEASURE_STRUCT;
815
49704364 816 while (next_symbol(symptr)) {
f337b084 817 I32 len;
f7fe979e 818 int size;
f337b084
TH
819
820 switch (symptr->howlen) {
fc241834 821 case e_star:
49704364 822 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
f7fe979e 823 _action( symptr ) );
49704364 824 break;
f337b084
TH
825 default:
826 /* e_no_len and e_number */
827 len = symptr->length;
828 break;
49704364
WL
829 }
830
a7a3cfaa 831 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
80a13697 832 if (!size) {
f7fe979e 833 int star;
80a13697
NC
834 /* endianness doesn't influence the size of a type */
835 switch(TYPE_NO_ENDIANNESS(symptr->code)) {
836 default:
837 Perl_croak(aTHX_ "Invalid type '%c' in %s",
838 (int)TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 839 _action( symptr ) );
28be1210
TH
840#ifdef PERL_PACK_CAN_SHRIEKSIGN
841 case '.' | TYPE_IS_SHRIEKING:
842 case '@' | TYPE_IS_SHRIEKING:
843#endif
80a13697 844 case '@':
28be1210 845 case '.':
80a13697
NC
846 case '/':
847 case 'U': /* XXXX Is it correct? */
848 case 'w':
849 case 'u':
850 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
28be1210 851 (int) TYPE_NO_MODIFIERS(symptr->code),
f7fe979e 852 _action( symptr ) );
80a13697
NC
853 case '%':
854 size = 0;
855 break;
856 case '(':
fc241834
RGS
857 {
858 tempsym_t savsym = *symptr;
859 symptr->patptr = savsym.grpbeg;
860 symptr->patend = savsym.grpend;
861 /* XXXX Theoretically, we need to measure many times at
862 different positions, since the subexpression may contain
863 alignment commands, but be not of aligned length.
864 Need to detect this and croak(). */
865 size = measure_struct(symptr);
866 *symptr = savsym;
867 break;
868 }
80a13697
NC
869 case 'X' | TYPE_IS_SHRIEKING:
870 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS.
871 */
872 if (!len) /* Avoid division by 0 */
873 len = 1;
874 len = total % len; /* Assumed: the start is aligned. */
875 /* FALL THROUGH */
876 case 'X':
877 size = -1;
878 if (total < len)
f7fe979e 879 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
80a13697
NC
880 break;
881 case 'x' | TYPE_IS_SHRIEKING:
882 if (!len) /* Avoid division by 0 */
883 len = 1;
884 star = total % len; /* Assumed: the start is aligned. */
885 if (star) /* Other portable ways? */
886 len = len - star;
887 else
888 len = 0;
889 /* FALL THROUGH */
890 case 'x':
891 case 'A':
892 case 'Z':
893 case 'a':
80a13697
NC
894 size = 1;
895 break;
896 case 'B':
897 case 'b':
898 len = (len + 7)/8;
899 size = 1;
900 break;
901 case 'H':
902 case 'h':
903 len = (len + 1)/2;
904 size = 1;
905 break;
78d46eaa 906
80a13697
NC
907 case 'P':
908 len = 1;
909 size = sizeof(char*);
78d46eaa
NC
910 break;
911 }
206947d2
IZ
912 }
913 total += len * size;
914 }
915 return total;
916}
917
49704364
WL
918
919/* locate matching closing parenthesis or bracket
920 * returns char pointer to char after match, or NULL
921 */
f7fe979e 922STATIC const char *
5aaab254 923S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
18529408 924{
7918f24d
NC
925 PERL_ARGS_ASSERT_GROUP_END;
926
49704364 927 while (patptr < patend) {
f7fe979e 928 const char c = *patptr++;
49704364
WL
929
930 if (isSPACE(c))
931 continue;
932 else if (c == ender)
933 return patptr-1;
934 else if (c == '#') {
935 while (patptr < patend && *patptr != '\n')
936 patptr++;
937 continue;
938 } else if (c == '(')
939 patptr = group_end(patptr, patend, ')') + 1;
940 else if (c == '[')
941 patptr = group_end(patptr, patend, ']') + 1;
18529408 942 }
49704364
WL
943 Perl_croak(aTHX_ "No group ending character '%c' found in template",
944 ender);
945 return 0;
18529408
IZ
946}
947
49704364
WL
948
949/* Convert unsigned decimal number to binary.
950 * Expects a pointer to the first digit and address of length variable
951 * Advances char pointer to 1st non-digit char and returns number
fc241834 952 */
f7fe979e 953STATIC const char *
5aaab254 954S_get_num(pTHX_ const char *patptr, I32 *lenptr )
49704364
WL
955{
956 I32 len = *patptr++ - '0';
7918f24d
NC
957
958 PERL_ARGS_ASSERT_GET_NUM;
959
49704364
WL
960 while (isDIGIT(*patptr)) {
961 if (len >= 0x7FFFFFFF/10)
962 Perl_croak(aTHX_ "pack/unpack repeat count overflow");
963 len = (len * 10) + (*patptr++ - '0');
964 }
965 *lenptr = len;
966 return patptr;
967}
968
969/* The marvellous template parsing routine: Using state stored in *symptr,
970 * locates next template code and count
971 */
972STATIC bool
f337b084 973S_next_symbol(pTHX_ tempsym_t* symptr )
18529408 974{
f7fe979e 975 const char* patptr = symptr->patptr;
0bcc34c2 976 const char* const patend = symptr->patend;
49704364 977
7918f24d
NC
978 PERL_ARGS_ASSERT_NEXT_SYMBOL;
979
49704364
WL
980 symptr->flags &= ~FLAG_SLASH;
981
982 while (patptr < patend) {
983 if (isSPACE(*patptr))
984 patptr++;
985 else if (*patptr == '#') {
986 patptr++;
987 while (patptr < patend && *patptr != '\n')
988 patptr++;
989 if (patptr < patend)
990 patptr++;
991 } else {
fc241834 992 /* We should have found a template code */
49704364 993 I32 code = *patptr++ & 0xFF;
66c611c5 994 U32 inherited_modifiers = 0;
49704364
WL
995
996 if (code == ','){ /* grandfather in commas but with a warning */
997 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
998 symptr->flags |= FLAG_COMMA;
999 Perl_warner(aTHX_ packWARN(WARN_UNPACK),
f7fe979e 1000 "Invalid type ',' in %s", _action( symptr ) );
49704364
WL
1001 }
1002 continue;
1003 }
fc241834 1004
49704364 1005 /* for '(', skip to ')' */
fc241834 1006 if (code == '(') {
49704364
WL
1007 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
1008 Perl_croak(aTHX_ "()-group starts with a count in %s",
f7fe979e 1009 _action( symptr ) );
49704364
WL
1010 symptr->grpbeg = patptr;
1011 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
1012 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
1013 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
f7fe979e 1014 _action( symptr ) );
49704364
WL
1015 }
1016
66c611c5
MHM
1017 /* look for group modifiers to inherit */
1018 if (TYPE_ENDIANNESS(symptr->flags)) {
1019 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
1020 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
1021 }
1022
1109a392
MHM
1023 /* look for modifiers */
1024 while (patptr < patend) {
b7787f18
AL
1025 const char *allowed;
1026 I32 modifier;
1109a392
MHM
1027 switch (*patptr) {
1028 case '!':
1029 modifier = TYPE_IS_SHRIEKING;
7212898e 1030 allowed = SHRIEKING_ALLOWED_TYPES;
1109a392 1031 break;
7212898e 1032#ifdef PERL_PACK_CAN_BYTEORDER
1109a392
MHM
1033 case '>':
1034 modifier = TYPE_IS_BIG_ENDIAN;
66c611c5 1035 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392
MHM
1036 break;
1037 case '<':
1038 modifier = TYPE_IS_LITTLE_ENDIAN;
66c611c5 1039 allowed = ENDIANNESS_ALLOWED_TYPES;
1109a392 1040 break;
f337b084 1041#endif /* PERL_PACK_CAN_BYTEORDER */
1109a392 1042 default:
b7787f18
AL
1043 allowed = "";
1044 modifier = 0;
1109a392
MHM
1045 break;
1046 }
66c611c5 1047
1109a392
MHM
1048 if (modifier == 0)
1049 break;
66c611c5 1050
1109a392
MHM
1051 if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1052 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
f7fe979e 1053 allowed, _action( symptr ) );
66c611c5
MHM
1054
1055 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1109a392 1056 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
f7fe979e 1057 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
66c611c5
MHM
1058 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1059 TYPE_ENDIANNESS_MASK)
1060 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
f7fe979e 1061 *patptr, _action( symptr ) );
66c611c5 1062
a2a5de95
NC
1063 if ((code & modifier)) {
1064 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
1065 "Duplicate modifier '%c' after '%c' in %s",
1066 *patptr, (int) TYPE_NO_MODIFIERS(code),
1067 _action( symptr ) );
1109a392 1068 }
66c611c5 1069
1109a392
MHM
1070 code |= modifier;
1071 patptr++;
49704364
WL
1072 }
1073
66c611c5
MHM
1074 /* inherit modifiers */
1075 code |= inherited_modifiers;
1076
fc241834 1077 /* look for count and/or / */
49704364
WL
1078 if (patptr < patend) {
1079 if (isDIGIT(*patptr)) {
1080 patptr = get_num( patptr, &symptr->length );
1081 symptr->howlen = e_number;
1082
1083 } else if (*patptr == '*') {
1084 patptr++;
1085 symptr->howlen = e_star;
1086
1087 } else if (*patptr == '[') {
f7fe979e 1088 const char* lenptr = ++patptr;
49704364
WL
1089 symptr->howlen = e_number;
1090 patptr = group_end( patptr, patend, ']' ) + 1;
1091 /* what kind of [] is it? */
1092 if (isDIGIT(*lenptr)) {
1093 lenptr = get_num( lenptr, &symptr->length );
1094 if( *lenptr != ']' )
1095 Perl_croak(aTHX_ "Malformed integer in [] in %s",
f7fe979e 1096 _action( symptr ) );
49704364
WL
1097 } else {
1098 tempsym_t savsym = *symptr;
1099 symptr->patend = patptr-1;
1100 symptr->patptr = lenptr;
1101 savsym.length = measure_struct(symptr);
1102 *symptr = savsym;
1103 }
1104 } else {
1105 symptr->howlen = e_no_len;
1106 symptr->length = 1;
1107 }
1108
1109 /* try to find / */
1110 while (patptr < patend) {
1111 if (isSPACE(*patptr))
1112 patptr++;
1113 else if (*patptr == '#') {
1114 patptr++;
1115 while (patptr < patend && *patptr != '\n')
1116 patptr++;
1117 if (patptr < patend)
1118 patptr++;
1119 } else {
66c611c5 1120 if (*patptr == '/') {
49704364
WL
1121 symptr->flags |= FLAG_SLASH;
1122 patptr++;
66c611c5
MHM
1123 if (patptr < patend &&
1124 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
49704364 1125 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
f7fe979e 1126 _action( symptr ) );
49704364
WL
1127 }
1128 break;
1129 }
18529408 1130 }
49704364
WL
1131 } else {
1132 /* at end - no count, no / */
1133 symptr->howlen = e_no_len;
1134 symptr->length = 1;
1135 }
1136
1137 symptr->code = code;
fc241834 1138 symptr->patptr = patptr;
49704364 1139 return TRUE;
18529408 1140 }
49704364 1141 }
fc241834 1142 symptr->patptr = patptr;
49704364 1143 return FALSE;
18529408
IZ
1144}
1145
18529408 1146/*
fc241834 1147 There is no way to cleanly handle the case where we should process the
08ca2aa3 1148 string per byte in its upgraded form while it's really in downgraded form
fc241834
RGS
1149 (e.g. estimates like strend-s as an upper bound for the number of
1150 characters left wouldn't work). So if we foresee the need of this
1151 (pattern starts with U or contains U0), we want to work on the encoded
1152 version of the string. Users are advised to upgrade their pack string
08ca2aa3
TH
1153 themselves if they need to do a lot of unpacks like this on it
1154*/
fc241834 1155STATIC bool
08ca2aa3
TH
1156need_utf8(const char *pat, const char *patend)
1157{
1158 bool first = TRUE;
7918f24d
NC
1159
1160 PERL_ARGS_ASSERT_NEED_UTF8;
1161
08ca2aa3
TH
1162 while (pat < patend) {
1163 if (pat[0] == '#') {
1164 pat++;
f7fe979e 1165 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1166 if (!pat) return FALSE;
1167 } else if (pat[0] == 'U') {
1168 if (first || pat[1] == '0') return TRUE;
1169 } else first = FALSE;
1170 pat++;
1171 }
1172 return FALSE;
1173}
1174
1175STATIC char
1176first_symbol(const char *pat, const char *patend) {
7918f24d
NC
1177 PERL_ARGS_ASSERT_FIRST_SYMBOL;
1178
08ca2aa3
TH
1179 while (pat < patend) {
1180 if (pat[0] != '#') return pat[0];
1181 pat++;
f7fe979e 1182 pat = (const char *) memchr(pat, '\n', patend-pat);
08ca2aa3
TH
1183 if (!pat) return 0;
1184 pat++;
1185 }
1186 return 0;
1187}
1188
1189/*
7accc089
JH
1190=for apidoc unpackstring
1191
608d3aed 1192The engine implementing unpack() Perl function. C<unpackstring> puts the
349c8cee 1193extracted list items on the @_ stack and returns the number of elements.
608d3aed 1194Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
349c8cee
DD
1195Unlike L</call_pv>, do not do a C<PUSHMARK>. This function takes no parameters
1196on the @_ stack.
7accc089
JH
1197
1198=cut */
1199
1200I32
f7fe979e 1201Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
7accc089 1202{
f7fe979e 1203 tempsym_t sym;
08ca2aa3 1204
7918f24d
NC
1205 PERL_ARGS_ASSERT_UNPACKSTRING;
1206
f337b084 1207 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
08ca2aa3
TH
1208 else if (need_utf8(pat, patend)) {
1209 /* We probably should try to avoid this in case a scalar context call
1210 wouldn't get to the "U0" */
1211 STRLEN len = strend - s;
230e1fce 1212 s = (char *) bytes_to_utf8((U8 *) s, &len);
08ca2aa3
TH
1213 SAVEFREEPV(s);
1214 strend = s + len;
f337b084 1215 flags |= FLAG_DO_UTF8;
08ca2aa3
TH
1216 }
1217
f337b084
TH
1218 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1219 flags |= FLAG_PARSE_UTF8;
08ca2aa3 1220
f7fe979e 1221 TEMPSYM_INIT(&sym, pat, patend, flags);
7accc089
JH
1222
1223 return unpack_rec(&sym, s, s, strend, NULL );
1224}
1225
4136a0f7 1226STATIC I32
f7fe979e 1227S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
49704364 1228{
27da23d5 1229 dVAR; dSP;
3297d27d 1230 SV *sv = NULL;
f7fe979e 1231 const I32 start_sp_offset = SP - PL_stack_base;
49704364 1232 howlen_t howlen;
a6ec74c1 1233 I32 checksum = 0;
92d41999 1234 UV cuv = 0;
a6ec74c1 1235 NV cdouble = 0.0;
f337b084 1236 const int bits_in_uv = CHAR_BIT * sizeof(cuv);
49704364 1237 bool beyond = FALSE;
21c16052 1238 bool explicit_length;
9e27e96a 1239 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
f337b084 1240 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
7918f24d
NC
1241
1242 PERL_ARGS_ASSERT_UNPACK_REC;
1243
28be1210 1244 symptr->strbeg = s - strbeg;
49704364 1245
49704364 1246 while (next_symbol(symptr)) {
a7a3cfaa 1247 packprops_t props;
9e27e96a 1248 I32 len;
f337b084 1249 I32 datumtype = symptr->code;
206947d2 1250 /* do first one only unless in list context
08ca2aa3 1251 / is implemented by unpacking the count, then popping it from the
206947d2 1252 stack, so must check that we're not in the middle of a / */
49704364 1253 if ( unpack_only_one
206947d2 1254 && (SP - PL_stack_base == start_sp_offset + 1)
49704364 1255 && (datumtype != '/') ) /* XXX can this be omitted */
206947d2 1256 break;
49704364 1257
f337b084 1258 switch (howlen = symptr->howlen) {
fc241834
RGS
1259 case e_star:
1260 len = strend - strbeg; /* long enough */
49704364 1261 break;
f337b084
TH
1262 default:
1263 /* e_no_len and e_number */
1264 len = symptr->length;
1265 break;
49704364 1266 }
18529408 1267
21c16052 1268 explicit_length = TRUE;
a6ec74c1 1269 redo_switch:
49704364 1270 beyond = s >= strend;
a7a3cfaa
TH
1271
1272 props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1273 if (props) {
1274 /* props nonzero means we can process this letter. */
9e27e96a
AL
1275 const long size = props & PACK_SIZE_MASK;
1276 const long howmany = (strend - s) / size;
a7a3cfaa
TH
1277 if (len > howmany)
1278 len = howmany;
1279
1280 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1281 if (len && unpack_only_one) len = 1;
1282 EXTEND(SP, len);
1283 EXTEND_MORTAL(len);
78d46eaa
NC
1284 }
1285 }
a7a3cfaa 1286
1109a392 1287 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 1288 default:
1109a392 1289 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
49704364 1290
a6ec74c1 1291 case '%':
49704364 1292 if (howlen == e_no_len)
18529408 1293 len = 16; /* len is not specified */
a6ec74c1 1294 checksum = len;
92d41999 1295 cuv = 0;
a6ec74c1 1296 cdouble = 0;
18529408 1297 continue;
a6ec74c1 1298 break;
18529408
IZ
1299 case '(':
1300 {
49704364 1301 tempsym_t savsym = *symptr;
9e27e96a 1302 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
66c611c5 1303 symptr->flags |= group_modifiers;
49704364 1304 symptr->patend = savsym.grpend;
28be1210 1305 symptr->previous = &savsym;
49704364 1306 symptr->level++;
18529408 1307 PUTBACK;
c6f750d1 1308 if (len && unpack_only_one) len = 1;
18529408 1309 while (len--) {
49704364 1310 symptr->patptr = savsym.grpbeg;
f337b084
TH
1311 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
1312 else symptr->flags &= ~FLAG_PARSE_UTF8;
08ca2aa3
TH
1313 unpack_rec(symptr, s, strbeg, strend, &s);
1314 if (s == strend && savsym.howlen == e_star)
49704364 1315 break; /* No way to continue */
18529408
IZ
1316 }
1317 SPAGAIN;
28be1210 1318 savsym.flags = symptr->flags & ~group_modifiers;
49704364 1319 *symptr = savsym;
18529408
IZ
1320 break;
1321 }
28be1210
TH
1322#ifdef PERL_PACK_CAN_SHRIEKSIGN
1323 case '.' | TYPE_IS_SHRIEKING:
1324#endif
1325 case '.': {
9e27e96a 1326 const char *from;
28be1210
TH
1327 SV *sv;
1328#ifdef PERL_PACK_CAN_SHRIEKSIGN
9e27e96a 1329 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
28be1210 1330#else /* PERL_PACK_CAN_SHRIEKSIGN */
9e27e96a 1331 const bool u8 = utf8;
28be1210
TH
1332#endif
1333 if (howlen == e_star) from = strbeg;
1334 else if (len <= 0) from = s;
1335 else {
1336 tempsym_t *group = symptr;
1337
1338 while (--len && group) group = group->previous;
1339 from = group ? strbeg + group->strbeg : strbeg;
1340 }
1341 sv = from <= s ?
00646304
CB
1342 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1343 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
6e449a3a 1344 mXPUSHs(sv);
28be1210
TH
1345 break;
1346 }
1347#ifdef PERL_PACK_CAN_SHRIEKSIGN
1348 case '@' | TYPE_IS_SHRIEKING:
1349#endif
a6ec74c1 1350 case '@':
28be1210
TH
1351 s = strbeg + symptr->strbeg;
1352#ifdef PERL_PACK_CAN_SHRIEKSIGN
1353 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
1354#else /* PERL_PACK_CAN_SHRIEKSIGN */
1355 if (utf8)
1356#endif
1357 {
08ca2aa3
TH
1358 while (len > 0) {
1359 if (s >= strend)
1360 Perl_croak(aTHX_ "'@' outside of string in unpack");
1361 s += UTF8SKIP(s);
1362 len--;
1363 }
1364 if (s > strend)
1365 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1366 } else {
28be1210 1367 if (strend-s < len)
fc241834 1368 Perl_croak(aTHX_ "'@' outside of string in unpack");
28be1210 1369 s += len;
08ca2aa3 1370 }
a6ec74c1 1371 break;
62f95557
IZ
1372 case 'X' | TYPE_IS_SHRIEKING:
1373 if (!len) /* Avoid division by 0 */
1374 len = 1;
08ca2aa3 1375 if (utf8) {
f7fe979e 1376 const char *hop, *last;
f337b084
TH
1377 I32 l = len;
1378 hop = last = strbeg;
1379 while (hop < s) {
1380 hop += UTF8SKIP(hop);
1381 if (--l == 0) {
08ca2aa3 1382 last = hop;
f337b084
TH
1383 l = len;
1384 }
fc241834 1385 }
f337b084
TH
1386 if (last > s)
1387 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
08ca2aa3
TH
1388 s = last;
1389 break;
f337b084
TH
1390 }
1391 len = (s - strbeg) % len;
62f95557 1392 /* FALL THROUGH */
a6ec74c1 1393 case 'X':
08ca2aa3
TH
1394 if (utf8) {
1395 while (len > 0) {
1396 if (s <= strbeg)
1397 Perl_croak(aTHX_ "'X' outside of string in unpack");
f337b084 1398 while (--s, UTF8_IS_CONTINUATION(*s)) {
08ca2aa3
TH
1399 if (s <= strbeg)
1400 Perl_croak(aTHX_ "'X' outside of string in unpack");
1401 }
1402 len--;
1403 }
1404 } else {
fc241834
RGS
1405 if (len > s - strbeg)
1406 Perl_croak(aTHX_ "'X' outside of string in unpack" );
1407 s -= len;
08ca2aa3 1408 }
a6ec74c1 1409 break;
9e27e96a
AL
1410 case 'x' | TYPE_IS_SHRIEKING: {
1411 I32 ai32;
62f95557
IZ
1412 if (!len) /* Avoid division by 0 */
1413 len = 1;
230e1fce
NC
1414 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1415 else ai32 = (s - strbeg) % len;
08ca2aa3
TH
1416 if (ai32 == 0) break;
1417 len -= ai32;
9e27e96a 1418 }
62f95557 1419 /* FALL THROUGH */
a6ec74c1 1420 case 'x':
08ca2aa3
TH
1421 if (utf8) {
1422 while (len>0) {
1423 if (s >= strend)
1424 Perl_croak(aTHX_ "'x' outside of string in unpack");
1425 s += UTF8SKIP(s);
1426 len--;
1427 }
1428 } else {
fc241834
RGS
1429 if (len > strend - s)
1430 Perl_croak(aTHX_ "'x' outside of string in unpack");
1431 s += len;
f337b084 1432 }
a6ec74c1
JH
1433 break;
1434 case '/':
49704364
WL
1435 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1436 break;
a6ec74c1
JH
1437 case 'A':
1438 case 'Z':
1439 case 'a':
08ca2aa3
TH
1440 if (checksum) {
1441 /* Preliminary length estimate is assumed done in 'W' */
1442 if (len > strend - s) len = strend - s;
1443 goto W_checksum;
1444 }
1445 if (utf8) {
1446 I32 l;
f7fe979e 1447 const char *hop;
08ca2aa3
TH
1448 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1449 if (hop >= strend) {
1450 if (hop > strend)
1451 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1452 break;
fc241834 1453 }
a6ec74c1 1454 }
08ca2aa3
TH
1455 if (hop > strend)
1456 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1457 len = hop - s;
1458 } else if (len > strend - s)
1459 len = strend - s;
1460
1461 if (datumtype == 'Z') {
1462 /* 'Z' strips stuff after first null */
f7fe979e 1463 const char *ptr, *end;
f337b084
TH
1464 end = s + len;
1465 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
08ca2aa3
TH
1466 sv = newSVpvn(s, ptr-s);
1467 if (howlen == e_star) /* exact for 'Z*' */
1468 len = ptr-s + (ptr != strend ? 1 : 0);
1469 } else if (datumtype == 'A') {
1470 /* 'A' strips both nulls and spaces */
f7fe979e 1471 const char *ptr;
18bdf90a
TH
1472 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1473 for (ptr = s+len-1; ptr >= s; ptr--)
1474 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
76a77b1b 1475 !isSPACE_utf8(ptr)) break;
18bdf90a
TH
1476 if (ptr >= s) ptr += UTF8SKIP(ptr);
1477 else ptr++;
28be1210 1478 if (ptr > s+len)
18bdf90a
TH
1479 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1480 } else {
1481 for (ptr = s+len-1; ptr >= s; ptr--)
1482 if (*ptr != 0 && !isSPACE(*ptr)) break;
1483 ptr++;
1484 }
08ca2aa3
TH
1485 sv = newSVpvn(s, ptr-s);
1486 } else sv = newSVpvn(s, len);
1487
1488 if (utf8) {
1489 SvUTF8_on(sv);
1490 /* Undo any upgrade done due to need_utf8() */
f337b084 1491 if (!(symptr->flags & FLAG_WAS_UTF8))
08ca2aa3 1492 sv_utf8_downgrade(sv, 0);
a6ec74c1 1493 }
6e449a3a 1494 mXPUSHs(sv);
08ca2aa3 1495 s += len;
a6ec74c1
JH
1496 break;
1497 case 'B':
08ca2aa3
TH
1498 case 'b': {
1499 char *str;
49704364 1500 if (howlen == e_star || len > (strend - s) * 8)
a6ec74c1
JH
1501 len = (strend - s) * 8;
1502 if (checksum) {
f337b084 1503 if (utf8)
08ca2aa3 1504 while (len >= 8 && s < strend) {
f337b084 1505 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
08ca2aa3
TH
1506 len -= 8;
1507 }
f337b084 1508 else
fc241834 1509 while (len >= 8) {
08ca2aa3 1510 cuv += PL_bitcount[*(U8 *)s++];
fc241834
RGS
1511 len -= 8;
1512 }
08ca2aa3
TH
1513 if (len && s < strend) {
1514 U8 bits;
f337b084
TH
1515 bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1516 if (datumtype == 'b')
a6ec74c1 1517 while (len-- > 0) {
92d41999 1518 if (bits & 1) cuv++;
a6ec74c1
JH
1519 bits >>= 1;
1520 }
f337b084 1521 else
a6ec74c1 1522 while (len-- > 0) {
08ca2aa3 1523 if (bits & 0x80) cuv++;
a6ec74c1
JH
1524 bits <<= 1;
1525 }
fc241834 1526 }
a6ec74c1
JH
1527 break;
1528 }
08ca2aa3 1529
561b68a9 1530 sv = sv_2mortal(newSV(len ? len : 1));
a6ec74c1
JH
1531 SvPOK_on(sv);
1532 str = SvPVX(sv);
1533 if (datumtype == 'b') {
f337b084 1534 U8 bits = 0;
f7fe979e 1535 const I32 ai32 = len;
08ca2aa3
TH
1536 for (len = 0; len < ai32; len++) {
1537 if (len & 7) bits >>= 1;
1538 else if (utf8) {
1539 if (s >= strend) break;
f337b084 1540 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1541 } else bits = *(U8 *) s++;
1542 *str++ = bits & 1 ? '1' : '0';
a6ec74c1 1543 }
08ca2aa3 1544 } else {
f337b084 1545 U8 bits = 0;
f7fe979e 1546 const I32 ai32 = len;
08ca2aa3
TH
1547 for (len = 0; len < ai32; len++) {
1548 if (len & 7) bits <<= 1;
1549 else if (utf8) {
1550 if (s >= strend) break;
f337b084 1551 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3
TH
1552 } else bits = *(U8 *) s++;
1553 *str++ = bits & 0x80 ? '1' : '0';
a6ec74c1
JH
1554 }
1555 }
1556 *str = '\0';
aa07b2f6 1557 SvCUR_set(sv, str - SvPVX_const(sv));
08ca2aa3 1558 XPUSHs(sv);
a6ec74c1 1559 break;
08ca2aa3 1560 }
a6ec74c1 1561 case 'H':
08ca2aa3 1562 case 'h': {
3297d27d 1563 char *str = NULL;
fc241834 1564 /* Preliminary length estimate, acceptable for utf8 too */
49704364 1565 if (howlen == e_star || len > (strend - s) * 2)
a6ec74c1 1566 len = (strend - s) * 2;
858fe5e1
TC
1567 if (!checksum) {
1568 sv = sv_2mortal(newSV(len ? len : 1));
1569 SvPOK_on(sv);
1570 str = SvPVX(sv);
1571 }
a6ec74c1 1572 if (datumtype == 'h') {
f337b084 1573 U8 bits = 0;
9e27e96a 1574 I32 ai32 = len;
fc241834
RGS
1575 for (len = 0; len < ai32; len++) {
1576 if (len & 1) bits >>= 4;
1577 else if (utf8) {
1578 if (s >= strend) break;
f337b084 1579 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
fc241834 1580 } else bits = * (U8 *) s++;
858fe5e1
TC
1581 if (!checksum)
1582 *str++ = PL_hexdigit[bits & 15];
a6ec74c1 1583 }
08ca2aa3 1584 } else {
f337b084 1585 U8 bits = 0;
f7fe979e 1586 const I32 ai32 = len;
08ca2aa3
TH
1587 for (len = 0; len < ai32; len++) {
1588 if (len & 1) bits <<= 4;
1589 else if (utf8) {
1590 if (s >= strend) break;
f337b084 1591 bits = uni_to_byte(aTHX_ &s, strend, datumtype);
08ca2aa3 1592 } else bits = *(U8 *) s++;
858fe5e1
TC
1593 if (!checksum)
1594 *str++ = PL_hexdigit[(bits >> 4) & 15];
a6ec74c1
JH
1595 }
1596 }
858fe5e1
TC
1597 if (!checksum) {
1598 *str = '\0';
1599 SvCUR_set(sv, str - SvPVX_const(sv));
1600 XPUSHs(sv);
1601 }
a6ec74c1 1602 break;
08ca2aa3 1603 }
1651fc44
ML
1604 case 'C':
1605 if (len == 0) {
1606 if (explicit_length)
1607 /* Switch to "character" mode */
1608 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1609 break;
1610 }
1611 /* FALL THROUGH */
a6ec74c1 1612 case 'c':
1651fc44
ML
1613 while (len-- > 0 && s < strend) {
1614 int aint;
1615 if (utf8)
1616 {
1617 STRLEN retlen;
1618 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1619 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1620 if (retlen == (STRLEN) -1 || retlen == 0)
1621 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1622 s += retlen;
1623 }
1624 else
1625 aint = *(U8 *)(s)++;
1626 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
73cb7263 1627 aint -= 256;
08ca2aa3 1628 if (!checksum)
6e449a3a 1629 mPUSHi(aint);
73cb7263
NC
1630 else if (checksum > bits_in_uv)
1631 cdouble += (NV)aint;
1632 else
1633 cuv += aint;
a6ec74c1
JH
1634 }
1635 break;
08ca2aa3
TH
1636 case 'W':
1637 W_checksum:
1651fc44 1638 if (utf8) {
08ca2aa3 1639 while (len-- > 0 && s < strend) {
08ca2aa3 1640 STRLEN retlen;
f7fe979e 1641 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
f337b084 1642 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
08ca2aa3
TH
1643 if (retlen == (STRLEN) -1 || retlen == 0)
1644 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1645 s += retlen;
1646 if (!checksum)
6e449a3a 1647 mPUSHu(val);
08ca2aa3
TH
1648 else if (checksum > bits_in_uv)
1649 cdouble += (NV) val;
d6d3e8bd 1650 else
08ca2aa3 1651 cuv += val;
fc241834 1652 }
08ca2aa3 1653 } else if (!checksum)
a6ec74c1 1654 while (len-- > 0) {
f7fe979e 1655 const U8 ch = *(U8 *) s++;
6e449a3a 1656 mPUSHu(ch);
a6ec74c1 1657 }
08ca2aa3
TH
1658 else if (checksum > bits_in_uv)
1659 while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1660 else
1661 while (len-- > 0) cuv += *(U8 *) s++;
a6ec74c1
JH
1662 break;
1663 case 'U':
35bcd338 1664 if (len == 0) {
c5333953 1665 if (explicit_length && howlen != e_star) {
08ca2aa3 1666 /* Switch to "bytes in UTF-8" mode */
f337b084 1667 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
08ca2aa3
TH
1668 else
1669 /* Should be impossible due to the need_utf8() test */
1670 Perl_croak(aTHX_ "U0 mode on a byte string");
1671 }
35bcd338
JH
1672 break;
1673 }
08ca2aa3 1674 if (len > strend - s) len = strend - s;
fc241834 1675 if (!checksum) {
08ca2aa3
TH
1676 if (len && unpack_only_one) len = 1;
1677 EXTEND(SP, len);
1678 EXTEND_MORTAL(len);
fc241834 1679 }
08ca2aa3
TH
1680 while (len-- > 0 && s < strend) {
1681 STRLEN retlen;
1682 UV auv;
1683 if (utf8) {
1684 U8 result[UTF8_MAXLEN];
f7fe979e 1685 const char *ptr = s;
08ca2aa3 1686 STRLEN len;
08ca2aa3
TH
1687 /* Bug: warns about bad utf8 even if we are short on bytes
1688 and will break out of the loop */
230e1fce
NC
1689 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1690 'U'))
08ca2aa3
TH
1691 break;
1692 len = UTF8SKIP(result);
fc241834 1693 if (!uni_to_bytes(aTHX_ &ptr, strend,
230e1fce 1694 (char *) &result[1], len-1, 'U')) break;
cde84f2a 1695 auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1696 s = ptr;
1697 } else {
cde84f2a 1698 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
08ca2aa3
TH
1699 if (retlen == (STRLEN) -1 || retlen == 0)
1700 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1701 s += retlen;
1702 }
1703 if (!checksum)
6e449a3a 1704 mPUSHu(auv);
73cb7263 1705 else if (checksum > bits_in_uv)
08ca2aa3 1706 cdouble += (NV) auv;
73cb7263 1707 else
08ca2aa3 1708 cuv += auv;
a6ec74c1
JH
1709 }
1710 break;
49704364
WL
1711 case 's' | TYPE_IS_SHRIEKING:
1712#if SHORTSIZE != SIZE16
73cb7263 1713 while (len-- > 0) {
08ca2aa3 1714 short ashort;
f337b084
TH
1715 SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1716 DO_BO_UNPACK(ashort, s);
08ca2aa3 1717 if (!checksum)
6e449a3a 1718 mPUSHi(ashort);
73cb7263
NC
1719 else if (checksum > bits_in_uv)
1720 cdouble += (NV)ashort;
1721 else
1722 cuv += ashort;
49704364
WL
1723 }
1724 break;
1725#else
1726 /* Fallthrough! */
a6ec74c1 1727#endif
49704364 1728 case 's':
73cb7263 1729 while (len-- > 0) {
08ca2aa3
TH
1730 I16 ai16;
1731
1732#if U16SIZE > SIZE16
1733 ai16 = 0;
1734#endif
f337b084 1735 SHIFT16(utf8, s, strend, &ai16, datumtype);
73cb7263 1736 DO_BO_UNPACK(ai16, 16);
1109a392 1737#if U16SIZE > SIZE16
73cb7263
NC
1738 if (ai16 > 32767)
1739 ai16 -= 65536;
a6ec74c1 1740#endif
08ca2aa3 1741 if (!checksum)
6e449a3a 1742 mPUSHi(ai16);
73cb7263
NC
1743 else if (checksum > bits_in_uv)
1744 cdouble += (NV)ai16;
1745 else
1746 cuv += ai16;
a6ec74c1
JH
1747 }
1748 break;
49704364
WL
1749 case 'S' | TYPE_IS_SHRIEKING:
1750#if SHORTSIZE != SIZE16
73cb7263 1751 while (len-- > 0) {
08ca2aa3 1752 unsigned short aushort;
f337b084
TH
1753 SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1754 DO_BO_UNPACK(aushort, s);
08ca2aa3 1755 if (!checksum)
6e449a3a 1756 mPUSHu(aushort);
73cb7263
NC
1757 else if (checksum > bits_in_uv)
1758 cdouble += (NV)aushort;
1759 else
1760 cuv += aushort;
49704364
WL
1761 }
1762 break;
1763#else
486ec47a 1764 /* Fallthrough! */
49704364 1765#endif
a6ec74c1
JH
1766 case 'v':
1767 case 'n':
1768 case 'S':
73cb7263 1769 while (len-- > 0) {
08ca2aa3
TH
1770 U16 au16;
1771#if U16SIZE > SIZE16
1772 au16 = 0;
1773#endif
f337b084 1774 SHIFT16(utf8, s, strend, &au16, datumtype);
08ca2aa3 1775 DO_BO_UNPACK(au16, 16);
a6ec74c1 1776#ifdef HAS_NTOHS
73cb7263
NC
1777 if (datumtype == 'n')
1778 au16 = PerlSock_ntohs(au16);
a6ec74c1
JH
1779#endif
1780#ifdef HAS_VTOHS
73cb7263
NC
1781 if (datumtype == 'v')
1782 au16 = vtohs(au16);
a6ec74c1 1783#endif
08ca2aa3 1784 if (!checksum)
6e449a3a 1785 mPUSHu(au16);
73cb7263 1786 else if (checksum > bits_in_uv)
f337b084 1787 cdouble += (NV) au16;
73cb7263
NC
1788 else
1789 cuv += au16;
a6ec74c1
JH
1790 }
1791 break;
7212898e 1792#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1793 case 'v' | TYPE_IS_SHRIEKING:
1794 case 'n' | TYPE_IS_SHRIEKING:
73cb7263 1795 while (len-- > 0) {
08ca2aa3
TH
1796 I16 ai16;
1797# if U16SIZE > SIZE16
1798 ai16 = 0;
1799# endif
f337b084 1800 SHIFT16(utf8, s, strend, &ai16, datumtype);
08ca2aa3 1801# ifdef HAS_NTOHS
73cb7263 1802 if (datumtype == ('n' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1803 ai16 = (I16) PerlSock_ntohs((U16) ai16);
1804# endif /* HAS_NTOHS */
1805# ifdef HAS_VTOHS
73cb7263 1806 if (datumtype == ('v' | TYPE_IS_SHRIEKING))
08ca2aa3
TH
1807 ai16 = (I16) vtohs((U16) ai16);
1808# endif /* HAS_VTOHS */
1809 if (!checksum)
6e449a3a 1810 mPUSHi(ai16);
73cb7263 1811 else if (checksum > bits_in_uv)
08ca2aa3 1812 cdouble += (NV) ai16;
73cb7263
NC
1813 else
1814 cuv += ai16;
068bd2e7
MHM
1815 }
1816 break;
08ca2aa3 1817#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1818 case 'i':
49704364 1819 case 'i' | TYPE_IS_SHRIEKING:
73cb7263 1820 while (len-- > 0) {
08ca2aa3 1821 int aint;
f337b084
TH
1822 SHIFT_VAR(utf8, s, strend, aint, datumtype);
1823 DO_BO_UNPACK(aint, i);
08ca2aa3 1824 if (!checksum)
6e449a3a 1825 mPUSHi(aint);
73cb7263
NC
1826 else if (checksum > bits_in_uv)
1827 cdouble += (NV)aint;
1828 else
1829 cuv += aint;
a6ec74c1
JH
1830 }
1831 break;
1832 case 'I':
49704364 1833 case 'I' | TYPE_IS_SHRIEKING:
73cb7263 1834 while (len-- > 0) {
08ca2aa3 1835 unsigned int auint;
f337b084
TH
1836 SHIFT_VAR(utf8, s, strend, auint, datumtype);
1837 DO_BO_UNPACK(auint, i);
08ca2aa3 1838 if (!checksum)
6e449a3a 1839 mPUSHu(auint);
73cb7263
NC
1840 else if (checksum > bits_in_uv)
1841 cdouble += (NV)auint;
1842 else
1843 cuv += auint;
a6ec74c1
JH
1844 }
1845 break;
92d41999 1846 case 'j':
73cb7263 1847 while (len-- > 0) {
08ca2aa3 1848 IV aiv;
f337b084 1849 SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1109a392 1850#if IVSIZE == INTSIZE
f337b084 1851 DO_BO_UNPACK(aiv, i);
1109a392 1852#elif IVSIZE == LONGSIZE
f337b084 1853 DO_BO_UNPACK(aiv, l);
1109a392 1854#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1855 DO_BO_UNPACK(aiv, 64);
08ca2aa3
TH
1856#else
1857 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 1858#endif
08ca2aa3 1859 if (!checksum)
6e449a3a 1860 mPUSHi(aiv);
73cb7263
NC
1861 else if (checksum > bits_in_uv)
1862 cdouble += (NV)aiv;
1863 else
1864 cuv += aiv;
92d41999
JH
1865 }
1866 break;
1867 case 'J':
73cb7263 1868 while (len-- > 0) {
08ca2aa3 1869 UV auv;
f337b084 1870 SHIFT_VAR(utf8, s, strend, auv, datumtype);
08ca2aa3 1871#if IVSIZE == INTSIZE
f337b084 1872 DO_BO_UNPACK(auv, i);
08ca2aa3 1873#elif IVSIZE == LONGSIZE
f337b084 1874 DO_BO_UNPACK(auv, l);
08ca2aa3 1875#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
f337b084 1876 DO_BO_UNPACK(auv, 64);
08ca2aa3
TH
1877#else
1878 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 1879#endif
08ca2aa3 1880 if (!checksum)
6e449a3a 1881 mPUSHu(auv);
73cb7263
NC
1882 else if (checksum > bits_in_uv)
1883 cdouble += (NV)auv;
1884 else
1885 cuv += auv;
92d41999
JH
1886 }
1887 break;
49704364
WL
1888 case 'l' | TYPE_IS_SHRIEKING:
1889#if LONGSIZE != SIZE32
73cb7263 1890 while (len-- > 0) {
08ca2aa3 1891 long along;
f337b084
TH
1892 SHIFT_VAR(utf8, s, strend, along, datumtype);
1893 DO_BO_UNPACK(along, l);
08ca2aa3 1894 if (!checksum)
6e449a3a 1895 mPUSHi(along);
73cb7263
NC
1896 else if (checksum > bits_in_uv)
1897 cdouble += (NV)along;
1898 else
1899 cuv += along;
49704364
WL
1900 }
1901 break;
1902#else
1903 /* Fallthrough! */
a6ec74c1 1904#endif
49704364 1905 case 'l':
73cb7263 1906 while (len-- > 0) {
08ca2aa3
TH
1907 I32 ai32;
1908#if U32SIZE > SIZE32
1909 ai32 = 0;
1910#endif
f337b084 1911 SHIFT32(utf8, s, strend, &ai32, datumtype);
73cb7263 1912 DO_BO_UNPACK(ai32, 32);
25a9bd2a 1913#if U32SIZE > SIZE32
08ca2aa3 1914 if (ai32 > 2147483647) ai32 -= 4294967296;
a6ec74c1 1915#endif
08ca2aa3 1916 if (!checksum)
6e449a3a 1917 mPUSHi(ai32);
73cb7263
NC
1918 else if (checksum > bits_in_uv)
1919 cdouble += (NV)ai32;
1920 else
1921 cuv += ai32;
a6ec74c1
JH
1922 }
1923 break;
49704364
WL
1924 case 'L' | TYPE_IS_SHRIEKING:
1925#if LONGSIZE != SIZE32
73cb7263 1926 while (len-- > 0) {
08ca2aa3 1927 unsigned long aulong;
f337b084
TH
1928 SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1929 DO_BO_UNPACK(aulong, l);
08ca2aa3 1930 if (!checksum)
6e449a3a 1931 mPUSHu(aulong);
73cb7263
NC
1932 else if (checksum > bits_in_uv)
1933 cdouble += (NV)aulong;
1934 else
1935 cuv += aulong;
49704364
WL
1936 }
1937 break;
1938#else
1939 /* Fall through! */
1940#endif
a6ec74c1
JH
1941 case 'V':
1942 case 'N':
1943 case 'L':
73cb7263 1944 while (len-- > 0) {
08ca2aa3
TH
1945 U32 au32;
1946#if U32SIZE > SIZE32
1947 au32 = 0;
1948#endif
f337b084 1949 SHIFT32(utf8, s, strend, &au32, datumtype);
08ca2aa3 1950 DO_BO_UNPACK(au32, 32);
a6ec74c1 1951#ifdef HAS_NTOHL
73cb7263
NC
1952 if (datumtype == 'N')
1953 au32 = PerlSock_ntohl(au32);
a6ec74c1
JH
1954#endif
1955#ifdef HAS_VTOHL
73cb7263
NC
1956 if (datumtype == 'V')
1957 au32 = vtohl(au32);
a6ec74c1 1958#endif
08ca2aa3 1959 if (!checksum)
6e449a3a 1960 mPUSHu(au32);
fc241834
RGS
1961 else if (checksum > bits_in_uv)
1962 cdouble += (NV)au32;
1963 else
1964 cuv += au32;
a6ec74c1
JH
1965 }
1966 break;
7212898e 1967#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7
MHM
1968 case 'V' | TYPE_IS_SHRIEKING:
1969 case 'N' | TYPE_IS_SHRIEKING:
73cb7263 1970 while (len-- > 0) {
08ca2aa3
TH
1971 I32 ai32;
1972# if U32SIZE > SIZE32
1973 ai32 = 0;
1974# endif
f337b084 1975 SHIFT32(utf8, s, strend, &ai32, datumtype);
08ca2aa3 1976# ifdef HAS_NTOHL
73cb7263
NC
1977 if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1978 ai32 = (I32)PerlSock_ntohl((U32)ai32);
08ca2aa3
TH
1979# endif
1980# ifdef HAS_VTOHL
73cb7263
NC
1981 if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1982 ai32 = (I32)vtohl((U32)ai32);
08ca2aa3
TH
1983# endif
1984 if (!checksum)
6e449a3a 1985 mPUSHi(ai32);
73cb7263
NC
1986 else if (checksum > bits_in_uv)
1987 cdouble += (NV)ai32;
1988 else
1989 cuv += ai32;
068bd2e7
MHM
1990 }
1991 break;
08ca2aa3 1992#endif /* PERL_PACK_CAN_SHRIEKSIGN */
a6ec74c1 1993 case 'p':
a6ec74c1 1994 while (len-- > 0) {
f7fe979e 1995 const char *aptr;
f337b084 1996 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 1997 DO_BO_UNPACK_PC(aptr);
c4c5f44a 1998 /* newSVpv generates undef if aptr is NULL */
6e449a3a 1999 mPUSHs(newSVpv(aptr, 0));
a6ec74c1
JH
2000 }
2001 break;
2002 case 'w':
a6ec74c1
JH
2003 {
2004 UV auv = 0;
2005 U32 bytes = 0;
fc241834 2006
08ca2aa3
TH
2007 while (len > 0 && s < strend) {
2008 U8 ch;
f337b084 2009 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3 2010 auv = (auv << 7) | (ch & 0x7f);
a6ec74c1 2011 /* UTF8_IS_XXXXX not right here - using constant 0x80 */
08ca2aa3 2012 if (ch < 0x80) {
a6ec74c1 2013 bytes = 0;
6e449a3a 2014 mPUSHu(auv);
a6ec74c1
JH
2015 len--;
2016 auv = 0;
08ca2aa3 2017 continue;
a6ec74c1 2018 }
08ca2aa3 2019 if (++bytes >= sizeof(UV)) { /* promote to string */
10516c54 2020 const char *t;
a6ec74c1 2021
f5992bc4 2022 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
a6ec74c1 2023 while (s < strend) {
f337b084 2024 ch = SHIFT_BYTE(utf8, s, strend, datumtype);
08ca2aa3
TH
2025 sv = mul128(sv, (U8)(ch & 0x7f));
2026 if (!(ch & 0x80)) {
a6ec74c1
JH
2027 bytes = 0;
2028 break;
2029 }
2030 }
10516c54 2031 t = SvPV_nolen_const(sv);
a6ec74c1
JH
2032 while (*t == '0')
2033 t++;
2034 sv_chop(sv, t);
6e449a3a 2035 mPUSHs(sv);
a6ec74c1
JH
2036 len--;
2037 auv = 0;
2038 }
2039 }
2040 if ((s >= strend) && bytes)
49704364 2041 Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
a6ec74c1
JH
2042 }
2043 break;
2044 case 'P':
49704364
WL
2045 if (symptr->howlen == e_star)
2046 Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
a6ec74c1 2047 EXTEND(SP, 1);
2d3e0934 2048 if (s + sizeof(char*) <= strend) {
08ca2aa3 2049 char *aptr;
f337b084 2050 SHIFT_VAR(utf8, s, strend, aptr, datumtype);
07409e01 2051 DO_BO_UNPACK_PC(aptr);
fc241834 2052 /* newSVpvn generates undef if aptr is NULL */
59cd0e26 2053 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
08ca2aa3 2054 }
a6ec74c1
JH
2055 break;
2056#ifdef HAS_QUAD
2057 case 'q':
73cb7263 2058 while (len-- > 0) {
08ca2aa3 2059 Quad_t aquad;
f337b084
TH
2060 SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2061 DO_BO_UNPACK(aquad, 64);
08ca2aa3 2062 if (!checksum)
6e449a3a
MHM
2063 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
2064 newSViv((IV)aquad) : newSVnv((NV)aquad));
73cb7263
NC
2065 else if (checksum > bits_in_uv)
2066 cdouble += (NV)aquad;
2067 else
2068 cuv += aquad;
2069 }
a6ec74c1
JH
2070 break;
2071 case 'Q':
73cb7263 2072 while (len-- > 0) {
08ca2aa3 2073 Uquad_t auquad;
f337b084
TH
2074 SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2075 DO_BO_UNPACK(auquad, 64);
08ca2aa3 2076 if (!checksum)
6e449a3a
MHM
2077 mPUSHs(auquad <= UV_MAX ?
2078 newSVuv((UV)auquad) : newSVnv((NV)auquad));
73cb7263
NC
2079 else if (checksum > bits_in_uv)
2080 cdouble += (NV)auquad;
2081 else
2082 cuv += auquad;
a6ec74c1
JH
2083 }
2084 break;
08ca2aa3 2085#endif /* HAS_QUAD */
a6ec74c1
JH
2086 /* float and double added gnb@melba.bby.oz.au 22/11/89 */
2087 case 'f':
73cb7263 2088 while (len-- > 0) {
08ca2aa3 2089 float afloat;
f337b084 2090 SHIFT_VAR(utf8, s, strend, afloat, datumtype);
08ca2aa3
TH
2091 DO_BO_UNPACK_N(afloat, float);
2092 if (!checksum)
6e449a3a 2093 mPUSHn(afloat);
08ca2aa3 2094 else
73cb7263 2095 cdouble += afloat;
fc241834 2096 }
a6ec74c1
JH
2097 break;
2098 case 'd':
73cb7263 2099 while (len-- > 0) {
08ca2aa3 2100 double adouble;
f337b084 2101 SHIFT_VAR(utf8, s, strend, adouble, datumtype);
08ca2aa3
TH
2102 DO_BO_UNPACK_N(adouble, double);
2103 if (!checksum)
6e449a3a 2104 mPUSHn(adouble);
08ca2aa3 2105 else
73cb7263 2106 cdouble += adouble;
fc241834 2107 }
a6ec74c1 2108 break;
92d41999 2109 case 'F':
73cb7263 2110 while (len-- > 0) {
275663fa
TC
2111 NV_bytes anv;
2112 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), datumtype);
2113 DO_BO_UNPACK_N(anv.nv, NV);
08ca2aa3 2114 if (!checksum)
275663fa 2115 mPUSHn(anv.nv);
08ca2aa3 2116 else
275663fa 2117 cdouble += anv.nv;
fc241834 2118 }
92d41999
JH
2119 break;
2120#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2121 case 'D':
73cb7263 2122 while (len-- > 0) {
275663fa
TC
2123 ld_bytes aldouble;
2124 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, sizeof(aldouble.bytes), datumtype);
2125 DO_BO_UNPACK_N(aldouble.ld, long double);
08ca2aa3 2126 if (!checksum)
275663fa 2127 mPUSHn(aldouble.ld);
08ca2aa3 2128 else
275663fa 2129 cdouble += aldouble.ld;
92d41999
JH
2130 }
2131 break;
2132#endif
a6ec74c1 2133 case 'u':
858fe5e1 2134 if (!checksum) {
f7fe979e 2135 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
561b68a9 2136 sv = sv_2mortal(newSV(l));
08ca2aa3
TH
2137 if (l) SvPOK_on(sv);
2138 }
2139 if (utf8) {
2140 while (next_uni_uu(aTHX_ &s, strend, &len)) {
2141 I32 a, b, c, d;
db187877 2142 char hunk[3];
08ca2aa3 2143
08ca2aa3
TH
2144 while (len > 0) {
2145 next_uni_uu(aTHX_ &s, strend, &a);
2146 next_uni_uu(aTHX_ &s, strend, &b);
2147 next_uni_uu(aTHX_ &s, strend, &c);
2148 next_uni_uu(aTHX_ &s, strend, &d);
2149 hunk[0] = (char)((a << 2) | (b >> 4));
2150 hunk[1] = (char)((b << 4) | (c >> 2));
2151 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
2152 if (!checksum)
2153 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
08ca2aa3
TH
2154 len -= 3;
2155 }
2156 if (s < strend) {
f7fe979e
AL
2157 if (*s == '\n') {
2158 s++;
2159 }
08ca2aa3
TH
2160 else {
2161 /* possible checksum byte */
f7fe979e
AL
2162 const char *skip = s+UTF8SKIP(s);
2163 if (skip < strend && *skip == '\n')
2164 s = skip+1;
08ca2aa3
TH
2165 }
2166 }
2167 }
2168 } else {
fc241834
RGS
2169 while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2170 I32 a, b, c, d;
db187877 2171 char hunk[3];
a6ec74c1 2172
fc241834
RGS
2173 len = PL_uudmap[*(U8*)s++] & 077;
2174 while (len > 0) {
2175 if (s < strend && ISUUCHAR(*s))
2176 a = PL_uudmap[*(U8*)s++] & 077;
2177 else
2178 a = 0;
2179 if (s < strend && ISUUCHAR(*s))
2180 b = PL_uudmap[*(U8*)s++] & 077;
2181 else
2182 b = 0;
2183 if (s < strend && ISUUCHAR(*s))
2184 c = PL_uudmap[*(U8*)s++] & 077;
2185 else
2186 c = 0;
2187 if (s < strend && ISUUCHAR(*s))
2188 d = PL_uudmap[*(U8*)s++] & 077;
2189 else
2190 d = 0;
2191 hunk[0] = (char)((a << 2) | (b >> 4));
2192 hunk[1] = (char)((b << 4) | (c >> 2));
2193 hunk[2] = (char)((c << 6) | d);
858fe5e1
TC
2194 if (!checksum)
2195 sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
fc241834
RGS
2196 len -= 3;
2197 }
2198 if (*s == '\n')
2199 s++;
2200 else /* possible checksum byte */
2201 if (s + 1 < strend && s[1] == '\n')
2202 s += 2;
a6ec74c1 2203 }
08ca2aa3 2204 }
858fe5e1
TC
2205 if (!checksum)
2206 XPUSHs(sv);
a6ec74c1
JH
2207 break;
2208 }
49704364 2209
a6ec74c1 2210 if (checksum) {
1109a392 2211 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
92d41999 2212 (checksum > bits_in_uv &&
08ca2aa3
TH
2213 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2214 NV trouble, anv;
a6ec74c1 2215
08ca2aa3 2216 anv = (NV) (1 << (checksum & 15));
a6ec74c1
JH
2217 while (checksum >= 16) {
2218 checksum -= 16;
08ca2aa3 2219 anv *= 65536.0;
a6ec74c1 2220 }
a6ec74c1 2221 while (cdouble < 0.0)
08ca2aa3
TH
2222 cdouble += anv;
2223 cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
c4c5f44a 2224 sv = newSVnv(cdouble);
a6ec74c1
JH
2225 }
2226 else {
fa8ec7c1
NC
2227 if (checksum < bits_in_uv) {
2228 UV mask = ((UV)1 << checksum) - 1;
92d41999 2229 cuv &= mask;
a6ec74c1 2230 }
c4c5f44a 2231 sv = newSVuv(cuv);
a6ec74c1 2232 }
6e449a3a 2233 mXPUSHs(sv);
a6ec74c1
JH
2234 checksum = 0;
2235 }
fc241834 2236
49704364
WL
2237 if (symptr->flags & FLAG_SLASH){
2238 if (SP - PL_stack_base - start_sp_offset <= 0)
21361d07 2239 break;
49704364
WL
2240 if( next_symbol(symptr) ){
2241 if( symptr->howlen == e_number )
2242 Perl_croak(aTHX_ "Count after length/code in unpack" );
2243 if( beyond ){
2244 /* ...end of char buffer then no decent length available */
2245 Perl_croak(aTHX_ "length/code after end of string in unpack" );
2246 } else {
2247 /* take top of stack (hope it's numeric) */
2248 len = POPi;
2249 if( len < 0 )
2250 Perl_croak(aTHX_ "Negative '/' count in unpack" );
2251 }
2252 } else {
2253 Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2254 }
2255 datumtype = symptr->code;
21c16052 2256 explicit_length = FALSE;
49704364
WL
2257 goto redo_switch;
2258 }
a6ec74c1 2259 }
49704364 2260
18529408
IZ
2261 if (new_s)
2262 *new_s = s;
2263 PUTBACK;
2264 return SP - PL_stack_base - start_sp_offset;
2265}
2266
2267PP(pp_unpack)
2268{
97aff369 2269 dVAR;
18529408 2270 dSP;
bab9c0ac 2271 dPOPPOPssrl;
18529408
IZ
2272 I32 gimme = GIMME_V;
2273 STRLEN llen;
2274 STRLEN rlen;
5c144d81
NC
2275 const char *pat = SvPV_const(left, llen);
2276 const char *s = SvPV_const(right, rlen);
f7fe979e
AL
2277 const char *strend = s + rlen;
2278 const char *patend = pat + llen;
08ca2aa3 2279 I32 cnt;
18529408
IZ
2280
2281 PUTBACK;
7accc089 2282 cnt = unpackstring(pat, patend, s, strend,
49704364 2283 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
f337b084 2284 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
49704364 2285
18529408
IZ
2286 SPAGAIN;
2287 if ( !cnt && gimme == G_SCALAR )
2288 PUSHs(&PL_sv_undef);
a6ec74c1
JH
2289 RETURN;
2290}
2291
f337b084 2292STATIC U8 *
f7fe979e 2293doencodes(U8 *h, const char *s, I32 len)
a6ec74c1 2294{
f337b084 2295 *h++ = PL_uuemap[len];
a6ec74c1 2296 while (len > 2) {
f337b084
TH
2297 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2298 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2299 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2300 *h++ = PL_uuemap[(077 & (s[2] & 077))];
a6ec74c1
JH
2301 s += 3;
2302 len -= 3;
2303 }
2304 if (len > 0) {
f7fe979e 2305 const char r = (len > 1 ? s[1] : '\0');
f337b084
TH
2306 *h++ = PL_uuemap[(077 & (s[0] >> 2))];
2307 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2308 *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2309 *h++ = PL_uuemap[0];
a6ec74c1 2310 }
f337b084
TH
2311 *h++ = '\n';
2312 return h;
a6ec74c1
JH
2313}
2314
2315STATIC SV *
f7fe979e 2316S_is_an_int(pTHX_ const char *s, STRLEN l)
a6ec74c1 2317{
8b6e33c7
AL
2318 SV *result = newSVpvn(s, l);
2319 char *const result_c = SvPV_nolen(result); /* convenience */
2320 char *out = result_c;
2321 bool skip = 1;
2322 bool ignore = 0;
a6ec74c1 2323
7918f24d
NC
2324 PERL_ARGS_ASSERT_IS_AN_INT;
2325
a6ec74c1
JH
2326 while (*s) {
2327 switch (*s) {
2328 case ' ':
2329 break;
2330 case '+':
2331 if (!skip) {
2332 SvREFCNT_dec(result);
2333 return (NULL);
2334 }
2335 break;
2336 case '0':
2337 case '1':
2338 case '2':
2339 case '3':
2340 case '4':
2341 case '5':
2342 case '6':
2343 case '7':
2344 case '8':
2345 case '9':
2346 skip = 0;
2347 if (!ignore) {
2348 *(out++) = *s;
2349 }
2350 break;
2351 case '.':
2352 ignore = 1;
2353 break;
2354 default:
2355 SvREFCNT_dec(result);
2356 return (NULL);
2357 }
2358 s++;
2359 }
2360 *(out++) = '\0';
2361 SvCUR_set(result, out - result_c);
2362 return (result);
2363}
2364
2365/* pnum must be '\0' terminated */
2366STATIC int
2367S_div128(pTHX_ SV *pnum, bool *done)
2368{
8b6e33c7
AL
2369 STRLEN len;
2370 char * const s = SvPV(pnum, len);
2371 char *t = s;
2372 int m = 0;
2373
7918f24d
NC
2374 PERL_ARGS_ASSERT_DIV128;
2375
8b6e33c7
AL
2376 *done = 1;
2377 while (*t) {
2378 const int i = m * 10 + (*t - '0');
2379 const int r = (i >> 7); /* r < 10 */
2380 m = i & 0x7F;
2381 if (r) {
2382 *done = 0;
2383 }
2384 *(t++) = '0' + r;
a6ec74c1 2385 }
8b6e33c7
AL
2386 *(t++) = '\0';
2387 SvCUR_set(pnum, (STRLEN) (t - s));
2388 return (m);
a6ec74c1
JH
2389}
2390
18529408 2391/*
7accc089
JH
2392=for apidoc packlist
2393
2394The engine implementing pack() Perl function.
2395
bfce84ec
AL
2396=cut
2397*/
7accc089
JH
2398
2399void
5aaab254 2400Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
7accc089 2401{
97aff369 2402 dVAR;
aadb217d
JH
2403 tempsym_t sym;
2404
7918f24d
NC
2405 PERL_ARGS_ASSERT_PACKLIST;
2406
f7fe979e 2407 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
49704364 2408
f337b084
TH
2409 /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2410 Also make sure any UTF8 flag is loaded */
56eb0262 2411 SvPV_force_nolen(cat);
bfce84ec
AL
2412 if (DO_UTF8(cat))
2413 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
f337b084 2414
49704364
WL
2415 (void)pack_rec( cat, &sym, beglist, endlist );
2416}
2417
f337b084
TH
2418/* like sv_utf8_upgrade, but also repoint the group start markers */
2419STATIC void
2420marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2421 STRLEN len;
2422 tempsym_t *group;
f7fe979e
AL
2423 const char *from_ptr, *from_start, *from_end, **marks, **m;
2424 char *to_start, *to_ptr;
f337b084
TH
2425
2426 if (SvUTF8(sv)) return;
2427
aa07b2f6 2428 from_start = SvPVX_const(sv);
f337b084
TH
2429 from_end = from_start + SvCUR(sv);
2430 for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2431 if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2432 if (from_ptr == from_end) {
2433 /* Simple case: no character needs to be changed */
2434 SvUTF8_on(sv);
2435 return;
2436 }
2437
3473cf63 2438 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
a02a5408 2439 Newx(to_start, len, char);
f337b084
TH
2440 Copy(from_start, to_start, from_ptr-from_start, char);
2441 to_ptr = to_start + (from_ptr-from_start);
2442
a02a5408 2443 Newx(marks, sym_ptr->level+2, const char *);
f337b084
TH
2444 for (group=sym_ptr; group; group = group->previous)
2445 marks[group->level] = from_start + group->strbeg;
2446 marks[sym_ptr->level+1] = from_end+1;
2447 for (m = marks; *m < from_ptr; m++)
2448 *m = to_start + (*m-from_start);
2449
2450 for (;from_ptr < from_end; from_ptr++) {
2451 while (*m == from_ptr) *m++ = to_ptr;
230e1fce 2452 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
f337b084
TH
2453 }
2454 *to_ptr = 0;
2455
2456 while (*m == from_ptr) *m++ = to_ptr;
2457 if (m != marks + sym_ptr->level+1) {
2458 Safefree(marks);
2459 Safefree(to_start);
5637ef5b
NC
2460 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2461 "level=%d", m, marks, sym_ptr->level);
f337b084
TH
2462 }
2463 for (group=sym_ptr; group; group = group->previous)
2464 group->strbeg = marks[group->level] - to_start;
2465 Safefree(marks);
2466
2467 if (SvOOK(sv)) {
2468 if (SvIVX(sv)) {
b162af07 2469 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
f337b084
TH
2470 from_start -= SvIVX(sv);
2471 SvIV_set(sv, 0);
2472 }
2473 SvFLAGS(sv) &= ~SVf_OOK;
2474 }
2475 if (SvLEN(sv) != 0)
2476 Safefree(from_start);
f880fe2f 2477 SvPV_set(sv, to_start);
b162af07
SP
2478 SvCUR_set(sv, to_ptr - to_start);
2479 SvLEN_set(sv, len);
f337b084
TH
2480 SvUTF8_on(sv);
2481}
2482
2483/* Exponential string grower. Makes string extension effectively O(n)
2484 needed says how many extra bytes we need (not counting the final '\0')
2485 Only grows the string if there is an actual lack of space
2486*/
2487STATIC char *
0bd48802 2488S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
f7fe979e
AL
2489 const STRLEN cur = SvCUR(sv);
2490 const STRLEN len = SvLEN(sv);
f337b084 2491 STRLEN extend;
7918f24d
NC
2492
2493 PERL_ARGS_ASSERT_SV_EXP_GROW;
2494
f337b084
TH
2495 if (len - cur > needed) return SvPVX(sv);
2496 extend = needed > len ? needed : len;
2497 return SvGROW(sv, len+extend+1);
2498}
49704364
WL
2499
2500STATIC
2501SV **
f337b084 2502S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
49704364 2503{
97aff369 2504 dVAR;
49704364 2505 tempsym_t lookahead;
f337b084
TH
2506 I32 items = endlist - beglist;
2507 bool found = next_symbol(symptr);
2508 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
041457d9 2509 bool warn_utf8 = ckWARN(WARN_UTF8);
f337b084 2510
7918f24d
NC
2511 PERL_ARGS_ASSERT_PACK_REC;
2512
f337b084
TH
2513 if (symptr->level == 0 && found && symptr->code == 'U') {
2514 marked_upgrade(aTHX_ cat, symptr);
2515 symptr->flags |= FLAG_DO_UTF8;
2516 utf8 = 0;
49704364 2517 }
f337b084 2518 symptr->strbeg = SvCUR(cat);
49704364
WL
2519
2520 while (found) {
f337b084
TH
2521 SV *fromstr;
2522 STRLEN fromlen;
2523 I32 len;
a0714e2c 2524 SV *lengthcode = NULL;
49704364 2525 I32 datumtype = symptr->code;
f337b084
TH
2526 howlen_t howlen = symptr->howlen;
2527 char *start = SvPVX(cat);
2528 char *cur = start + SvCUR(cat);
49704364 2529
f337b084
TH
2530#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2531
2532 switch (howlen) {
fc241834 2533 case e_star:
f337b084
TH
2534 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2535 0 : items;
2536 break;
2537 default:
2538 /* e_no_len and e_number */
2539 len = symptr->length;
49704364
WL
2540 break;
2541 }
2542
f337b084 2543 if (len) {
a7a3cfaa 2544 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
f337b084 2545
a7a3cfaa
TH
2546 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2547 /* We can process this letter. */
2548 STRLEN size = props & PACK_SIZE_MASK;
2549 GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2550 }
f337b084
TH
2551 }
2552
49704364
WL
2553 /* Look ahead for next symbol. Do we have code/code? */
2554 lookahead = *symptr;
2555 found = next_symbol(&lookahead);
246f24af
TH
2556 if (symptr->flags & FLAG_SLASH) {
2557 IV count;
f337b084 2558 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
246f24af
TH
2559 if (strchr("aAZ", lookahead.code)) {
2560 if (lookahead.howlen == e_number) count = lookahead.length;
2561 else {
ce399ba6 2562 if (items > 0) {
48a5da33 2563 count = sv_len_utf8(*beglist);
ce399ba6 2564 }
246f24af
TH
2565 else count = 0;
2566 if (lookahead.code == 'Z') count++;
2567 }
2568 } else {
2569 if (lookahead.howlen == e_number && lookahead.length < items)
2570 count = lookahead.length;
2571 else count = items;
2572 }
2573 lookahead.howlen = e_number;
2574 lookahead.length = count;
2575 lengthcode = sv_2mortal(newSViv(count));
a6ec74c1 2576 }
49704364 2577
fc241834
RGS
2578 /* Code inside the switch must take care to properly update
2579 cat (CUR length and '\0' termination) if it updated *cur and
f337b084 2580 doesn't simply leave using break */
1109a392 2581 switch(TYPE_NO_ENDIANNESS(datumtype)) {
a6ec74c1 2582 default:
f337b084
TH
2583 Perl_croak(aTHX_ "Invalid type '%c' in pack",
2584 (int) TYPE_NO_MODIFIERS(datumtype));
a6ec74c1 2585 case '%':
49704364 2586 Perl_croak(aTHX_ "'%%' may not be used in pack");
28be1210
TH
2587 {
2588 char *from;
2589#ifdef PERL_PACK_CAN_SHRIEKSIGN
2590 case '.' | TYPE_IS_SHRIEKING:
2591#endif
2592 case '.':
2593 if (howlen == e_star) from = start;
2594 else if (len == 0) from = cur;
2595 else {
2596 tempsym_t *group = symptr;
2597
2598 while (--len && group) group = group->previous;
2599 from = group ? start + group->strbeg : start;
2600 }
2601 fromstr = NEXTFROM;
2602 len = SvIV(fromstr);
2603 goto resize;
2604#ifdef PERL_PACK_CAN_SHRIEKSIGN
2605 case '@' | TYPE_IS_SHRIEKING:
2606#endif
a6ec74c1 2607 case '@':
28be1210
TH
2608 from = start + symptr->strbeg;
2609 resize:
2610#ifdef PERL_PACK_CAN_SHRIEKSIGN
2611 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING))
2612#else /* PERL_PACK_CAN_SHRIEKSIGN */
2613 if (utf8)
2614#endif
2615 if (len >= 0) {
2616 while (len && from < cur) {
2617 from += UTF8SKIP(from);
2618 len--;
2619 }
2620 if (from > cur)
2621 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2622 if (len) {
2623 /* Here we know from == cur */
2624 grow:
2625 GROWING(0, cat, start, cur, len);
2626 Zero(cur, len, char);
2627 cur += len;
2628 } else if (from < cur) {
2629 len = cur - from;
2630 goto shrink;
2631 } else goto no_change;
2632 } else {
2633 cur = from;
2634 len = -len;
2635 goto utf8_shrink;
f337b084 2636 }
28be1210
TH
2637 else {
2638 len -= cur - from;
f337b084 2639 if (len > 0) goto grow;
28be1210 2640 if (len == 0) goto no_change;
fc241834 2641 len = -len;
28be1210 2642 goto shrink;
f337b084 2643 }
a6ec74c1 2644 break;
28be1210 2645 }
fc241834 2646 case '(': {
49704364 2647 tempsym_t savsym = *symptr;
66c611c5
MHM
2648 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2649 symptr->flags |= group_modifiers;
49704364
WL
2650 symptr->patend = savsym.grpend;
2651 symptr->level++;
f337b084 2652 symptr->previous = &lookahead;
18529408 2653 while (len--) {
f337b084
TH
2654 U32 was_utf8;
2655 if (utf8) symptr->flags |= FLAG_PARSE_UTF8;
2656 else symptr->flags &= ~FLAG_PARSE_UTF8;
2657 was_utf8 = SvUTF8(cat);
49704364 2658 symptr->patptr = savsym.grpbeg;
f337b084
TH
2659 beglist = pack_rec(cat, symptr, beglist, endlist);
2660 if (SvUTF8(cat) != was_utf8)
2661 /* This had better be an upgrade while in utf8==0 mode */
2662 utf8 = 1;
2663
49704364 2664 if (savsym.howlen == e_star && beglist == endlist)
18529408
IZ
2665 break; /* No way to continue */
2666 }
ee790063 2667 items = endlist - beglist;
f337b084
TH
2668 lookahead.flags = symptr->flags & ~group_modifiers;
2669 goto no_change;
18529408 2670 }
62f95557
IZ
2671 case 'X' | TYPE_IS_SHRIEKING:
2672 if (!len) /* Avoid division by 0 */
2673 len = 1;
f337b084
TH
2674 if (utf8) {
2675 char *hop, *last;
2676 I32 l = len;
2677 hop = last = start;
2678 while (hop < cur) {
2679 hop += UTF8SKIP(hop);
2680 if (--l == 0) {
2681 last = hop;
2682 l = len;
2683 }
2684 }
2685 if (last > cur)
2686 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2687 cur = last;
2688 break;
2689 }
2690 len = (cur-start) % len;
62f95557 2691 /* FALL THROUGH */
a6ec74c1 2692 case 'X':
f337b084
TH
2693 if (utf8) {
2694 if (len < 1) goto no_change;
28be1210 2695 utf8_shrink:
f337b084
TH
2696 while (len > 0) {
2697 if (cur <= start)
28be1210
TH
2698 Perl_croak(aTHX_ "'%c' outside of string in pack",
2699 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2700 while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2701 if (cur <= start)
28be1210
TH
2702 Perl_croak(aTHX_ "'%c' outside of string in pack",
2703 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2704 }
2705 len--;
2706 }
2707 } else {
fc241834 2708 shrink:
f337b084 2709 if (cur - start < len)
28be1210
TH
2710 Perl_croak(aTHX_ "'%c' outside of string in pack",
2711 (int) TYPE_NO_MODIFIERS(datumtype));
f337b084
TH
2712 cur -= len;
2713 }
2714 if (cur < start+symptr->strbeg) {
2715 /* Make sure group starts don't point into the void */
2716 tempsym_t *group;
9e27e96a 2717 const STRLEN length = cur-start;
f337b084
TH
2718 for (group = symptr;
2719 group && length < group->strbeg;
2720 group = group->previous) group->strbeg = length;
2721 lookahead.strbeg = length;
2722 }
a6ec74c1 2723 break;
fc241834
RGS
2724 case 'x' | TYPE_IS_SHRIEKING: {
2725 I32 ai32;
62f95557
IZ
2726 if (!len) /* Avoid division by 0 */
2727 len = 1;
230e1fce 2728 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
fc241834
RGS
2729 else ai32 = (cur - start) % len;
2730 if (ai32 == 0) goto no_change;
2731 len -= ai32;
2732 }
2733 /* FALL THROUGH */
a6ec74c1 2734 case 'x':
f337b084 2735 goto grow;
a6ec74c1
JH
2736 case 'A':
2737 case 'Z':
f337b084 2738 case 'a': {
f7fe979e 2739 const char *aptr;
f337b084 2740
a6ec74c1 2741 fromstr = NEXTFROM;
e62f0680 2742 aptr = SvPV_const(fromstr, fromlen);
f337b084 2743 if (DO_UTF8(fromstr)) {
f7fe979e 2744 const char *end, *s;
f337b084
TH
2745
2746 if (!utf8 && !SvUTF8(cat)) {
2747 marked_upgrade(aTHX_ cat, symptr);
2748 lookahead.flags |= FLAG_DO_UTF8;
2749 lookahead.strbeg = symptr->strbeg;
2750 utf8 = 1;
2751 start = SvPVX(cat);
2752 cur = start + SvCUR(cat);
2753 }
fc241834 2754 if (howlen == e_star) {
f337b084
TH
2755 if (utf8) goto string_copy;
2756 len = fromlen+1;
2757 }
2758 s = aptr;
2759 end = aptr + fromlen;
2760 fromlen = datumtype == 'Z' ? len-1 : len;
2761 while ((I32) fromlen > 0 && s < end) {
2762 s += UTF8SKIP(s);
2763 fromlen--;
2764 }
2765 if (s > end)
2766 Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2767 if (utf8) {
fc241834 2768 len = fromlen;
f337b084
TH
2769 if (datumtype == 'Z') len++;
2770 fromlen = s-aptr;
2771 len += fromlen;
fc241834 2772
f337b084 2773 goto string_copy;
fc241834 2774 }
f337b084
TH
2775 fromlen = len - fromlen;
2776 if (datumtype == 'Z') fromlen--;
2777 if (howlen == e_star) {
2778 len = fromlen;
2779 if (datumtype == 'Z') len++;
fc241834 2780 }
f337b084 2781 GROWING(0, cat, start, cur, len);
fc241834 2782 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
f337b084 2783 datumtype | TYPE_IS_PACK))
5637ef5b
NC
2784 Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2785 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2786 (int)datumtype, aptr, end, cur, (UV)fromlen);
f337b084 2787 cur += fromlen;
a6ec74c1 2788 len -= fromlen;
f337b084
TH
2789 } else if (utf8) {
2790 if (howlen == e_star) {
2791 len = fromlen;
2792 if (datumtype == 'Z') len++;
a6ec74c1 2793 }
f337b084
TH
2794 if (len <= (I32) fromlen) {
2795 fromlen = len;
2796 if (datumtype == 'Z' && fromlen > 0) fromlen--;
2797 }
fc241834 2798 /* assumes a byte expands to at most UTF8_EXPAND bytes on
3473cf63
RGS
2799 upgrade, so:
2800 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2801 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
f337b084
TH
2802 len -= fromlen;
2803 while (fromlen > 0) {
230e1fce 2804 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
f337b084
TH
2805 aptr++;
2806 fromlen--;
fc241834 2807 }
f337b084
TH
2808 } else {
2809 string_copy:
2810 if (howlen == e_star) {
2811 len = fromlen;
2812 if (datumtype == 'Z') len++;
2813 }
2814 if (len <= (I32) fromlen) {
2815 fromlen = len;
2816 if (datumtype == 'Z' && fromlen > 0) fromlen--;
a6ec74c1 2817 }
f337b084
TH
2818 GROWING(0, cat, start, cur, len);
2819 Copy(aptr, cur, fromlen, char);
2820 cur += fromlen;
2821 len -= fromlen;
a6ec74c1 2822 }
f337b084
TH
2823 memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2824 cur += len;
3c4fb04a 2825 SvTAINT(cat);
a6ec74c1 2826 break;
f337b084 2827 }
a6ec74c1 2828 case 'B':
f337b084 2829 case 'b': {
b83604b4 2830 const char *str, *end;
f337b084
TH
2831 I32 l, field_len;
2832 U8 bits;
2833 bool utf8_source;
2834 U32 utf8_flags;
a6ec74c1 2835
fc241834 2836 fromstr = NEXTFROM;
b83604b4 2837 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2838 end = str + fromlen;
2839 if (DO_UTF8(fromstr)) {
2840 utf8_source = TRUE;
041457d9 2841 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2842 } else {
2843 utf8_source = FALSE;
2844 utf8_flags = 0; /* Unused, but keep compilers happy */
2845 }
2846 if (howlen == e_star) len = fromlen;
2847 field_len = (len+7)/8;
2848 GROWING(utf8, cat, start, cur, field_len);
2849 if (len > (I32)fromlen) len = fromlen;
2850 bits = 0;
2851 l = 0;
2852 if (datumtype == 'B')
2853 while (l++ < len) {
2854 if (utf8_source) {
95b63a38 2855 UV val = 0;
f337b084
TH
2856 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2857 bits |= val & 1;
2858 } else bits |= *str++ & 1;
2859 if (l & 7) bits <<= 1;
fc241834 2860 else {
f337b084
TH
2861 PUSH_BYTE(utf8, cur, bits);
2862 bits = 0;
a6ec74c1
JH
2863 }
2864 }
f337b084
TH
2865 else
2866 /* datumtype == 'b' */
2867 while (l++ < len) {
2868 if (utf8_source) {
95b63a38 2869 UV val = 0;
f337b084
TH
2870 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2871 if (val & 1) bits |= 0x80;
2872 } else if (*str++ & 1)
2873 bits |= 0x80;
2874 if (l & 7) bits >>= 1;
fc241834 2875 else {
f337b084
TH
2876 PUSH_BYTE(utf8, cur, bits);
2877 bits = 0;
a6ec74c1
JH
2878 }
2879 }
f337b084
TH
2880 l--;
2881 if (l & 7) {
fc241834 2882 if (datumtype == 'B')
f337b084 2883 bits <<= 7 - (l & 7);
fc241834 2884 else
f337b084
TH
2885 bits >>= 7 - (l & 7);
2886 PUSH_BYTE(utf8, cur, bits);
2887 l += 7;
a6ec74c1 2888 }
f337b084
TH
2889 /* Determine how many chars are left in the requested field */
2890 l /= 8;
2891 if (howlen == e_star) field_len = 0;
2892 else field_len -= l;
2893 Zero(cur, field_len, char);
2894 cur += field_len;
a6ec74c1 2895 break;
f337b084 2896 }
a6ec74c1 2897 case 'H':
f337b084 2898 case 'h': {
10516c54 2899 const char *str, *end;
f337b084
TH
2900 I32 l, field_len;
2901 U8 bits;
2902 bool utf8_source;
2903 U32 utf8_flags;
a6ec74c1 2904
fc241834 2905 fromstr = NEXTFROM;
10516c54 2906 str = SvPV_const(fromstr, fromlen);
f337b084
TH
2907 end = str + fromlen;
2908 if (DO_UTF8(fromstr)) {
2909 utf8_source = TRUE;
041457d9 2910 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
f337b084
TH
2911 } else {
2912 utf8_source = FALSE;
2913 utf8_flags = 0; /* Unused, but keep compilers happy */
2914 }
2915 if (howlen == e_star) len = fromlen;
2916 field_len = (len+1)/2;
2917 GROWING(utf8, cat, start, cur, field_len);
2918 if (!utf8 && len > (I32)fromlen) len = fromlen;
2919 bits = 0;
2920 l = 0;
2921 if (datumtype == 'H')
2922 while (l++ < len) {
2923 if (utf8_source) {
95b63a38 2924 UV val = 0;
f337b084
TH
2925 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2926 if (val < 256 && isALPHA(val))
2927 bits |= (val + 9) & 0xf;
a6ec74c1 2928 else
f337b084
TH
2929 bits |= val & 0xf;
2930 } else if (isALPHA(*str))
2931 bits |= (*str++ + 9) & 0xf;
2932 else
2933 bits |= *str++ & 0xf;
2934 if (l & 1) bits <<= 4;
fc241834 2935 else {
f337b084
TH
2936 PUSH_BYTE(utf8, cur, bits);
2937 bits = 0;
a6ec74c1
JH
2938 }
2939 }
f337b084
TH
2940 else
2941 while (l++ < len) {
2942 if (utf8_source) {
95b63a38 2943 UV val = 0;
f337b084
TH
2944 NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2945 if (val < 256 && isALPHA(val))
2946 bits |= ((val + 9) & 0xf) << 4;
a6ec74c1 2947 else
f337b084
TH
2948 bits |= (val & 0xf) << 4;
2949 } else if (isALPHA(*str))
2950 bits |= ((*str++ + 9) & 0xf) << 4;
2951 else
2952 bits |= (*str++ & 0xf) << 4;
2953 if (l & 1) bits >>= 4;
fc241834 2954 else {
f337b084
TH
2955 PUSH_BYTE(utf8, cur, bits);
2956 bits = 0;
a6ec74c1 2957 }
fc241834 2958 }
f337b084
TH
2959 l--;
2960 if (l & 1) {
2961 PUSH_BYTE(utf8, cur, bits);
2962 l++;
2963 }
2964 /* Determine how many chars are left in the requested field */
2965 l /= 2;
2966 if (howlen == e_star) field_len = 0;
2967 else field_len -= l;
2968 Zero(cur, field_len, char);
2969 cur += field_len;
2970 break;
fc241834
RGS
2971 }
2972 case 'c':
f337b084
TH
2973 while (len-- > 0) {
2974 IV aiv;
2975 fromstr = NEXTFROM;
2976 aiv = SvIV(fromstr);
a2a5de95
NC
2977 if ((-128 > aiv || aiv > 127))
2978 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2979 "Character in 'c' format wrapped in pack");
585ec06d 2980 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
a6ec74c1
JH
2981 }
2982 break;
2983 case 'C':
f337b084
TH
2984 if (len == 0) {
2985 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2986 break;
2987 }
a6ec74c1 2988 while (len-- > 0) {
f337b084 2989 IV aiv;
a6ec74c1 2990 fromstr = NEXTFROM;
f337b084 2991 aiv = SvIV(fromstr);
a2a5de95
NC
2992 if ((0 > aiv || aiv > 0xff))
2993 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2994 "Character in 'C' format wrapped in pack");
1651fc44 2995 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
f337b084 2996 }
fc241834
RGS
2997 break;
2998 case 'W': {
2999 char *end;
670f1322 3000 U8 in_bytes = (U8)IN_BYTES;
fc241834
RGS
3001
3002 end = start+SvLEN(cat)-1;
3003 if (utf8) end -= UTF8_MAXLEN-1;
3004 while (len-- > 0) {
3005 UV auv;
3006 fromstr = NEXTFROM;
3007 auv = SvUV(fromstr);
3008 if (in_bytes) auv = auv % 0x100;
3009 if (utf8) {
3010 W_utf8:
3011 if (cur > end) {
3012 *cur = '\0';
b162af07 3013 SvCUR_set(cat, cur - start);
fc241834
RGS
3014
3015 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3016 end = start+SvLEN(cat)-UTF8_MAXLEN;
3017 }
230e1fce
NC
3018 cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3019 NATIVE_TO_UNI(auv),
041457d9 3020 warn_utf8 ?
230e1fce 3021 0 : UNICODE_ALLOW_ANY);
fc241834
RGS
3022 } else {
3023 if (auv >= 0x100) {
3024 if (!SvUTF8(cat)) {
3025 *cur = '\0';
b162af07 3026 SvCUR_set(cat, cur - start);
fc241834
RGS
3027 marked_upgrade(aTHX_ cat, symptr);
3028 lookahead.flags |= FLAG_DO_UTF8;
3029 lookahead.strbeg = symptr->strbeg;
3030 utf8 = 1;
3031 start = SvPVX(cat);
3032 cur = start + SvCUR(cat);
3033 end = start+SvLEN(cat)-UTF8_MAXLEN;
3034 goto W_utf8;
3035 }
a2a5de95
NC
3036 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3037 "Character in 'W' format wrapped in pack");
fc241834
RGS
3038 auv &= 0xff;
3039 }
3040 if (cur >= end) {
3041 *cur = '\0';
b162af07 3042 SvCUR_set(cat, cur - start);
fc241834
RGS
3043 GROWING(0, cat, start, cur, len+1);
3044 end = start+SvLEN(cat)-1;
3045 }
fe2774ed 3046 *(U8 *) cur++ = (U8)auv;
a6ec74c1
JH
3047 }
3048 }
3049 break;
fc241834
RGS
3050 }
3051 case 'U': {
3052 char *end;
3053
3054 if (len == 0) {
3055 if (!(symptr->flags & FLAG_DO_UTF8)) {
3056 marked_upgrade(aTHX_ cat, symptr);
3057 lookahead.flags |= FLAG_DO_UTF8;
3058 lookahead.strbeg = symptr->strbeg;
3059 }
3060 utf8 = 0;
3061 goto no_change;
3062 }
3063
3064 end = start+SvLEN(cat);
3065 if (!utf8) end -= UTF8_MAXLEN;
a6ec74c1 3066 while (len-- > 0) {
fc241834 3067 UV auv;
a6ec74c1 3068 fromstr = NEXTFROM;
fc241834
RGS
3069 auv = SvUV(fromstr);
3070 if (utf8) {
230e1fce 3071 U8 buffer[UTF8_MAXLEN], *endb;
fc241834 3072 endb = uvuni_to_utf8_flags(buffer, auv,
041457d9 3073 warn_utf8 ?
fc241834
RGS
3074 0 : UNICODE_ALLOW_ANY);
3075 if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3076 *cur = '\0';
b162af07 3077 SvCUR_set(cat, cur - start);
fc241834
RGS
3078 GROWING(0, cat, start, cur,
3079 len+(endb-buffer)*UTF8_EXPAND);
3080 end = start+SvLEN(cat);
3081 }
64844641 3082 cur = bytes_to_uni(buffer, endb-buffer, cur);
fc241834
RGS
3083 } else {
3084 if (cur >= end) {
3085 *cur = '\0';
b162af07 3086 SvCUR_set(cat, cur - start);
fc241834
RGS
3087 GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3088 end = start+SvLEN(cat)-UTF8_MAXLEN;
3089 }
230e1fce 3090 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
041457d9 3091 warn_utf8 ?
230e1fce 3092 0 : UNICODE_ALLOW_ANY);
fc241834 3093 }
a6ec74c1 3094 }
a6ec74c1 3095 break;
fc241834 3096 }
a6ec74c1
JH
3097 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */
3098 case 'f':
a6ec74c1 3099 while (len-- > 0) {
f337b084
TH
3100 float afloat;
3101 NV anv;
a6ec74c1 3102 fromstr = NEXTFROM;
f337b084 3103 anv = SvNV(fromstr);
5cdb9e01 3104#ifdef __VOS__
f337b084 3105 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3106 during conversion from double to float into infinity, so we
3107 do it by hand. This code should either be generalized for
3108 any OS that needs it, or removed if and when VOS implements
3109 posix-976 (suggestion to support mapping to infinity).
3110 Paul.Green@stratus.com 02-04-02. */
3722f0dc
PG
3111{
3112extern const float _float_constants[];
f337b084 3113 if (anv > FLT_MAX)
fc241834 3114 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3115 else if (anv < -FLT_MAX)
fc241834 3116 afloat = _float_constants[0]; /* single prec. inf. */
f337b084 3117 else afloat = (float) anv;
3722f0dc 3118}
f337b084 3119#else /* __VOS__ */
85bba25f 3120# if defined(VMS) && !defined(_IEEE_FP)
f337b084 3121 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3122 * on Alpha; fake it if we don't have them.
3123 */
f337b084 3124 if (anv > FLT_MAX)
fc241834 3125 afloat = FLT_MAX;
f337b084 3126 else if (anv < -FLT_MAX)
fc241834 3127 afloat = -FLT_MAX;
f337b084 3128 else afloat = (float)anv;
baf3cf9c 3129# else
f337b084 3130 afloat = (float)anv;
baf3cf9c 3131# endif
f337b084 3132#endif /* __VOS__ */
1109a392 3133 DO_BO_PACK_N(afloat, float);
f337b084 3134 PUSH_VAR(utf8, cur, afloat);
a6ec74c1
JH
3135 }
3136 break;
3137 case 'd':
a6ec74c1 3138 while (len-- > 0) {
f337b084
TH
3139 double adouble;
3140 NV anv;
a6ec74c1 3141 fromstr = NEXTFROM;
f337b084 3142 anv = SvNV(fromstr);
5cdb9e01 3143#ifdef __VOS__
f337b084 3144 /* VOS does not automatically map a floating-point overflow
fc241834
RGS
3145 during conversion from long double to double into infinity,
3146 so we do it by hand. This code should either be generalized
3147 for any OS that needs it, or removed if and when VOS
3148 implements posix-976 (suggestion to support mapping to
3149 infinity). Paul.Green@stratus.com 02-04-02. */
3722f0dc
PG
3150{
3151extern const double _double_constants[];
f337b084 3152 if (anv > DBL_MAX)
fc241834 3153 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3154 else if (anv < -DBL_MAX)
fc241834 3155 adouble = _double_constants[0]; /* double prec. inf. */
f337b084 3156 else adouble = (double) anv;
3722f0dc 3157}
f337b084 3158#else /* __VOS__ */
85bba25f 3159# if defined(VMS) && !defined(_IEEE_FP)
f337b084 3160 /* IEEE fp overflow shenanigans are unavailable on VAX and optional
fc241834
RGS
3161 * on Alpha; fake it if we don't have them.
3162 */
f337b084 3163 if (anv > DBL_MAX)
fc241834 3164 adouble = DBL_MAX;
f337b084 3165 else if (anv < -DBL_MAX)
fc241834 3166 adouble = -DBL_MAX;
f337b084 3167 else adouble = (double)anv;
baf3cf9c 3168# else
f337b084 3169 adouble = (double)anv;
baf3cf9c 3170# endif
f337b084 3171#endif /* __VOS__ */
1109a392 3172 DO_BO_PACK_N(adouble, double);
f337b084 3173 PUSH_VAR(utf8, cur, adouble);
a6ec74c1
JH
3174 }
3175 break;
fc241834 3176 case 'F': {
275663fa 3177 NV_bytes anv;
1109a392 3178 Zero(&anv, 1, NV); /* can be long double with unused bits */
92d41999
JH
3179 while (len-- > 0) {
3180 fromstr = NEXTFROM;
cd07c537
DM
3181#ifdef __GNUC__
3182 /* to work round a gcc/x86 bug; don't use SvNV */
3183 anv.nv = sv_2nv(fromstr);
3184#else
275663fa 3185 anv.nv = SvNV(fromstr);
cd07c537 3186#endif
1109a392 3187 DO_BO_PACK_N(anv, NV);
275663fa 3188 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes));
92d41999
JH
3189 }
3190 break;
fc241834 3191 }
92d41999 3192#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
fc241834 3193 case 'D': {
275663fa 3194 ld_bytes aldouble;
1109a392
MHM
3195 /* long doubles can have unused bits, which may be nonzero */
3196 Zero(&aldouble, 1, long double);
92d41999
JH
3197 while (len-- > 0) {
3198 fromstr = NEXTFROM;
cd07c537
DM
3199# ifdef __GNUC__
3200 /* to work round a gcc/x86 bug; don't use SvNV */
3201 aldouble.ld = (long double)sv_2nv(fromstr);
3202# else
275663fa 3203 aldouble.ld = (long double)SvNV(fromstr);
cd07c537 3204# endif
1109a392 3205 DO_BO_PACK_N(aldouble, long double);
275663fa 3206 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes));
92d41999
JH
3207 }
3208 break;
fc241834 3209 }
92d41999 3210#endif
7212898e 3211#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3212 case 'n' | TYPE_IS_SHRIEKING:
7212898e 3213#endif
a6ec74c1
JH
3214 case 'n':
3215 while (len-- > 0) {
f337b084 3216 I16 ai16;
a6ec74c1 3217 fromstr = NEXTFROM;
ef108786 3218 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3219#ifdef HAS_HTONS
ef108786 3220 ai16 = PerlSock_htons(ai16);
a6ec74c1 3221#endif
f337b084 3222 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3223 }
3224 break;
7212898e 3225#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3226 case 'v' | TYPE_IS_SHRIEKING:
7212898e 3227#endif
a6ec74c1
JH
3228 case 'v':
3229 while (len-- > 0) {
f337b084 3230 I16 ai16;
a6ec74c1 3231 fromstr = NEXTFROM;
ef108786 3232 ai16 = (I16)SvIV(fromstr);
a6ec74c1 3233#ifdef HAS_HTOVS
ef108786 3234 ai16 = htovs(ai16);
a6ec74c1 3235#endif
f337b084 3236 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3237 }
3238 break;
49704364 3239 case 'S' | TYPE_IS_SHRIEKING:
a6ec74c1 3240#if SHORTSIZE != SIZE16
fc241834 3241 while (len-- > 0) {
f337b084 3242 unsigned short aushort;
fc241834
RGS
3243 fromstr = NEXTFROM;
3244 aushort = SvUV(fromstr);
3245 DO_BO_PACK(aushort, s);
f337b084 3246 PUSH_VAR(utf8, cur, aushort);
fc241834 3247 }
49704364
WL
3248 break;
3249#else
3250 /* Fall through! */
a6ec74c1 3251#endif
49704364 3252 case 'S':
fc241834 3253 while (len-- > 0) {
f337b084 3254 U16 au16;
fc241834
RGS
3255 fromstr = NEXTFROM;
3256 au16 = (U16)SvUV(fromstr);
3257 DO_BO_PACK(au16, 16);
f337b084 3258 PUSH16(utf8, cur, &au16);
a6ec74c1
JH
3259 }
3260 break;
49704364 3261 case 's' | TYPE_IS_SHRIEKING:
a6ec74c1 3262#if SHORTSIZE != SIZE16
fc241834 3263 while (len-- > 0) {
f337b084 3264 short ashort;
fc241834
RGS
3265 fromstr = NEXTFROM;
3266 ashort = SvIV(fromstr);
3267 DO_BO_PACK(ashort, s);
f337b084 3268 PUSH_VAR(utf8, cur, ashort);
a6ec74c1 3269 }
49704364
WL
3270 break;
3271#else
3272 /* Fall through! */
a6ec74c1 3273#endif
49704364
WL
3274 case 's':
3275 while (len-- > 0) {
f337b084 3276 I16 ai16;
49704364 3277 fromstr = NEXTFROM;
ef108786
MHM
3278 ai16 = (I16)SvIV(fromstr);
3279 DO_BO_PACK(ai16, 16);
f337b084 3280 PUSH16(utf8, cur, &ai16);
a6ec74c1
JH
3281 }
3282 break;
3283 case 'I':
49704364 3284 case 'I' | TYPE_IS_SHRIEKING:
a6ec74c1 3285 while (len-- > 0) {
f337b084 3286 unsigned int auint;
a6ec74c1
JH
3287 fromstr = NEXTFROM;
3288 auint = SvUV(fromstr);
1109a392 3289 DO_BO_PACK(auint, i);
f337b084 3290 PUSH_VAR(utf8, cur, auint);
a6ec74c1
JH
3291 }
3292 break;
92d41999
JH
3293 case 'j':
3294 while (len-- > 0) {
f337b084 3295 IV aiv;
92d41999
JH
3296 fromstr = NEXTFROM;
3297 aiv = SvIV(fromstr);
1109a392
MHM
3298#if IVSIZE == INTSIZE
3299 DO_BO_PACK(aiv, i);
3300#elif IVSIZE == LONGSIZE
3301 DO_BO_PACK(aiv, l);
3302#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3303 DO_BO_PACK(aiv, 64);
f337b084
TH
3304#else
3305 Perl_croak(aTHX_ "'j' not supported on this platform");
1109a392 3306#endif
f337b084 3307 PUSH_VAR(utf8, cur, aiv);
92d41999
JH
3308 }
3309 break;
3310 case 'J':
3311 while (len-- > 0) {
f337b084 3312 UV auv;
92d41999
JH
3313 fromstr = NEXTFROM;
3314 auv = SvUV(fromstr);
1109a392
MHM
3315#if UVSIZE == INTSIZE
3316 DO_BO_PACK(auv, i);
3317#elif UVSIZE == LONGSIZE
3318 DO_BO_PACK(auv, l);
3319#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3320 DO_BO_PACK(auv, 64);
f337b084
TH
3321#else
3322 Perl_croak(aTHX_ "'J' not supported on this platform");
1109a392 3323#endif
f337b084 3324 PUSH_VAR(utf8, cur, auv);
92d41999
JH
3325 }
3326 break;
a6ec74c1
JH
3327 case 'w':
3328 while (len-- > 0) {
f337b084 3329 NV anv;
a6ec74c1 3330 fromstr = NEXTFROM;
15e9f109 3331 anv = SvNV(fromstr);
a6ec74c1 3332
f337b084
TH
3333 if (anv < 0) {
3334 *cur = '\0';
b162af07 3335 SvCUR_set(cat, cur - start);
49704364 3336 Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
f337b084 3337 }
a6ec74c1 3338
196b62db
NC
3339 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3340 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3341 any negative IVs will have already been got by the croak()
3342 above. IOK is untrue for fractions, so we test them
3343 against UV_MAX_P1. */
f337b084
TH
3344 if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3345 char buf[(sizeof(UV)*CHAR_BIT)/7+1];
a6ec74c1 3346 char *in = buf + sizeof(buf);
196b62db 3347 UV auv = SvUV(fromstr);
a6ec74c1
JH
3348
3349 do {
eb160463 3350 *--in = (char)((auv & 0x7f) | 0x80);
a6ec74c1
JH
3351 auv >>= 7;
3352 } while (auv);
3353 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3354 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3355 in, (buf + sizeof(buf)) - in);
3356 } else if (SvPOKp(fromstr))
3357 goto w_string;
a6ec74c1 3358 else if (SvNOKp(fromstr)) {
0258719b 3359 /* 10**NV_MAX_10_EXP is the largest power of 10
486ec47a 3360 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
0258719b
NC
3361 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3362 x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3363 And with that many bytes only Inf can overflow.
8f8d40ab
PG
3364 Some C compilers are strict about integral constant
3365 expressions so we conservatively divide by a slightly
3366 smaller integer instead of multiplying by the exact
3367 floating-point value.
0258719b
NC
3368 */
3369#ifdef NV_MAX_10_EXP
f337b084 3370 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3371 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
0258719b 3372#else
f337b084 3373 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
8f8d40ab 3374 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */
0258719b 3375#endif
a6ec74c1
JH
3376 char *in = buf + sizeof(buf);
3377
8b6e33c7 3378 anv = Perl_floor(anv);
a6ec74c1 3379 do {
8b6e33c7 3380 const NV next = Perl_floor(anv / 128);
a6ec74c1 3381 if (in <= buf) /* this cannot happen ;-) */
49704364 3382 Perl_croak(aTHX_ "Cannot compress integer in pack");
0258719b 3383 *--in = (unsigned char)(anv - (next * 128)) | 0x80;
15e9f109
NC
3384 anv = next;
3385 } while (anv > 0);
a6ec74c1 3386 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
f337b084
TH
3387 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3388 in, (buf + sizeof(buf)) - in);
3389 } else {
8b6e33c7
AL
3390 const char *from;
3391 char *result, *in;
735b914b
JH
3392 SV *norm;
3393 STRLEN len;
3394 bool done;
3395
f337b084 3396 w_string:
735b914b 3397 /* Copy string and check for compliance */
349d4f2f 3398 from = SvPV_const(fromstr, len);
735b914b 3399 if ((norm = is_an_int(from, len)) == NULL)
49704364 3400 Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
735b914b 3401
a02a5408 3402 Newx(result, len, char);
735b914b
JH
3403 in = result + len;
3404 done = FALSE;
f337b084 3405 while (!done) *--in = div128(norm, &done) | 0x80;
735b914b 3406 result[len - 1] &= 0x7F; /* clear continue bit */
f337b084
TH
3407 PUSH_GROWING_BYTES(utf8, cat, start, cur,
3408 in, (result + len) - in);
735b914b
JH
3409 Safefree(result);
3410 SvREFCNT_dec(norm); /* free norm */
fc241834 3411 }
a6ec74c1
JH
3412 }
3413 break;
3414 case 'i':
49704364 3415 case 'i' | TYPE_IS_SHRIEKING:
a6ec74c1 3416 while (len-- > 0) {
f337b084 3417 int aint;
a6ec74c1
JH
3418 fromstr = NEXTFROM;
3419 aint = SvIV(fromstr);
1109a392 3420 DO_BO_PACK(aint, i);
f337b084 3421 PUSH_VAR(utf8, cur, aint);
a6ec74c1
JH
3422 }
3423 break;
7212898e 3424#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3425 case 'N' | TYPE_IS_SHRIEKING:
7212898e 3426#endif
a6ec74c1
JH
3427 case 'N':
3428 while (len-- > 0) {
f337b084 3429 U32 au32;
a6ec74c1 3430 fromstr = NEXTFROM;
ef108786 3431 au32 = SvUV(fromstr);
a6ec74c1 3432#ifdef HAS_HTONL
ef108786 3433 au32 = PerlSock_htonl(au32);
a6ec74c1 3434#endif
f337b084 3435 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3436 }
3437 break;
7212898e 3438#ifdef PERL_PACK_CAN_SHRIEKSIGN
068bd2e7 3439 case 'V' | TYPE_IS_SHRIEKING:
7212898e 3440#endif
a6ec74c1
JH
3441 case 'V':
3442 while (len-- > 0) {
f337b084 3443 U32 au32;
a6ec74c1 3444 fromstr = NEXTFROM;
ef108786 3445 au32 = SvUV(fromstr);
a6ec74c1 3446#ifdef HAS_HTOVL
ef108786 3447 au32 = htovl(au32);
a6ec74c1 3448#endif
f337b084 3449 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3450 }
3451 break;
49704364 3452 case 'L' | TYPE_IS_SHRIEKING:
a6ec74c1 3453#if LONGSIZE != SIZE32
fc241834 3454 while (len-- > 0) {
f337b084 3455 unsigned long aulong;
fc241834
RGS
3456 fromstr = NEXTFROM;
3457 aulong = SvUV(fromstr);
3458 DO_BO_PACK(aulong, l);
f337b084 3459 PUSH_VAR(utf8, cur, aulong);
a6ec74c1 3460 }
49704364
WL
3461 break;
3462#else
3463 /* Fall though! */
a6ec74c1 3464#endif
49704364 3465 case 'L':
fc241834 3466 while (len-- > 0) {
f337b084 3467 U32 au32;
fc241834
RGS
3468 fromstr = NEXTFROM;
3469 au32 = SvUV(fromstr);
3470 DO_BO_PACK(au32, 32);
f337b084 3471 PUSH32(utf8, cur, &au32);
a6ec74c1
JH
3472 }
3473 break;
49704364 3474 case 'l' | TYPE_IS_SHRIEKING:
a6ec74c1 3475#if LONGSIZE != SIZE32
fc241834 3476 while (len-- > 0) {
f337b084 3477 long along;
fc241834
RGS
3478 fromstr = NEXTFROM;
3479 along = SvIV(fromstr);
3480 DO_BO_PACK(along, l);
f337b084 3481 PUSH_VAR(utf8, cur, along);
a6ec74c1 3482 }
49704364
WL
3483 break;
3484#else
3485 /* Fall though! */
a6ec74c1 3486#endif
49704364
WL
3487 case 'l':
3488 while (len-- > 0) {
f337b084 3489 I32 ai32;
49704364 3490 fromstr = NEXTFROM;
ef108786
MHM
3491 ai32 = SvIV(fromstr);
3492 DO_BO_PACK(ai32, 32);
f337b084 3493 PUSH32(utf8, cur, &ai32);
a6ec74c1
JH
3494 }
3495 break;
3496#ifdef HAS_QUAD
3497 case 'Q':
3498 while (len-- > 0) {
f337b084 3499 Uquad_t auquad;
a6ec74c1 3500 fromstr = NEXTFROM;
f337b084 3501 auquad = (Uquad_t) SvUV(fromstr);
1109a392 3502 DO_BO_PACK(auquad, 64);
f337b084 3503 PUSH_VAR(utf8, cur, auquad);
a6ec74c1
JH
3504 }
3505 break;
3506 case 'q':
3507 while (len-- > 0) {
f337b084 3508 Quad_t aquad;
a6ec74c1
JH
3509 fromstr = NEXTFROM;
3510 aquad = (Quad_t)SvIV(fromstr);
1109a392 3511 DO_BO_PACK(aquad, 64);
f337b084 3512 PUSH_VAR(utf8, cur, aquad);
a6ec74c1
JH
3513 }
3514 break;
f337b084 3515#endif /* HAS_QUAD */
a6ec74c1
JH
3516 case 'P':
3517 len = 1; /* assume SV is correct length */
f337b084 3518 GROWING(utf8, cat, start, cur, sizeof(char *));
49704364 3519 /* Fall through! */
a6ec74c1
JH
3520 case 'p':
3521 while (len-- > 0) {
83003860 3522 const char *aptr;
f337b084 3523
a6ec74c1 3524 fromstr = NEXTFROM;
28a4f200
TH
3525 SvGETMAGIC(fromstr);
3526 if (!SvOK(fromstr)) aptr = NULL;
a6ec74c1 3527 else {
a6ec74c1
JH
3528 /* XXX better yet, could spirit away the string to
3529 * a safe spot and hang on to it until the result
3530 * of pack() (and all copies of the result) are
3531 * gone.
3532 */
041457d9 3533 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
a2a5de95
NC
3534 !SvREADONLY(fromstr)))) {
3535 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3536 "Attempt to pack pointer to temporary value");
a6ec74c1
JH
3537 }
3538 if (SvPOK(fromstr) || SvNIOK(fromstr))
2596d9fe 3539 aptr = SvPV_nomg_const_nolen(fromstr);
a6ec74c1 3540 else
2596d9fe 3541 aptr = SvPV_force_flags_nolen(fromstr, 0);
a6ec74c1 3542 }
07409e01 3543 DO_BO_PACK_PC(aptr);
f337b084 3544 PUSH_VAR(utf8, cur, aptr);
a6ec74c1
JH
3545 }
3546 break;
fc241834 3547 case 'u': {
f7fe979e 3548 const char *aptr, *aend;
fc241834 3549 bool from_utf8;
f337b084 3550
a6ec74c1 3551 fromstr = NEXTFROM;
fc241834
RGS
3552 if (len <= 2) len = 45;
3553 else len = len / 3 * 3;
3554 if (len >= 64) {
a2a5de95
NC
3555 Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3556 "Field too wide in 'u' format in pack");
fc241834
RGS
3557 len = 63;
3558 }
83003860 3559 aptr = SvPV_const(fromstr, fromlen);
fc241834
RGS
3560 from_utf8 = DO_UTF8(fromstr);
3561 if (from_utf8) {
3562 aend = aptr + fromlen;
3f63b0e5 3563 fromlen = sv_len_utf8_nomg(fromstr);
fc241834
RGS
3564 } else aend = NULL; /* Unused, but keep compilers happy */
3565 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
a6ec74c1 3566 while (fromlen > 0) {
fc241834 3567 U8 *end;
a6ec74c1 3568 I32 todo;
fc241834 3569 U8 hunk[1+63/3*4+1];
a6ec74c1 3570
eb160463 3571 if ((I32)fromlen > len)
a6ec74c1
JH
3572 todo = len;
3573 else
3574 todo = fromlen;
fc241834
RGS
3575 if (from_utf8) {
3576 char buffer[64];
3577 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3578 'u' | TYPE_IS_PACK)) {
3579 *cur = '\0';
b162af07 3580 SvCUR_set(cat, cur - start);
5637ef5b
NC
3581 Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3582 "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3583 aptr, aend, buffer, (long) todo);
fc241834
RGS
3584 }
3585 end = doencodes(hunk, buffer, todo);
3586 } else {
3587 end = doencodes(hunk, aptr, todo);
3588 aptr += todo;
3589 }
3590 PUSH_BYTES(utf8, cur, hunk, end-hunk);
3591 fromlen -= todo;
3592 }
a6ec74c1
JH
3593 break;
3594 }
f337b084
TH
3595 }
3596 *cur = '\0';
b162af07 3597 SvCUR_set(cat, cur - start);
f337b084 3598 no_change:
49704364 3599 *symptr = lookahead;
a6ec74c1 3600 }
49704364 3601 return beglist;
18529408
IZ
3602}
3603#undef NEXTFROM
3604
3605
3606PP(pp_pack)
3607{
97aff369 3608 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
eb578fdb 3609 SV *cat = TARG;
18529408 3610 STRLEN fromlen;
349d4f2f 3611 SV *pat_sv = *++MARK;
eb578fdb
KW
3612 const char *pat = SvPV_const(pat_sv, fromlen);
3613 const char *patend = pat + fromlen;
18529408
IZ
3614
3615 MARK++;
76f68e9b 3616 sv_setpvs(cat, "");
f337b084 3617 SvUTF8_off(cat);
18529408 3618
7accc089 3619 packlist(cat, pat, patend, MARK, SP + 1);
18529408 3620
a6ec74c1
JH
3621 SvSETMAGIC(cat);
3622 SP = ORIGMARK;
3623 PUSHs(cat);
3624 RETURN;
3625}
a6ec74c1 3626
73cb7263
NC
3627/*
3628 * Local variables:
3629 * c-indentation-style: bsd
3630 * c-basic-offset: 4
14d04a33 3631 * indent-tabs-mode: nil
73cb7263
NC
3632 * End:
3633 *
14d04a33 3634 * ex: set ts=8 sts=4 sw=4 et:
37442d52 3635 */